Merge branch 'main' into rewbs/tool-use-charge-to-subscription

2026-04-25 00:51:20 +00:00 · 2026-04-02 11:00:35 +11:00 · 2026-04-02 11:00:35 +11:00 · a2e56d044b
commit a2e56d044b
parent 1b7473e702 bd9e0b605f
175 changed files with 18848 additions and 3772 deletions
--- a/.dockerignore
+++ b/.dockerignore
@ -11,3 +11,5 @@ node_modules

 # Environment files
 .env
+
+*.md
--- a/.env.example
+++ b/.env.example
@ -7,18 +7,19 @@
 # OpenRouter provides access to many models through one API
 # All LLM calls go through OpenRouter - no direct provider keys needed
 # Get your key at: https://openrouter.ai/keys
-OPENROUTER_API_KEY=
+# OPENROUTER_API_KEY=

-# Default model to use (OpenRouter format: provider/model)
-# Examples: anthropic/claude-opus-4.6, openai/gpt-4o, google/gemini-3-flash-preview, zhipuai/glm-4-plus
-LLM_MODEL=anthropic/claude-opus-4.6
+# Default model is configured in ~/.hermes/config.yaml (model.default).
+# Use 'hermes model' or 'hermes setup' to change it.
+# LLM_MODEL is no longer read from .env — this line is kept for reference only.
+# LLM_MODEL=anthropic/claude-opus-4.6

 # =============================================================================
 # LLM PROVIDER (z.ai / GLM)
 # =============================================================================
 # z.ai provides access to ZhipuAI GLM models (GLM-4-Plus, etc.)
 # Get your key at: https://z.ai or https://open.bigmodel.cn
-GLM_API_KEY=
+# GLM_API_KEY=
 # GLM_BASE_URL=https://api.z.ai/api/paas/v4  # Override default base URL

 # =============================================================================
@ -28,7 +29,7 @@ GLM_API_KEY=
 # Get your key at: https://platform.kimi.ai (Kimi Code console)
 # Keys prefixed sk-kimi- use the Kimi Code API (api.kimi.com) by default.
 # Legacy keys from platform.moonshot.ai need KIMI_BASE_URL override below.
-KIMI_API_KEY=
+# KIMI_API_KEY=
 # KIMI_BASE_URL=https://api.kimi.com/coding/v1  # Default for sk-kimi- keys
 # KIMI_BASE_URL=https://api.moonshot.ai/v1      # For legacy Moonshot keys
 # KIMI_BASE_URL=https://api.moonshot.cn/v1       # For Moonshot China keys
@ -38,11 +39,11 @@ KIMI_API_KEY=
 # =============================================================================
 # MiniMax provides access to MiniMax models (global endpoint)
 # Get your key at: https://www.minimax.io
-MINIMAX_API_KEY=
+# MINIMAX_API_KEY=
 # MINIMAX_BASE_URL=https://api.minimax.io/v1  # Override default base URL

 # MiniMax China endpoint (for users in mainland China)
-MINIMAX_CN_API_KEY=
+# MINIMAX_CN_API_KEY=
 # MINIMAX_CN_BASE_URL=https://api.minimaxi.com/v1  # Override default base URL

 # =============================================================================
@ -50,7 +51,7 @@ MINIMAX_CN_API_KEY=
 # =============================================================================
 # OpenCode Zen provides curated, tested models (GPT, Claude, Gemini, MiniMax, GLM, Kimi)
 # Pay-as-you-go pricing. Get your key at: https://opencode.ai/auth
-OPENCODE_ZEN_API_KEY=
+# OPENCODE_ZEN_API_KEY=
 # OPENCODE_ZEN_BASE_URL=https://opencode.ai/zen/v1  # Override default base URL

 # =============================================================================
@ -58,7 +59,7 @@ OPENCODE_ZEN_API_KEY=
 # =============================================================================
 # OpenCode Go provides access to open models (GLM-5, Kimi K2.5, MiniMax M2.5)
 # $10/month subscription. Get your key at: https://opencode.ai/auth
-OPENCODE_GO_API_KEY=
+# OPENCODE_GO_API_KEY=

 # =============================================================================
 # LLM PROVIDER (Hugging Face Inference Providers)
@ -67,7 +68,7 @@ OPENCODE_GO_API_KEY=
 # Free tier included ($0.10/month), no markup on provider rates.
 # Get your token at: https://huggingface.co/settings/tokens
 # Required permission: "Make calls to Inference Providers"
-HF_TOKEN=
+# HF_TOKEN=
 # OPENCODE_GO_BASE_URL=https://opencode.ai/zen/go/v1  # Override default base URL

 # =============================================================================
@ -76,26 +77,26 @@ HF_TOKEN=

 # Exa API Key - AI-native web search and contents
 # Get at: https://exa.ai
-EXA_API_KEY=
+# EXA_API_KEY=

 # Parallel API Key - AI-native web search and extract
 # Get at: https://parallel.ai
-PARALLEL_API_KEY=
+# PARALLEL_API_KEY=

 # Firecrawl API Key - Web search, extract, and crawl
 # Get at: https://firecrawl.dev/
-FIRECRAWL_API_KEY=
+# FIRECRAWL_API_KEY=


 # FAL.ai API Key - Image generation
 # Get at: https://fal.ai/
-FAL_KEY=
+# FAL_KEY=

 # Honcho - Cross-session AI-native user modeling (optional)
 # Builds a persistent understanding of the user across sessions and tools.
 # Get at: https://app.honcho.dev
 # Also requires ~/.honcho/config.json with enabled=true (see README).
-HONCHO_API_KEY=
+# HONCHO_API_KEY=

 # =============================================================================
 # TERMINAL TOOL CONFIGURATION
@ -181,10 +182,10 @@ TERMINAL_LIFETIME_SECONDS=300

 # Browserbase API Key - Cloud browser execution
 # Get at: https://browserbase.com/
-BROWSERBASE_API_KEY=
+# BROWSERBASE_API_KEY=

 # Browserbase Project ID - From your Browserbase dashboard
-BROWSERBASE_PROJECT_ID=
+# BROWSERBASE_PROJECT_ID=

 # Enable residential proxies for better CAPTCHA solving (default: true)
 # Routes traffic through residential IPs, significantly improves success rate
@ -216,7 +217,7 @@ BROWSER_INACTIVITY_TIMEOUT=120
 # Uses OpenAI's API directly (not via OpenRouter).
 # Named VOICE_TOOLS_OPENAI_KEY to avoid interference with OpenRouter.
 # Get at: https://platform.openai.com/api-keys
-VOICE_TOOLS_OPENAI_KEY=
+# VOICE_TOOLS_OPENAI_KEY=

 # =============================================================================
 # SLACK INTEGRATION
@ -231,6 +232,21 @@ VOICE_TOOLS_OPENAI_KEY=
 # Slack allowed users (comma-separated Slack user IDs)
 # SLACK_ALLOWED_USERS=

+# =============================================================================
+# TELEGRAM INTEGRATION
+# =============================================================================
+# Telegram Bot Token - From @BotFather (https://t.me/BotFather)
+# TELEGRAM_BOT_TOKEN=
+# TELEGRAM_ALLOWED_USERS=                  # Comma-separated user IDs
+# TELEGRAM_HOME_CHANNEL=                   # Default chat for cron delivery
+# TELEGRAM_HOME_CHANNEL_NAME=              # Display name for home channel
+
+# Webhook mode (optional — for cloud deployments like Fly.io/Railway)
+# Default is long polling. Setting TELEGRAM_WEBHOOK_URL switches to webhook mode.
+# TELEGRAM_WEBHOOK_URL=https://my-app.fly.dev/telegram
+# TELEGRAM_WEBHOOK_PORT=8443
+# TELEGRAM_WEBHOOK_SECRET=                 # Recommended for production
+
 # WhatsApp (built-in Baileys bridge — run `hermes whatsapp` to pair)
 # WHATSAPP_ENABLED=false
 # WHATSAPP_ALLOWED_USERS=15551234567
@ -287,11 +303,11 @@ IMAGE_TOOLS_DEBUG=false

 # Tinker API Key - RL training service
 # Get at: https://tinker-console.thinkingmachines.ai/keys
-TINKER_API_KEY=
+# TINKER_API_KEY=

 # Weights & Biases API Key - Experiment tracking and metrics
 # Get at: https://wandb.ai/authorize
-WANDB_API_KEY=
+# WANDB_API_KEY=

 # RL API Server URL (default: http://localhost:8080)
 # Change if running the rl-server on a different host/port
--- a/.github/workflows/deploy-site.yml
+++ b/.github/workflows/deploy-site.yml
@ -19,6 +19,8 @@ concurrency:

 jobs:
  build-and-deploy:
+    # Only run on the upstream repository, not on forks
+    if: github.repository == 'NousResearch/hermes-agent'
    runs-on: ubuntu-latest
    environment:
      name: github-pages
--- a/.github/workflows/docker-publish.yml
+++ b/.github/workflows/docker-publish.yml
@ -5,6 +5,8 @@ on:
    branches: [main]
  pull_request:
    branches: [main]
+  release:
+    types: [published]

 concurrency:
  group: docker-${{ github.ref }}
@ -12,6 +14,8 @@ concurrency:

 jobs:
  build-and-push:
+    # Only run on the upstream repository, not on forks
+    if: github.repository == 'NousResearch/hermes-agent'
    runs-on: ubuntu-latest
    timeout-minutes: 30
    steps:
@ -41,13 +45,13 @@ jobs:
            nousresearch/hermes-agent:test --help

      - name: Log in to Docker Hub
-        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
+        if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

-      - name: Push image
+      - name: Push image (main branch)
        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
        uses: docker/build-push-action@v6
        with:
@ -59,3 +63,17 @@ jobs:
            nousresearch/hermes-agent:${{ github.sha }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
+
+      - name: Push image (release)
+        if: github.event_name == 'release'
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: Dockerfile
+          push: true
+          tags: |
+            nousresearch/hermes-agent:latest
+            nousresearch/hermes-agent:${{ github.event.release.tag_name }}
+            nousresearch/hermes-agent:${{ github.sha }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@ -34,9 +34,37 @@ jobs:
      - name: Run tests
        run: |
          source .venv/bin/activate
-          python -m pytest tests/ -q --ignore=tests/integration --tb=short -n auto
+          python -m pytest tests/ -q --ignore=tests/integration --ignore=tests/e2e --tb=short -n auto
        env:
          # Ensure tests don't accidentally call real APIs
          OPENROUTER_API_KEY: ""
          OPENAI_API_KEY: ""
          NOUS_API_KEY: ""
+
+  e2e:
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+
+      - name: Set up Python 3.11
+        run: uv python install 3.11
+
+      - name: Install dependencies
+        run: |
+          uv venv .venv --python 3.11
+          source .venv/bin/activate
+          uv pip install -e ".[all,dev]"
+
+      - name: Run e2e tests
+        run: |
+          source .venv/bin/activate
+          python -m pytest tests/e2e/ -v --tb=short
+        env:
+          OPENROUTER_API_KEY: ""
+          OPENAI_API_KEY: ""
+          NOUS_API_KEY: ""
--- a/19
+++ b/19
@ -1,16 +1,21 @@
 FROM debian:13.4

-RUN apt-get update
-RUN apt-get install -y nodejs npm python3 python3-pip ripgrep ffmpeg gcc python3-dev libffi-dev
+# Install system dependencies in one layer, clear APT cache
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        build-essential nodejs npm python3 python3-pip ripgrep ffmpeg gcc python3-dev libffi-dev && \
+    rm -rf /var/lib/apt/lists/*

 COPY . /opt/hermes
 WORKDIR /opt/hermes

-RUN pip install -e ".[all]" --break-system-packages
-RUN npm install
-RUN npx playwright install --with-deps chromium
-WORKDIR /opt/hermes/scripts/whatsapp-bridge
-RUN npm install
+# Install Python and Node dependencies in one layer, no cache
+RUN pip install --no-cache-dir -e ".[all]" --break-system-packages && \
+    npm install --prefer-offline --no-audit && \
+    npx playwright install --with-deps chromium --only-shell && \
+    cd /opt/hermes/scripts/whatsapp-bridge && \
+    npm install --prefer-offline --no-audit && \
+    npm cache clean --force

 WORKDIR /opt/hermes
 RUN chmod +x /opt/hermes/docker/entrypoint.sh
--- a/MANIFEST.in
+++ b/MANIFEST.in
@ -0,0 +1,4 @@
+graft skills
+graft optional-skills
+global-exclude __pycache__
+global-exclude *.py[cod]
--- a/acp_adapter/session.py
+++ b/acp_adapter/session.py
@ -426,7 +426,7 @@ class SessionManager:

        config = load_config()
        model_cfg = config.get("model")
-        default_model = "anthropic/claude-opus-4.6"
+        default_model = ""
        config_provider = None
        if isinstance(model_cfg, dict):
            default_model = str(model_cfg.get("default") or default_model)
--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@ -162,6 +162,21 @@ def _is_oauth_token(key: str) -> bool:
    return True


+def _is_third_party_anthropic_endpoint(base_url: str | None) -> bool:
+    """Return True for non-Anthropic endpoints using the Anthropic Messages API.
+
+    Third-party proxies (Azure AI Foundry, AWS Bedrock, self-hosted) authenticate
+    with their own API keys via x-api-key, not Anthropic OAuth tokens. OAuth
+    detection should be skipped for these endpoints.
+    """
+    if not base_url:
+        return False  # No base_url = direct Anthropic API
+    normalized = base_url.rstrip("/").lower()
+    if "anthropic.com" in normalized:
+        return False  # Direct Anthropic API — OAuth applies
+    return True  # Any other endpoint is a third-party proxy
+
+
 def _requires_bearer_auth(base_url: str | None) -> bool:
    """Return True for Anthropic-compatible providers that require Bearer auth.

@ -205,6 +220,14 @@ def build_anthropic_client(api_key: str, base_url: str = None):
        kwargs["auth_token"] = api_key
        if _COMMON_BETAS:
            kwargs["default_headers"] = {"anthropic-beta": ",".join(_COMMON_BETAS)}
+    elif _is_third_party_anthropic_endpoint(base_url):
+        # Third-party proxies (Azure AI Foundry, AWS Bedrock, etc.) use their
+        # own API keys with x-api-key auth. Skip OAuth detection — their keys
+        # don't follow Anthropic's sk-ant-* prefix convention and would be
+        # misclassified as OAuth tokens.
+        kwargs["api_key"] = api_key
+        if _COMMON_BETAS:
+            kwargs["default_headers"] = {"anthropic-beta": ",".join(_COMMON_BETAS)}
    elif _is_oauth_token(api_key):
        # OAuth access token / setup-token → Bearer auth + Claude Code identity.
        # Anthropic routes OAuth requests based on user-agent and headers;
@ -284,71 +307,105 @@ def is_claude_code_token_valid(creds: Dict[str, Any]) -> bool:
    return now_ms < (expires_at - 60_000)


-def _refresh_oauth_token(creds: Dict[str, Any]) -> Optional[str]:
-    """Attempt to refresh an expired Claude Code OAuth token.
-
-    Uses the same token endpoint and client_id as Claude Code / OpenCode.
-    Only works for credentials that have a refresh token (from claude /login
-    or claude setup-token with OAuth flow).
-
-    Tries the new platform.claude.com endpoint first (Claude Code >=2.1.81),
-    then falls back to console.anthropic.com for older tokens.
-
-    Returns the new access token, or None if refresh fails.
-    """
+def refresh_anthropic_oauth_pure(refresh_token: str, *, use_json: bool = False) -> Dict[str, Any]:
+    """Refresh an Anthropic OAuth token without mutating local credential files."""
    import time
+    import urllib.parse
    import urllib.request

+    if not refresh_token:
+        raise ValueError("refresh_token is required")
+
+    client_id = "9d1c250a-e61b-44d9-88ed-5944d1962f5e"
+    if use_json:
+        data = json.dumps({
+            "grant_type": "refresh_token",
+            "refresh_token": refresh_token,
+            "client_id": client_id,
+        }).encode()
+        content_type = "application/json"
+    else:
+        data = urllib.parse.urlencode({
+            "grant_type": "refresh_token",
+            "refresh_token": refresh_token,
+            "client_id": client_id,
+        }).encode()
+        content_type = "application/x-www-form-urlencoded"
+
+    token_endpoints = [
+        "https://platform.claude.com/v1/oauth/token",
+        "https://console.anthropic.com/v1/oauth/token",
+    ]
+    last_error = None
+    for endpoint in token_endpoints:
+        req = urllib.request.Request(
+            endpoint,
+            data=data,
+            headers={
+                "Content-Type": content_type,
+                "User-Agent": f"claude-cli/{_get_claude_code_version()} (external, cli)",
+            },
+            method="POST",
+        )
+        try:
+            with urllib.request.urlopen(req, timeout=10) as resp:
+                result = json.loads(resp.read().decode())
+        except Exception as exc:
+            last_error = exc
+            logger.debug("Anthropic token refresh failed at %s: %s", endpoint, exc)
+            continue
+
+        access_token = result.get("access_token", "")
+        if not access_token:
+            raise ValueError("Anthropic refresh response was missing access_token")
+        next_refresh = result.get("refresh_token", refresh_token)
+        expires_in = result.get("expires_in", 3600)
+        return {
+            "access_token": access_token,
+            "refresh_token": next_refresh,
+            "expires_at_ms": int(time.time() * 1000) + (expires_in * 1000),
+        }
+
+    if last_error is not None:
+        raise last_error
+    raise ValueError("Anthropic token refresh failed")
+
+
+def _refresh_oauth_token(creds: Dict[str, Any]) -> Optional[str]:
+    """Attempt to refresh an expired Claude Code OAuth token."""
    refresh_token = creds.get("refreshToken", "")
    if not refresh_token:
        logger.debug("No refresh token available — cannot refresh")
        return None

-    # Client ID used by Claude Code's OAuth flow
-    CLIENT_ID = "9d1c250a-e61b-44d9-88ed-5944d1962f5e"
-
-    # Anthropic migrated OAuth from console.anthropic.com to platform.claude.com
-    # (Claude Code v2.1.81+). Try new endpoint first, fall back to old.
-    token_endpoints = [
-        "https://platform.claude.com/v1/oauth/token",
-        "https://console.anthropic.com/v1/oauth/token",
-    ]
-
-    payload = json.dumps({
-        "grant_type": "refresh_token",
-        "refresh_token": refresh_token,
-        "client_id": CLIENT_ID,
-    }).encode()
-
-    headers = {
-        "Content-Type": "application/json",
-        "User-Agent": f"claude-cli/{_get_claude_code_version()} (external, cli)",
-    }
-
-    for endpoint in token_endpoints:
-        req = urllib.request.Request(
-            endpoint, data=payload, headers=headers, method="POST",
+    try:
+        refreshed = refresh_anthropic_oauth_pure(refresh_token, use_json=False)
+        _write_claude_code_credentials(
+            refreshed["access_token"],
+            refreshed["refresh_token"],
+            refreshed["expires_at_ms"],
        )
-        try:
-            with urllib.request.urlopen(req, timeout=10) as resp:
-                result = json.loads(resp.read().decode())
-                new_access = result.get("access_token", "")
-                new_refresh = result.get("refresh_token", refresh_token)
-                expires_in = result.get("expires_in", 3600)
-
-                if new_access:
-                    new_expires_ms = int(time.time() * 1000) + (expires_in * 1000)
-                    _write_claude_code_credentials(new_access, new_refresh, new_expires_ms)
-                    logger.debug("Refreshed Claude Code OAuth token via %s", endpoint)
-                    return new_access
-        except Exception as e:
-            logger.debug("Token refresh failed at %s: %s", endpoint, e)
-
-    return None
+        logger.debug("Successfully refreshed Claude Code OAuth token")
+        return refreshed["access_token"]
+    except Exception as e:
+        logger.debug("Failed to refresh Claude Code token: %s", e)
+        return None


-def _write_claude_code_credentials(access_token: str, refresh_token: str, expires_at_ms: int) -> None:
-    """Write refreshed credentials back to ~/.claude/.credentials.json."""
+def _write_claude_code_credentials(
+    access_token: str,
+    refresh_token: str,
+    expires_at_ms: int,
+    *,
+    scopes: Optional[list] = None,
+) -> None:
+    """Write refreshed credentials back to ~/.claude/.credentials.json.
+
+    The optional *scopes* list (e.g. ``["user:inference", "user:profile", ...]``)
+    is persisted so that Claude Code's own auth check recognises the credential
+    as valid.  Claude Code >=2.1.81 gates on the presence of ``"user:inference"``
+    in the stored scopes before it will use the token.
+    """
    cred_path = Path.home() / ".claude" / ".credentials.json"
    try:
        # Read existing file to preserve other fields
@ -356,11 +413,19 @@ def _write_claude_code_credentials(access_token: str, refresh_token: str, expire
        if cred_path.exists():
            existing = json.loads(cred_path.read_text(encoding="utf-8"))

-        existing["claudeAiOauth"] = {
+        oauth_data: Dict[str, Any] = {
            "accessToken": access_token,
            "refreshToken": refresh_token,
            "expiresAt": expires_at_ms,
        }
+        if scopes is not None:
+            oauth_data["scopes"] = scopes
+        elif "claudeAiOauth" in existing and "scopes" in existing["claudeAiOauth"]:
+            # Preserve previously-stored scopes when the refresh response
+            # does not include a scope field.
+            oauth_data["scopes"] = existing["claudeAiOauth"]["scopes"]
+
+        existing["claudeAiOauth"] = oauth_data

        cred_path.parent.mkdir(parents=True, exist_ok=True)
        cred_path.write_text(json.dumps(existing, indent=2), encoding="utf-8")
@ -520,10 +585,208 @@ def run_oauth_setup_token() -> Optional[str]:
    return None


+# ── Hermes-native PKCE OAuth flow ────────────────────────────────────────
+# Mirrors the flow used by Claude Code, pi-ai, and OpenCode.
+# Stores credentials in ~/.hermes/.anthropic_oauth.json (our own file).
+
+_OAUTH_CLIENT_ID = "9d1c250a-e61b-44d9-88ed-5944d1962f5e"
+_OAUTH_TOKEN_URL = "https://console.anthropic.com/v1/oauth/token"
+_OAUTH_REDIRECT_URI = "https://console.anthropic.com/oauth/code/callback"
+_OAUTH_SCOPES = "org:create_api_key user:profile user:inference"
+_HERMES_OAUTH_FILE = get_hermes_home() / ".anthropic_oauth.json"


+def _generate_pkce() -> tuple:
+    """Generate PKCE code_verifier and code_challenge (S256)."""
+    import base64
+    import hashlib
+    import secrets
+
+    verifier = base64.urlsafe_b64encode(secrets.token_bytes(32)).rstrip(b"=").decode()
+    challenge = base64.urlsafe_b64encode(
+        hashlib.sha256(verifier.encode()).digest()
+    ).rstrip(b"=").decode()
+    return verifier, challenge


+def run_hermes_oauth_login_pure() -> Optional[Dict[str, Any]]:
+    """Run Hermes-native OAuth PKCE flow and return credential state."""
+    import time
+    import webbrowser
+
+    verifier, challenge = _generate_pkce()
+
+    params = {
+        "code": "true",
+        "client_id": _OAUTH_CLIENT_ID,
+        "response_type": "code",
+        "redirect_uri": _OAUTH_REDIRECT_URI,
+        "scope": _OAUTH_SCOPES,
+        "code_challenge": challenge,
+        "code_challenge_method": "S256",
+        "state": verifier,
+    }
+    from urllib.parse import urlencode
+
+    auth_url = f"https://claude.ai/oauth/authorize?{urlencode(params)}"
+
+    print()
+    print("Authorize Hermes with your Claude Pro/Max subscription.")
+    print()
+    print("╭─ Claude Pro/Max Authorization ────────────────────╮")
+    print("│                                                   │")
+    print("│  Open this link in your browser:                  │")
+    print("╰───────────────────────────────────────────────────╯")
+    print()
+    print(f"  {auth_url}")
+    print()
+
+    try:
+        webbrowser.open(auth_url)
+        print("  (Browser opened automatically)")
+    except Exception:
+        pass
+
+    print()
+    print("After authorizing, you'll see a code. Paste it below.")
+    print()
+    try:
+        auth_code = input("Authorization code: ").strip()
+    except (KeyboardInterrupt, EOFError):
+        return None
+
+    if not auth_code:
+        print("No code entered.")
+        return None
+
+    splits = auth_code.split("#")
+    code = splits[0]
+    state = splits[1] if len(splits) > 1 else ""
+
+    try:
+        import urllib.request
+
+        exchange_data = json.dumps({
+            "grant_type": "authorization_code",
+            "client_id": _OAUTH_CLIENT_ID,
+            "code": code,
+            "state": state,
+            "redirect_uri": _OAUTH_REDIRECT_URI,
+            "code_verifier": verifier,
+        }).encode()
+
+        req = urllib.request.Request(
+            _OAUTH_TOKEN_URL,
+            data=exchange_data,
+            headers={
+                "Content-Type": "application/json",
+                "User-Agent": f"claude-cli/{_get_claude_code_version()} (external, cli)",
+            },
+            method="POST",
+        )
+
+        with urllib.request.urlopen(req, timeout=15) as resp:
+            result = json.loads(resp.read().decode())
+    except Exception as e:
+        print(f"Token exchange failed: {e}")
+        return None
+
+    access_token = result.get("access_token", "")
+    refresh_token = result.get("refresh_token", "")
+    expires_in = result.get("expires_in", 3600)
+
+    if not access_token:
+        print("No access token in response.")
+        return None
+
+    expires_at_ms = int(time.time() * 1000) + (expires_in * 1000)
+    return {
+        "access_token": access_token,
+        "refresh_token": refresh_token,
+        "expires_at_ms": expires_at_ms,
+    }
+
+
+def run_hermes_oauth_login() -> Optional[str]:
+    """Run Hermes-native OAuth PKCE flow for Claude Pro/Max subscription.
+
+    Opens a browser to claude.ai for authorization, prompts for the code,
+    exchanges it for tokens, and stores them in ~/.hermes/.anthropic_oauth.json.
+
+    Returns the access token on success, None on failure.
+    """
+    result = run_hermes_oauth_login_pure()
+    if not result:
+        return None
+
+    access_token = result["access_token"]
+    refresh_token = result["refresh_token"]
+    expires_at_ms = result["expires_at_ms"]
+
+    _save_hermes_oauth_credentials(access_token, refresh_token, expires_at_ms)
+    _write_claude_code_credentials(access_token, refresh_token, expires_at_ms)
+
+    print("Authentication successful!")
+    return access_token
+
+
+def _save_hermes_oauth_credentials(access_token: str, refresh_token: str, expires_at_ms: int) -> None:
+    """Save OAuth credentials to ~/.hermes/.anthropic_oauth.json."""
+    data = {
+        "accessToken": access_token,
+        "refreshToken": refresh_token,
+        "expiresAt": expires_at_ms,
+    }
+    try:
+        _HERMES_OAUTH_FILE.parent.mkdir(parents=True, exist_ok=True)
+        _HERMES_OAUTH_FILE.write_text(json.dumps(data, indent=2), encoding="utf-8")
+        _HERMES_OAUTH_FILE.chmod(0o600)
+    except (OSError, IOError) as e:
+        logger.debug("Failed to save Hermes OAuth credentials: %s", e)
+
+
+def read_hermes_oauth_credentials() -> Optional[Dict[str, Any]]:
+    """Read Hermes-managed OAuth credentials from ~/.hermes/.anthropic_oauth.json."""
+    if _HERMES_OAUTH_FILE.exists():
+        try:
+            data = json.loads(_HERMES_OAUTH_FILE.read_text(encoding="utf-8"))
+            if data.get("accessToken"):
+                return data
+        except (json.JSONDecodeError, OSError, IOError) as e:
+            logger.debug("Failed to read Hermes OAuth credentials: %s", e)
+    return None
+
+
+def refresh_hermes_oauth_token() -> Optional[str]:
+    """Refresh the Hermes-managed OAuth token using the stored refresh token.
+
+    Returns the new access token, or None if refresh fails.
+    """
+    creds = read_hermes_oauth_credentials()
+    if not creds or not creds.get("refreshToken"):
+        return None
+
+    try:
+        refreshed = refresh_anthropic_oauth_pure(
+            creds["refreshToken"],
+            use_json=True,
+        )
+        _save_hermes_oauth_credentials(
+            refreshed["access_token"],
+            refreshed["refresh_token"],
+            refreshed["expires_at_ms"],
+        )
+        _write_claude_code_credentials(
+            refreshed["access_token"],
+            refreshed["refresh_token"],
+            refreshed["expires_at_ms"],
+        )
+        logger.debug("Successfully refreshed Hermes OAuth token")
+        return refreshed["access_token"]
+    except Exception as e:
+        logger.debug("Failed to refresh Hermes OAuth token: %s", e)
+
+    return None


 # ---------------------------------------------------------------------------
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@ -7,7 +7,7 @@ the best available backend without duplicating fallback logic.
 Resolution order for text tasks (auto mode):
  1. OpenRouter  (OPENROUTER_API_KEY)
  2. Nous Portal (~/.hermes/auth.json active provider)
-  3. Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY)
+  3. Custom endpoint (config.yaml model.base_url + OPENAI_API_KEY)
  4. Codex OAuth (Responses API via chatgpt.com with gpt-5.3-codex,
     wrapped to look like a chat.completions client)
  5. Native Anthropic
@ -47,7 +47,8 @@ from typing import Any, Dict, List, Optional, Tuple

 from openai import OpenAI

-from hermes_constants import OPENROUTER_BASE_URL, get_hermes_home
+from hermes_cli.config import get_hermes_home
+from hermes_constants import OPENROUTER_BASE_URL

 logger = logging.getLogger(__name__)

@ -95,6 +96,45 @@ _CODEX_AUX_MODEL = "gpt-5.2-codex"
 _CODEX_AUX_BASE_URL = "https://chatgpt.com/backend-api/codex"


+def _select_pool_entry(provider: str) -> Tuple[bool, Optional[Any]]:
+    """Return (pool_exists_for_provider, selected_entry)."""
+    try:
+        pool = load_pool(provider)
+    except Exception as exc:
+        logger.debug("Auxiliary client: could not load pool for %s: %s", provider, exc)
+        return False, None
+    if not pool or not pool.has_credentials():
+        return False, None
+    try:
+        return True, pool.select()
+    except Exception as exc:
+        logger.debug("Auxiliary client: could not select pool entry for %s: %s", provider, exc)
+        return True, None
+
+
+def _pool_runtime_api_key(entry: Any) -> str:
+    if entry is None:
+        return ""
+    # Use the PooledCredential.runtime_api_key property which handles
+    # provider-specific fallback (e.g. agent_key for nous).
+    key = getattr(entry, "runtime_api_key", None) or getattr(entry, "access_token", "")
+    return str(key or "").strip()
+
+
+def _pool_runtime_base_url(entry: Any, fallback: str = "") -> str:
+    if entry is None:
+        return str(fallback or "").strip().rstrip("/")
+    # runtime_base_url handles provider-specific logic (e.g. nous prefers inference_base_url).
+    # Fall back through inference_base_url and base_url for non-PooledCredential entries.
+    url = (
+        getattr(entry, "runtime_base_url", None)
+        or getattr(entry, "inference_base_url", None)
+        or getattr(entry, "base_url", None)
+        or fallback
+    )
+    return str(url or "").strip().rstrip("/")
+
+
 # ── Codex Responses → chat.completions adapter ─────────────────────────────
 # All auxiliary consumers call client.chat.completions.create(**kwargs) and
 # read response.choices[0].message.content. This adapter translates those
@ -438,6 +478,22 @@ def _read_nous_auth() -> Optional[dict]:
    Returns the provider state dict if Nous is active with tokens,
    otherwise None.
    """
+    pool_present, entry = _select_pool_entry("nous")
+    if pool_present:
+        if entry is None:
+            return None
+        return {
+            "access_token": getattr(entry, "access_token", ""),
+            "refresh_token": getattr(entry, "refresh_token", None),
+            "agent_key": getattr(entry, "agent_key", None),
+            "inference_base_url": _pool_runtime_base_url(entry, _NOUS_DEFAULT_BASE_URL),
+            "portal_base_url": getattr(entry, "portal_base_url", None),
+            "client_id": getattr(entry, "client_id", None),
+            "scope": getattr(entry, "scope", None),
+            "token_type": getattr(entry, "token_type", "Bearer"),
+            "source": "pool",
+        }
+
    try:
        if not _AUTH_JSON_PATH.is_file():
            return None
@ -466,6 +522,11 @@ def _nous_base_url() -> str:

 def _read_codex_access_token() -> Optional[str]:
    """Read a valid, non-expired Codex OAuth access token from Hermes auth store."""
+    pool_present, entry = _select_pool_entry("openai-codex")
+    if pool_present:
+        token = _pool_runtime_api_key(entry)
+        return token or None
+
    try:
        from hermes_cli.auth import _read_codex_tokens
        data = _read_codex_tokens()
@ -512,6 +573,24 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
        if provider_id == "anthropic":
            return _try_anthropic()

+        pool_present, entry = _select_pool_entry(provider_id)
+        if pool_present:
+            api_key = _pool_runtime_api_key(entry)
+            if not api_key:
+                continue
+
+            base_url = _pool_runtime_base_url(entry, pconfig.inference_base_url) or pconfig.inference_base_url
+            model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id, "default")
+            logger.debug("Auxiliary text client: %s (%s) via pool", pconfig.name, model)
+            extra = {}
+            if "api.kimi.com" in base_url.lower():
+                extra["default_headers"] = {"User-Agent": "KimiCLI/1.0"}
+            elif "api.githubcopilot.com" in base_url.lower():
+                from hermes_cli.models import copilot_default_headers
+
+                extra["default_headers"] = copilot_default_headers()
+            return OpenAI(api_key=api_key, base_url=base_url, **extra), model
+
        creds = resolve_api_key_provider_credentials(provider_id)
        api_key = str(creds.get("api_key", "")).strip()
        if not api_key:
@ -561,6 +640,16 @@ def _get_auxiliary_env_override(task: str, suffix: str) -> Optional[str]:


 def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:
+    pool_present, entry = _select_pool_entry("openrouter")
+    if pool_present:
+        or_key = _pool_runtime_api_key(entry)
+        if not or_key:
+            return None, None
+        base_url = _pool_runtime_base_url(entry, OPENROUTER_BASE_URL) or OPENROUTER_BASE_URL
+        logger.debug("Auxiliary client: OpenRouter via pool")
+        return OpenAI(api_key=or_key, base_url=base_url,
+                       default_headers=_OR_HEADERS), _OPENROUTER_MODEL
+
    or_key = os.getenv("OPENROUTER_API_KEY")
    if not or_key:
        return None, None
@ -576,22 +665,22 @@ def _try_nous() -> Tuple[Optional[OpenAI], Optional[str]]:
    global auxiliary_is_nous
    auxiliary_is_nous = True
    logger.debug("Auxiliary client: Nous Portal")
+    model = "gemini-3-flash" if nous.get("source") == "pool" else _NOUS_MODEL
    return (
-        OpenAI(api_key=_nous_api_key(nous), base_url=_nous_base_url()),
-        _NOUS_MODEL,
+        OpenAI(
+            api_key=_nous_api_key(nous),
+            base_url=str(nous.get("inference_base_url") or _nous_base_url()).rstrip("/"),
+        ),
+        model,
    )


 def _read_main_model() -> str:
-    """Read the user's configured main model from config/env.
+    """Read the user's configured main model from config.yaml.

-    Falls back through HERMES_MODEL → LLM_MODEL → config.yaml model.default
-    so the auxiliary client can use the same model as the main agent when no
-    dedicated auxiliary model is available.
+    config.yaml model.default is the single source of truth for the active
+    model. Environment variables are no longer consulted.
    """
-    from_env = os.getenv("OPENAI_MODEL") or os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL")
-    if from_env:
-        return from_env.strip()
    try:
        from hermes_cli.config import load_config
        cfg = load_config()
@ -658,11 +747,19 @@ def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]:


 def _try_codex() -> Tuple[Optional[Any], Optional[str]]:
-    codex_token = _read_codex_access_token()
-    if not codex_token:
-        return None, None
+    pool_present, entry = _select_pool_entry("openai-codex")
+    if pool_present:
+        codex_token = _pool_runtime_api_key(entry)
+        if not codex_token:
+            return None, None
+        base_url = _pool_runtime_base_url(entry, _CODEX_AUX_BASE_URL) or _CODEX_AUX_BASE_URL
+    else:
+        codex_token = _read_codex_access_token()
+        if not codex_token:
+            return None, None
+        base_url = _CODEX_AUX_BASE_URL
    logger.debug("Auxiliary client: Codex OAuth (%s via Responses API)", _CODEX_AUX_MODEL)
-    real_client = OpenAI(api_key=codex_token, base_url=_CODEX_AUX_BASE_URL)
+    real_client = OpenAI(api_key=codex_token, base_url=base_url)
    return CodexAuxiliaryClient(real_client, _CODEX_AUX_MODEL), _CODEX_AUX_MODEL


@ -672,14 +769,21 @@ def _try_anthropic() -> Tuple[Optional[Any], Optional[str]]:
    except ImportError:
        return None, None

-    token = resolve_anthropic_token()
+    pool_present, entry = _select_pool_entry("anthropic")
+    if pool_present:
+        if entry is None:
+            return None, None
+        token = _pool_runtime_api_key(entry)
+    else:
+        entry = None
+        token = resolve_anthropic_token()
    if not token:
        return None, None

    # Allow base URL override from config.yaml model.base_url, but only
    # when the configured provider is anthropic — otherwise a non-Anthropic
    # base_url (e.g. Codex endpoint) would leak into Anthropic requests.
-    base_url = _ANTHROPIC_DEFAULT_BASE_URL
+    base_url = _pool_runtime_base_url(entry, _ANTHROPIC_DEFAULT_BASE_URL) if pool_present else _ANTHROPIC_DEFAULT_BASE_URL
    try:
        from hermes_cli.config import load_config
        cfg = load_config()
--- a/agent/context_references.py
+++ b/agent/context_references.py
@ -17,7 +17,7 @@ REFERENCE_PATTERN = re.compile(
    r"(?<![\w/])@(?:(?P<simple>diff|staged)\b|(?P<kind>file|folder|git|url):(?P<value>\S+))"
 )
 TRAILING_PUNCTUATION = ",.;!?"
-_SENSITIVE_HOME_DIRS = (".ssh", ".aws", ".gnupg", ".kube")
+_SENSITIVE_HOME_DIRS = (".ssh", ".aws", ".gnupg", ".kube", ".docker", ".azure", ".config/gh")
 _SENSITIVE_HERMES_DIRS = (Path("skills") / ".hub",)
 _SENSITIVE_HOME_FILES = (
    Path(".ssh") / "authorized_keys",
--- a/agent/credential_pool.py
+++ b/agent/credential_pool.py
@ -0,0 +1,848 @@
+"""Persistent multi-credential pool for same-provider failover."""
+
+from __future__ import annotations
+
+import logging
+import random
+import threading
+import time
+import uuid
+import os
+from dataclasses import dataclass, fields, replace
+from typing import Any, Dict, List, Optional, Set, Tuple
+
+from hermes_constants import OPENROUTER_BASE_URL
+import hermes_cli.auth as auth_mod
+from hermes_cli.auth import (
+    ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
+    CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
+    DEFAULT_AGENT_KEY_MIN_TTL_SECONDS,
+    PROVIDER_REGISTRY,
+    _agent_key_is_usable,
+    _codex_access_token_is_expiring,
+    _decode_jwt_claims,
+    _is_expiring,
+    _load_auth_store,
+    _load_provider_state,
+    read_credential_pool,
+    write_credential_pool,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def _load_config_safe() -> Optional[dict]:
+    """Load config.yaml, returning None on any error."""
+    try:
+        from hermes_cli.config import load_config
+
+        return load_config()
+    except Exception:
+        return None
+
+
+# --- Status and type constants ---
+
+STATUS_OK = "ok"
+STATUS_EXHAUSTED = "exhausted"
+
+AUTH_TYPE_OAUTH = "oauth"
+AUTH_TYPE_API_KEY = "api_key"
+
+SOURCE_MANUAL = "manual"
+
+STRATEGY_FILL_FIRST = "fill_first"
+STRATEGY_ROUND_ROBIN = "round_robin"
+STRATEGY_RANDOM = "random"
+STRATEGY_LEAST_USED = "least_used"
+SUPPORTED_POOL_STRATEGIES = {
+    STRATEGY_FILL_FIRST,
+    STRATEGY_ROUND_ROBIN,
+    STRATEGY_RANDOM,
+    STRATEGY_LEAST_USED,
+}
+
+# Cooldown before retrying an exhausted credential.
+# 429 (rate-limited) cools down faster since quotas reset frequently.
+# 402 (billing/quota) and other codes use a longer default.
+EXHAUSTED_TTL_429_SECONDS = 60 * 60          # 1 hour
+EXHAUSTED_TTL_DEFAULT_SECONDS = 24 * 60 * 60 # 24 hours
+
+# Pool key prefix for custom OpenAI-compatible endpoints.
+# Custom endpoints all share provider='custom' but are keyed by their
+# custom_providers name: 'custom:<normalized_name>'.
+CUSTOM_POOL_PREFIX = "custom:"
+
+
+# Fields that are only round-tripped through JSON — never used for logic as attributes.
+_EXTRA_KEYS = frozenset({
+    "token_type", "scope", "client_id", "portal_base_url", "obtained_at",
+    "expires_in", "agent_key_id", "agent_key_expires_in", "agent_key_reused",
+    "agent_key_obtained_at", "tls",
+})
+
+
+@dataclass
+class PooledCredential:
+    provider: str
+    id: str
+    label: str
+    auth_type: str
+    priority: int
+    source: str
+    access_token: str
+    refresh_token: Optional[str] = None
+    last_status: Optional[str] = None
+    last_status_at: Optional[float] = None
+    last_error_code: Optional[int] = None
+    base_url: Optional[str] = None
+    expires_at: Optional[str] = None
+    expires_at_ms: Optional[int] = None
+    last_refresh: Optional[str] = None
+    inference_base_url: Optional[str] = None
+    agent_key: Optional[str] = None
+    agent_key_expires_at: Optional[str] = None
+    request_count: int = 0
+    extra: Dict[str, Any] = None  # type: ignore[assignment]
+
+    def __post_init__(self):
+        if self.extra is None:
+            self.extra = {}
+
+    def __getattr__(self, name: str):
+        if name in _EXTRA_KEYS:
+            return self.extra.get(name)
+        raise AttributeError(f"'{type(self).__name__}' object has no attribute {name!r}")
+
+    @classmethod
+    def from_dict(cls, provider: str, payload: Dict[str, Any]) -> "PooledCredential":
+        field_names = {f.name for f in fields(cls) if f.name != "provider"}
+        data = {k: payload.get(k) for k in field_names if k in payload}
+        extra = {k: payload[k] for k in _EXTRA_KEYS if k in payload and payload[k] is not None}
+        data["extra"] = extra
+        data.setdefault("id", uuid.uuid4().hex[:6])
+        data.setdefault("label", payload.get("source", provider))
+        data.setdefault("auth_type", AUTH_TYPE_API_KEY)
+        data.setdefault("priority", 0)
+        data.setdefault("source", SOURCE_MANUAL)
+        data.setdefault("access_token", "")
+        return cls(provider=provider, **data)
+
+    def to_dict(self) -> Dict[str, Any]:
+        _ALWAYS_EMIT = {"last_status", "last_status_at", "last_error_code"}
+        result: Dict[str, Any] = {}
+        for field_def in fields(self):
+            if field_def.name in ("provider", "extra"):
+                continue
+            value = getattr(self, field_def.name)
+            if value is not None or field_def.name in _ALWAYS_EMIT:
+                result[field_def.name] = value
+        for k, v in self.extra.items():
+            if v is not None:
+                result[k] = v
+        return result
+
+    @property
+    def runtime_api_key(self) -> str:
+        if self.provider == "nous":
+            return str(self.agent_key or self.access_token or "")
+        return str(self.access_token or "")
+
+    @property
+    def runtime_base_url(self) -> Optional[str]:
+        if self.provider == "nous":
+            return self.inference_base_url or self.base_url
+        return self.base_url
+
+
+def label_from_token(token: str, fallback: str) -> str:
+    claims = _decode_jwt_claims(token)
+    for key in ("email", "preferred_username", "upn"):
+        value = claims.get(key)
+        if isinstance(value, str) and value.strip():
+            return value.strip()
+    return fallback
+
+
+def _next_priority(entries: List[PooledCredential]) -> int:
+    return max((entry.priority for entry in entries), default=-1) + 1
+
+
+def _is_manual_source(source: str) -> bool:
+    normalized = (source or "").strip().lower()
+    return normalized == SOURCE_MANUAL or normalized.startswith(f"{SOURCE_MANUAL}:")
+
+
+def _exhausted_ttl(error_code: Optional[int]) -> int:
+    """Return cooldown seconds based on the HTTP status that caused exhaustion."""
+    if error_code == 429:
+        return EXHAUSTED_TTL_429_SECONDS
+    return EXHAUSTED_TTL_DEFAULT_SECONDS
+
+
+def _normalize_custom_pool_name(name: str) -> str:
+    """Normalize a custom provider name for use as a pool key suffix."""
+    return name.strip().lower().replace(" ", "-")
+
+
+def _iter_custom_providers(config: Optional[dict] = None):
+    """Yield (normalized_name, entry_dict) for each valid custom_providers entry."""
+    if config is None:
+        config = _load_config_safe()
+    if config is None:
+        return
+    custom_providers = config.get("custom_providers")
+    if not isinstance(custom_providers, list):
+        return
+    for entry in custom_providers:
+        if not isinstance(entry, dict):
+            continue
+        name = entry.get("name")
+        if not isinstance(name, str):
+            continue
+        yield _normalize_custom_pool_name(name), entry
+
+
+def get_custom_provider_pool_key(base_url: str) -> Optional[str]:
+    """Look up the custom_providers list in config.yaml and return 'custom:<name>' for a matching base_url.
+
+    Returns None if no match is found.
+    """
+    if not base_url:
+        return None
+    normalized_url = base_url.strip().rstrip("/")
+    for norm_name, entry in _iter_custom_providers():
+        entry_url = str(entry.get("base_url") or "").strip().rstrip("/")
+        if entry_url and entry_url == normalized_url:
+            return f"{CUSTOM_POOL_PREFIX}{norm_name}"
+    return None
+
+
+def list_custom_pool_providers() -> List[str]:
+    """Return all 'custom:*' pool keys that have entries in auth.json."""
+    pool_data = read_credential_pool(None)
+    return sorted(
+        key for key in pool_data
+        if key.startswith(CUSTOM_POOL_PREFIX)
+        and isinstance(pool_data.get(key), list)
+        and pool_data[key]
+    )
+
+
+def _get_custom_provider_config(pool_key: str) -> Optional[Dict[str, Any]]:
+    """Return the custom_providers config entry matching a pool key like 'custom:together.ai'."""
+    if not pool_key.startswith(CUSTOM_POOL_PREFIX):
+        return None
+    suffix = pool_key[len(CUSTOM_POOL_PREFIX):]
+    for norm_name, entry in _iter_custom_providers():
+        if norm_name == suffix:
+            return entry
+    return None
+
+
+def get_pool_strategy(provider: str) -> str:
+    """Return the configured selection strategy for a provider."""
+    config = _load_config_safe()
+    if config is None:
+        return STRATEGY_FILL_FIRST
+
+    strategies = config.get("credential_pool_strategies")
+    if not isinstance(strategies, dict):
+        return STRATEGY_FILL_FIRST
+
+    strategy = str(strategies.get(provider, "") or "").strip().lower()
+    if strategy in SUPPORTED_POOL_STRATEGIES:
+        return strategy
+    return STRATEGY_FILL_FIRST
+
+
+class CredentialPool:
+    def __init__(self, provider: str, entries: List[PooledCredential]):
+        self.provider = provider
+        self._entries = sorted(entries, key=lambda entry: entry.priority)
+        self._current_id: Optional[str] = None
+        self._strategy = get_pool_strategy(provider)
+        self._lock = threading.Lock()
+
+    def has_credentials(self) -> bool:
+        return bool(self._entries)
+
+    def has_available(self) -> bool:
+        """True if at least one entry is not currently in exhaustion cooldown."""
+        return bool(self._available_entries())
+
+    def entries(self) -> List[PooledCredential]:
+        return list(self._entries)
+
+    def current(self) -> Optional[PooledCredential]:
+        if not self._current_id:
+            return None
+        return next((entry for entry in self._entries if entry.id == self._current_id), None)
+
+    def _replace_entry(self, old: PooledCredential, new: PooledCredential) -> None:
+        """Swap an entry in-place by id, preserving sort order."""
+        for idx, entry in enumerate(self._entries):
+            if entry.id == old.id:
+                self._entries[idx] = new
+                return
+
+    def _persist(self) -> None:
+        write_credential_pool(
+            self.provider,
+            [entry.to_dict() for entry in self._entries],
+        )
+
+    def _mark_exhausted(self, entry: PooledCredential, status_code: Optional[int]) -> PooledCredential:
+        updated = replace(
+            entry,
+            last_status=STATUS_EXHAUSTED,
+            last_status_at=time.time(),
+            last_error_code=status_code,
+        )
+        self._replace_entry(entry, updated)
+        self._persist()
+        return updated
+
+    def _refresh_entry(self, entry: PooledCredential, *, force: bool) -> Optional[PooledCredential]:
+        if entry.auth_type != AUTH_TYPE_OAUTH or not entry.refresh_token:
+            if force:
+                self._mark_exhausted(entry, None)
+            return None
+
+        try:
+            if self.provider == "anthropic":
+                from agent.anthropic_adapter import refresh_anthropic_oauth_pure
+
+                refreshed = refresh_anthropic_oauth_pure(
+                    entry.refresh_token,
+                    use_json=entry.source.endswith("hermes_pkce"),
+                )
+                updated = replace(
+                    entry,
+                    access_token=refreshed["access_token"],
+                    refresh_token=refreshed["refresh_token"],
+                    expires_at_ms=refreshed["expires_at_ms"],
+                )
+            elif self.provider == "openai-codex":
+                refreshed = auth_mod.refresh_codex_oauth_pure(
+                    entry.access_token,
+                    entry.refresh_token,
+                )
+                updated = replace(
+                    entry,
+                    access_token=refreshed["access_token"],
+                    refresh_token=refreshed["refresh_token"],
+                    last_refresh=refreshed.get("last_refresh"),
+                )
+            elif self.provider == "nous":
+                nous_state = {
+                    "access_token": entry.access_token,
+                    "refresh_token": entry.refresh_token,
+                    "client_id": entry.client_id,
+                    "portal_base_url": entry.portal_base_url,
+                    "inference_base_url": entry.inference_base_url,
+                    "token_type": entry.token_type,
+                    "scope": entry.scope,
+                    "obtained_at": entry.obtained_at,
+                    "expires_at": entry.expires_at,
+                    "agent_key": entry.agent_key,
+                    "agent_key_expires_at": entry.agent_key_expires_at,
+                    "tls": entry.tls,
+                }
+                refreshed = auth_mod.refresh_nous_oauth_from_state(
+                    nous_state,
+                    min_key_ttl_seconds=DEFAULT_AGENT_KEY_MIN_TTL_SECONDS,
+                    force_refresh=force,
+                    force_mint=force,
+                )
+                # Apply returned fields: dataclass fields via replace, extras via dict update
+                field_updates = {}
+                extra_updates = dict(entry.extra)
+                _field_names = {f.name for f in fields(entry)}
+                for k, v in refreshed.items():
+                    if k in _field_names:
+                        field_updates[k] = v
+                    elif k in _EXTRA_KEYS:
+                        extra_updates[k] = v
+                updated = replace(entry, extra=extra_updates, **field_updates)
+            else:
+                return entry
+        except Exception as exc:
+            logger.debug("Credential refresh failed for %s/%s: %s", self.provider, entry.id, exc)
+            self._mark_exhausted(entry, None)
+            return None
+
+        updated = replace(updated, last_status=STATUS_OK, last_status_at=None, last_error_code=None)
+        self._replace_entry(entry, updated)
+        self._persist()
+        return updated
+
+    def _entry_needs_refresh(self, entry: PooledCredential) -> bool:
+        if entry.auth_type != AUTH_TYPE_OAUTH:
+            return False
+        if self.provider == "anthropic":
+            if entry.expires_at_ms is None:
+                return False
+            return int(entry.expires_at_ms) <= int(time.time() * 1000) + 120_000
+        if self.provider == "openai-codex":
+            return _codex_access_token_is_expiring(
+                entry.access_token,
+                CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
+            )
+        if self.provider == "nous":
+            # Nous refresh/mint can require network access and should happen when
+            # runtime credentials are actually resolved, not merely when the pool
+            # is enumerated for listing, migration, or selection.
+            return False
+        return False
+
+    def mark_used(self, entry_id: Optional[str] = None) -> None:
+        """Increment request_count for tracking. Used by least_used strategy."""
+        target_id = entry_id or self._current_id
+        if not target_id:
+            return
+        with self._lock:
+            for idx, entry in enumerate(self._entries):
+                if entry.id == target_id:
+                    self._entries[idx] = replace(entry, request_count=entry.request_count + 1)
+                    return
+
+    def select(self) -> Optional[PooledCredential]:
+        with self._lock:
+            return self._select_unlocked()
+
+    def _available_entries(self, *, clear_expired: bool = False, refresh: bool = False) -> List[PooledCredential]:
+        """Return entries not currently in exhaustion cooldown.
+
+        When *clear_expired* is True, entries whose cooldown has elapsed are
+        reset to STATUS_OK and persisted.  When *refresh* is True, entries
+        that need a token refresh are refreshed (skipped on failure).
+        """
+        now = time.time()
+        cleared_any = False
+        available: List[PooledCredential] = []
+        for entry in self._entries:
+            if entry.last_status == STATUS_EXHAUSTED:
+                ttl = _exhausted_ttl(entry.last_error_code)
+                if entry.last_status_at and now - entry.last_status_at < ttl:
+                    continue
+                if clear_expired:
+                    cleared = replace(entry, last_status=STATUS_OK, last_status_at=None, last_error_code=None)
+                    self._replace_entry(entry, cleared)
+                    entry = cleared
+                    cleared_any = True
+            if refresh and self._entry_needs_refresh(entry):
+                refreshed = self._refresh_entry(entry, force=False)
+                if refreshed is None:
+                    continue
+                entry = refreshed
+            available.append(entry)
+        if cleared_any:
+            self._persist()
+        return available
+
+    def _select_unlocked(self) -> Optional[PooledCredential]:
+        available = self._available_entries(clear_expired=True, refresh=True)
+        if not available:
+            self._current_id = None
+            return None
+
+        if self._strategy == STRATEGY_RANDOM:
+            entry = random.choice(available)
+            self._current_id = entry.id
+            return entry
+
+        if self._strategy == STRATEGY_LEAST_USED and len(available) > 1:
+            entry = min(available, key=lambda e: e.request_count)
+            self._current_id = entry.id
+            return entry
+
+        if self._strategy == STRATEGY_ROUND_ROBIN and len(available) > 1:
+            entry = available[0]
+            rotated = [candidate for candidate in self._entries if candidate.id != entry.id]
+            rotated.append(replace(entry, priority=len(self._entries) - 1))
+            self._entries = [replace(candidate, priority=idx) for idx, candidate in enumerate(rotated)]
+            self._persist()
+            self._current_id = entry.id
+            return self.current() or entry
+
+        entry = available[0]
+        self._current_id = entry.id
+        return entry
+
+    def peek(self) -> Optional[PooledCredential]:
+        current = self.current()
+        if current is not None:
+            return current
+        available = self._available_entries()
+        return available[0] if available else None
+
+    def mark_exhausted_and_rotate(self, *, status_code: Optional[int]) -> Optional[PooledCredential]:
+        with self._lock:
+            entry = self.current() or self._select_unlocked()
+            if entry is None:
+                return None
+            self._mark_exhausted(entry, status_code)
+            self._current_id = None
+            return self._select_unlocked()
+
+    def try_refresh_current(self) -> Optional[PooledCredential]:
+        with self._lock:
+            return self._try_refresh_current_unlocked()
+
+    def _try_refresh_current_unlocked(self) -> Optional[PooledCredential]:
+        entry = self.current()
+        if entry is None:
+            return None
+        refreshed = self._refresh_entry(entry, force=True)
+        if refreshed is not None:
+            self._current_id = refreshed.id
+        return refreshed
+
+    def reset_statuses(self) -> int:
+        count = 0
+        new_entries = []
+        for entry in self._entries:
+            if entry.last_status or entry.last_status_at or entry.last_error_code:
+                new_entries.append(replace(entry, last_status=None, last_status_at=None, last_error_code=None))
+                count += 1
+            else:
+                new_entries.append(entry)
+        if count:
+            self._entries = new_entries
+            self._persist()
+        return count
+
+    def remove_index(self, index: int) -> Optional[PooledCredential]:
+        if index < 1 or index > len(self._entries):
+            return None
+        removed = self._entries.pop(index - 1)
+        self._entries = [
+            replace(entry, priority=new_priority)
+            for new_priority, entry in enumerate(self._entries)
+        ]
+        self._persist()
+        if self._current_id == removed.id:
+            self._current_id = None
+        return removed
+
+    def add_entry(self, entry: PooledCredential) -> PooledCredential:
+        entry = replace(entry, priority=_next_priority(self._entries))
+        self._entries.append(entry)
+        self._persist()
+        return entry
+
+
+def _upsert_entry(entries: List[PooledCredential], provider: str, source: str, payload: Dict[str, Any]) -> bool:
+    existing_idx = None
+    for idx, entry in enumerate(entries):
+        if entry.source == source:
+            existing_idx = idx
+            break
+
+    if existing_idx is None:
+        payload.setdefault("id", uuid.uuid4().hex[:6])
+        payload.setdefault("priority", _next_priority(entries))
+        payload.setdefault("label", payload.get("label") or source)
+        entries.append(PooledCredential.from_dict(provider, payload))
+        return True
+
+    existing = entries[existing_idx]
+    field_updates = {}
+    extra_updates = {}
+    _field_names = {f.name for f in fields(existing)}
+    for key, value in payload.items():
+        if key in {"id", "priority"} or value is None:
+            continue
+        if key == "label" and existing.label:
+            continue
+        if key in _field_names:
+            if getattr(existing, key) != value:
+                field_updates[key] = value
+        elif key in _EXTRA_KEYS:
+            if existing.extra.get(key) != value:
+                extra_updates[key] = value
+    if field_updates or extra_updates:
+        if extra_updates:
+            field_updates["extra"] = {**existing.extra, **extra_updates}
+        entries[existing_idx] = replace(existing, **field_updates)
+        return True
+    return False
+
+
+def _normalize_pool_priorities(provider: str, entries: List[PooledCredential]) -> bool:
+    if provider != "anthropic":
+        return False
+
+    source_rank = {
+        "env:ANTHROPIC_TOKEN": 0,
+        "env:CLAUDE_CODE_OAUTH_TOKEN": 1,
+        "hermes_pkce": 2,
+        "claude_code": 3,
+        "env:ANTHROPIC_API_KEY": 4,
+    }
+    manual_entries = sorted(
+        (entry for entry in entries if _is_manual_source(entry.source)),
+        key=lambda entry: entry.priority,
+    )
+    seeded_entries = sorted(
+        (entry for entry in entries if not _is_manual_source(entry.source)),
+        key=lambda entry: (
+            source_rank.get(entry.source, len(source_rank)),
+            entry.priority,
+            entry.label,
+        ),
+    )
+
+    ordered = [*manual_entries, *seeded_entries]
+    id_to_idx = {entry.id: idx for idx, entry in enumerate(entries)}
+    changed = False
+    for new_priority, entry in enumerate(ordered):
+        if entry.priority != new_priority:
+            entries[id_to_idx[entry.id]] = replace(entry, priority=new_priority)
+            changed = True
+    return changed
+
+
+def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tuple[bool, Set[str]]:
+    changed = False
+    active_sources: Set[str] = set()
+    auth_store = _load_auth_store()
+
+    if provider == "anthropic":
+        from agent.anthropic_adapter import read_claude_code_credentials, read_hermes_oauth_credentials
+
+        for source_name, creds in (
+            ("hermes_pkce", read_hermes_oauth_credentials()),
+            ("claude_code", read_claude_code_credentials()),
+        ):
+            if creds and creds.get("accessToken"):
+                active_sources.add(source_name)
+                changed |= _upsert_entry(
+                    entries,
+                    provider,
+                    source_name,
+                    {
+                        "source": source_name,
+                        "auth_type": AUTH_TYPE_OAUTH,
+                        "access_token": creds.get("accessToken", ""),
+                        "refresh_token": creds.get("refreshToken"),
+                        "expires_at_ms": creds.get("expiresAt"),
+                        "label": label_from_token(creds.get("accessToken", ""), source_name),
+                    },
+                )
+
+    elif provider == "nous":
+        state = _load_provider_state(auth_store, "nous")
+        if state:
+            active_sources.add("device_code")
+            changed |= _upsert_entry(
+                entries,
+                provider,
+                "device_code",
+                {
+                    "source": "device_code",
+                    "auth_type": AUTH_TYPE_OAUTH,
+                    "access_token": state.get("access_token", ""),
+                    "refresh_token": state.get("refresh_token"),
+                    "expires_at": state.get("expires_at"),
+                    "token_type": state.get("token_type"),
+                    "scope": state.get("scope"),
+                    "client_id": state.get("client_id"),
+                    "portal_base_url": state.get("portal_base_url"),
+                    "inference_base_url": state.get("inference_base_url"),
+                    "agent_key": state.get("agent_key"),
+                    "agent_key_expires_at": state.get("agent_key_expires_at"),
+                    "tls": state.get("tls") if isinstance(state.get("tls"), dict) else None,
+                    "label": label_from_token(state.get("access_token", ""), "device_code"),
+                },
+            )
+
+    elif provider == "openai-codex":
+        state = _load_provider_state(auth_store, "openai-codex")
+        tokens = state.get("tokens") if isinstance(state, dict) else None
+        if isinstance(tokens, dict) and tokens.get("access_token"):
+            active_sources.add("device_code")
+            changed |= _upsert_entry(
+                entries,
+                provider,
+                "device_code",
+                {
+                    "source": "device_code",
+                    "auth_type": AUTH_TYPE_OAUTH,
+                    "access_token": tokens.get("access_token", ""),
+                    "refresh_token": tokens.get("refresh_token"),
+                    "base_url": "https://chatgpt.com/backend-api/codex",
+                    "last_refresh": state.get("last_refresh"),
+                    "label": label_from_token(tokens.get("access_token", ""), "device_code"),
+                },
+            )
+
+    return changed, active_sources
+
+
+def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool, Set[str]]:
+    changed = False
+    active_sources: Set[str] = set()
+    if provider == "openrouter":
+        token = os.getenv("OPENROUTER_API_KEY", "").strip()
+        if token:
+            source = "env:OPENROUTER_API_KEY"
+            active_sources.add(source)
+            changed |= _upsert_entry(
+                entries,
+                provider,
+                source,
+                {
+                    "source": source,
+                    "auth_type": AUTH_TYPE_API_KEY,
+                    "access_token": token,
+                    "base_url": OPENROUTER_BASE_URL,
+                    "label": "OPENROUTER_API_KEY",
+                },
+            )
+        return changed, active_sources
+
+    pconfig = PROVIDER_REGISTRY.get(provider)
+    if not pconfig or pconfig.auth_type != AUTH_TYPE_API_KEY:
+        return changed, active_sources
+
+    env_url = ""
+    if pconfig.base_url_env_var:
+        env_url = os.getenv(pconfig.base_url_env_var, "").strip().rstrip("/")
+
+    env_vars = list(pconfig.api_key_env_vars)
+    if provider == "anthropic":
+        env_vars = [
+            "ANTHROPIC_TOKEN",
+            "CLAUDE_CODE_OAUTH_TOKEN",
+            "ANTHROPIC_API_KEY",
+        ]
+
+    for env_var in env_vars:
+        token = os.getenv(env_var, "").strip()
+        if not token:
+            continue
+        source = f"env:{env_var}"
+        active_sources.add(source)
+        auth_type = AUTH_TYPE_OAUTH if provider == "anthropic" and not token.startswith("sk-ant-api") else AUTH_TYPE_API_KEY
+        base_url = env_url or pconfig.inference_base_url
+        changed |= _upsert_entry(
+            entries,
+            provider,
+            source,
+            {
+                "source": source,
+                "auth_type": auth_type,
+                "access_token": token,
+                "base_url": base_url,
+                "label": env_var,
+            },
+        )
+    return changed, active_sources
+
+
+def _prune_stale_seeded_entries(entries: List[PooledCredential], active_sources: Set[str]) -> bool:
+    retained = [
+        entry
+        for entry in entries
+        if _is_manual_source(entry.source)
+        or entry.source in active_sources
+        or not (
+            entry.source.startswith("env:")
+            or entry.source in {"claude_code", "hermes_pkce"}
+        )
+    ]
+    if len(retained) == len(entries):
+        return False
+    entries[:] = retained
+    return True
+
+
+def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[bool, Set[str]]:
+    """Seed a custom endpoint pool from custom_providers config and model config."""
+    changed = False
+    active_sources: Set[str] = set()
+
+    # Seed from the custom_providers config entry's api_key field
+    cp_config = _get_custom_provider_config(pool_key)
+    if cp_config:
+        api_key = str(cp_config.get("api_key") or "").strip()
+        base_url = str(cp_config.get("base_url") or "").strip().rstrip("/")
+        name = str(cp_config.get("name") or "").strip()
+        if api_key:
+            source = f"config:{name}"
+            active_sources.add(source)
+            changed |= _upsert_entry(
+                entries,
+                pool_key,
+                source,
+                {
+                    "source": source,
+                    "auth_type": AUTH_TYPE_API_KEY,
+                    "access_token": api_key,
+                    "base_url": base_url,
+                    "label": name or source,
+                },
+            )
+
+    # Seed from model.api_key if model.provider=='custom' and model.base_url matches
+    try:
+        config = _load_config_safe()
+        model_cfg = config.get("model") if config else None
+        if isinstance(model_cfg, dict):
+            model_provider = str(model_cfg.get("provider") or "").strip().lower()
+            model_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
+            model_api_key = ""
+            for k in ("api_key", "api"):
+                v = model_cfg.get(k)
+                if isinstance(v, str) and v.strip():
+                    model_api_key = v.strip()
+                    break
+            if model_provider == "custom" and model_base_url and model_api_key:
+                # Check if this model's base_url matches our custom provider
+                matched_key = get_custom_provider_pool_key(model_base_url)
+                if matched_key == pool_key:
+                    source = "model_config"
+                    active_sources.add(source)
+                    changed |= _upsert_entry(
+                        entries,
+                        pool_key,
+                        source,
+                        {
+                            "source": source,
+                            "auth_type": AUTH_TYPE_API_KEY,
+                            "access_token": model_api_key,
+                            "base_url": model_base_url,
+                            "label": "model_config",
+                        },
+                    )
+    except Exception:
+        pass
+
+    return changed, active_sources
+
+
+def load_pool(provider: str) -> CredentialPool:
+    provider = (provider or "").strip().lower()
+    raw_entries = read_credential_pool(provider)
+    entries = [PooledCredential.from_dict(provider, payload) for payload in raw_entries]
+
+    if provider.startswith(CUSTOM_POOL_PREFIX):
+        # Custom endpoint pool — seed from custom_providers config and model config
+        custom_changed, custom_sources = _seed_custom_pool(provider, entries)
+        changed = custom_changed
+        changed |= _prune_stale_seeded_entries(entries, custom_sources)
+    else:
+        singleton_changed, singleton_sources = _seed_from_singletons(provider, entries)
+        env_changed, env_sources = _seed_from_env(provider, entries)
+        changed = singleton_changed or env_changed
+        changed |= _prune_stale_seeded_entries(entries, singleton_sources | env_sources)
+        changed |= _normalize_pool_priorities(provider, entries)
+
+    if changed:
+        write_credential_pool(
+            provider,
+            [entry.to_dict() for entry in sorted(entries, key=lambda item: item.priority)],
+        )
+    return CredentialPool(provider, entries)
--- a/agent/display.py
+++ b/agent/display.py
@ -10,6 +10,9 @@ import os
 import sys
 import threading
 import time
+from dataclasses import dataclass, field
+from difflib import unified_diff
+from pathlib import Path

 # ANSI escape codes for coloring tool failure indicators
 _RED = "\033[31m"
@ -17,6 +20,22 @@ _RESET = "\033[0m"

 logger = logging.getLogger(__name__)

+_ANSI_RESET = "\033[0m"
+_ANSI_DIM = "\033[38;2;150;150;150m"
+_ANSI_FILE = "\033[38;2;180;160;255m"
+_ANSI_HUNK = "\033[38;2;120;120;140m"
+_ANSI_MINUS = "\033[38;2;255;255;255;48;2;120;20;20m"
+_ANSI_PLUS = "\033[38;2;255;255;255;48;2;20;90;20m"
+_MAX_INLINE_DIFF_FILES = 6
+_MAX_INLINE_DIFF_LINES = 80
+
+
+@dataclass
+class LocalEditSnapshot:
+    """Pre-tool filesystem snapshot used to render diffs locally after writes."""
+    paths: list[Path] = field(default_factory=list)
+    before: dict[str, str | None] = field(default_factory=dict)
+
 # =========================================================================
 # Configurable tool preview length (0 = no limit)
 # Set once at startup by CLI or gateway from display.tool_preview_length config.
@ -218,6 +237,300 @@ def build_tool_preview(tool_name: str, args: dict, max_len: int | None = None) -
    return preview


+# =========================================================================
+# Inline diff previews for write actions
+# =========================================================================
+
+def _resolved_path(path: str) -> Path:
+    """Resolve a possibly-relative filesystem path against the current cwd."""
+    candidate = Path(os.path.expanduser(path))
+    if candidate.is_absolute():
+        return candidate
+    return Path.cwd() / candidate
+
+
+def _snapshot_text(path: Path) -> str | None:
+    """Return UTF-8 file content, or None for missing/unreadable files."""
+    try:
+        return path.read_text(encoding="utf-8")
+    except (FileNotFoundError, IsADirectoryError, UnicodeDecodeError, OSError):
+        return None
+
+
+def _display_diff_path(path: Path) -> str:
+    """Prefer cwd-relative paths in diffs when available."""
+    try:
+        return str(path.resolve().relative_to(Path.cwd().resolve()))
+    except Exception:
+        return str(path)
+
+
+def _resolve_skill_manage_paths(args: dict) -> list[Path]:
+    """Resolve skill_manage write targets to filesystem paths."""
+    action = args.get("action")
+    name = args.get("name")
+    if not action or not name:
+        return []
+
+    from tools.skill_manager_tool import _find_skill, _resolve_skill_dir
+
+    if action == "create":
+        skill_dir = _resolve_skill_dir(name, args.get("category"))
+        return [skill_dir / "SKILL.md"]
+
+    existing = _find_skill(name)
+    if not existing:
+        return []
+
+    skill_dir = Path(existing["path"])
+    if action in {"edit", "patch"}:
+        file_path = args.get("file_path")
+        return [skill_dir / file_path] if file_path else [skill_dir / "SKILL.md"]
+    if action in {"write_file", "remove_file"}:
+        file_path = args.get("file_path")
+        return [skill_dir / file_path] if file_path else []
+    if action == "delete":
+        files = [path for path in sorted(skill_dir.rglob("*")) if path.is_file()]
+        return files
+    return []
+
+
+def _resolve_local_edit_paths(tool_name: str, function_args: dict | None) -> list[Path]:
+    """Resolve local filesystem targets for write-capable tools."""
+    if not isinstance(function_args, dict):
+        return []
+
+    if tool_name == "write_file":
+        path = function_args.get("path")
+        return [_resolved_path(path)] if path else []
+
+    if tool_name == "patch":
+        path = function_args.get("path")
+        return [_resolved_path(path)] if path else []
+
+    if tool_name == "skill_manage":
+        return _resolve_skill_manage_paths(function_args)
+
+    return []
+
+
+def capture_local_edit_snapshot(tool_name: str, function_args: dict | None) -> LocalEditSnapshot | None:
+    """Capture before-state for local write previews."""
+    paths = _resolve_local_edit_paths(tool_name, function_args)
+    if not paths:
+        return None
+
+    snapshot = LocalEditSnapshot(paths=paths)
+    for path in paths:
+        snapshot.before[str(path)] = _snapshot_text(path)
+    return snapshot
+
+
+def _result_succeeded(result: str | None) -> bool:
+    """Conservatively detect whether a tool result represents success."""
+    if not result:
+        return False
+    try:
+        data = json.loads(result)
+    except (json.JSONDecodeError, TypeError):
+        return False
+    if not isinstance(data, dict):
+        return False
+    if data.get("error"):
+        return False
+    if "success" in data:
+        return bool(data.get("success"))
+    return True
+
+
+def _diff_from_snapshot(snapshot: LocalEditSnapshot | None) -> str | None:
+    """Generate unified diff text from a stored before-state and current files."""
+    if not snapshot:
+        return None
+
+    chunks: list[str] = []
+    for path in snapshot.paths:
+        before = snapshot.before.get(str(path))
+        after = _snapshot_text(path)
+        if before == after:
+            continue
+
+        display_path = _display_diff_path(path)
+        diff = "".join(
+            unified_diff(
+                [] if before is None else before.splitlines(keepends=True),
+                [] if after is None else after.splitlines(keepends=True),
+                fromfile=f"a/{display_path}",
+                tofile=f"b/{display_path}",
+            )
+        )
+        if diff:
+            chunks.append(diff)
+
+    if not chunks:
+        return None
+    return "".join(chunk if chunk.endswith("\n") else chunk + "\n" for chunk in chunks)
+
+
+def extract_edit_diff(
+    tool_name: str,
+    result: str | None,
+    *,
+    function_args: dict | None = None,
+    snapshot: LocalEditSnapshot | None = None,
+) -> str | None:
+    """Extract a unified diff from a file-edit tool result."""
+    if tool_name == "patch" and result:
+        try:
+            data = json.loads(result)
+        except (json.JSONDecodeError, TypeError):
+            data = None
+        if isinstance(data, dict):
+            diff = data.get("diff")
+            if isinstance(diff, str) and diff.strip():
+                return diff
+
+    if tool_name not in {"write_file", "patch", "skill_manage"}:
+        return None
+    if not _result_succeeded(result):
+        return None
+    return _diff_from_snapshot(snapshot)
+
+
+def _emit_inline_diff(diff_text: str, print_fn) -> bool:
+    """Emit rendered diff text through the CLI's prompt_toolkit-safe printer."""
+    if print_fn is None or not diff_text:
+        return False
+    try:
+        print_fn("  ┊ review diff")
+        for line in diff_text.rstrip("\n").splitlines():
+            print_fn(line)
+        return True
+    except Exception:
+        return False
+
+
+def _render_inline_unified_diff(diff: str) -> list[str]:
+    """Render unified diff lines in Hermes' inline transcript style."""
+    rendered: list[str] = []
+    from_file = None
+    to_file = None
+
+    for raw_line in diff.splitlines():
+        if raw_line.startswith("--- "):
+            from_file = raw_line[4:].strip()
+            continue
+        if raw_line.startswith("+++ "):
+            to_file = raw_line[4:].strip()
+            if from_file or to_file:
+                rendered.append(f"{_ANSI_FILE}{from_file or 'a/?'} → {to_file or 'b/?'}{_ANSI_RESET}")
+            continue
+        if raw_line.startswith("@@"):
+            rendered.append(f"{_ANSI_HUNK}{raw_line}{_ANSI_RESET}")
+            continue
+        if raw_line.startswith("-"):
+            rendered.append(f"{_ANSI_MINUS}{raw_line}{_ANSI_RESET}")
+            continue
+        if raw_line.startswith("+"):
+            rendered.append(f"{_ANSI_PLUS}{raw_line}{_ANSI_RESET}")
+            continue
+        if raw_line.startswith(" "):
+            rendered.append(f"{_ANSI_DIM}{raw_line}{_ANSI_RESET}")
+            continue
+        if raw_line:
+            rendered.append(raw_line)
+
+    return rendered
+
+
+def _split_unified_diff_sections(diff: str) -> list[str]:
+    """Split a unified diff into per-file sections."""
+    sections: list[list[str]] = []
+    current: list[str] = []
+
+    for line in diff.splitlines():
+        if line.startswith("--- ") and current:
+            sections.append(current)
+            current = [line]
+            continue
+        current.append(line)
+
+    if current:
+        sections.append(current)
+
+    return ["\n".join(section) for section in sections if section]
+
+
+def _summarize_rendered_diff_sections(
+    diff: str,
+    *,
+    max_files: int = _MAX_INLINE_DIFF_FILES,
+    max_lines: int = _MAX_INLINE_DIFF_LINES,
+) -> list[str]:
+    """Render diff sections while capping file count and total line count."""
+    sections = _split_unified_diff_sections(diff)
+    rendered: list[str] = []
+    omitted_files = 0
+    omitted_lines = 0
+
+    for idx, section in enumerate(sections):
+        if idx >= max_files:
+            omitted_files += 1
+            omitted_lines += len(_render_inline_unified_diff(section))
+            continue
+
+        section_lines = _render_inline_unified_diff(section)
+        remaining_budget = max_lines - len(rendered)
+        if remaining_budget <= 0:
+            omitted_lines += len(section_lines)
+            omitted_files += 1
+            continue
+
+        if len(section_lines) <= remaining_budget:
+            rendered.extend(section_lines)
+            continue
+
+        rendered.extend(section_lines[:remaining_budget])
+        omitted_lines += len(section_lines) - remaining_budget
+        omitted_files += 1 + max(0, len(sections) - idx - 1)
+        for leftover in sections[idx + 1:]:
+            omitted_lines += len(_render_inline_unified_diff(leftover))
+        break
+
+    if omitted_files or omitted_lines:
+        summary = f"… omitted {omitted_lines} diff line(s)"
+        if omitted_files:
+            summary += f" across {omitted_files} additional file(s)/section(s)"
+        rendered.append(f"{_ANSI_HUNK}{summary}{_ANSI_RESET}")
+
+    return rendered
+
+
+def render_edit_diff_with_delta(
+    tool_name: str,
+    result: str | None,
+    *,
+    function_args: dict | None = None,
+    snapshot: LocalEditSnapshot | None = None,
+    print_fn=None,
+) -> bool:
+    """Render an edit diff inline without taking over the terminal UI."""
+    diff = extract_edit_diff(
+        tool_name,
+        result,
+        function_args=function_args,
+        snapshot=snapshot,
+    )
+    if not diff:
+        return False
+    try:
+        rendered_lines = _summarize_rendered_diff_sections(diff)
+    except Exception as exc:
+        logger.debug("Could not render inline diff: %s", exc)
+        return False
+    return _emit_inline_diff("\n".join(rendered_lines), print_fn)
+
+
 # =========================================================================
 # KawaiiSpinner
 # =========================================================================
--- a/agent/insights.py
+++ b/agent/insights.py
@ -644,6 +644,9 @@ class InsightsEngine:
        lines.append(f"  Sessions:          {o['total_sessions']:<12}  Messages:        {o['total_messages']:,}")
        lines.append(f"  Tool calls:        {o['total_tool_calls']:<12,}  User messages:   {o['user_messages']:,}")
        lines.append(f"  Input tokens:      {o['total_input_tokens']:<12,}  Output tokens:   {o['total_output_tokens']:,}")
+        cache_total = o.get("total_cache_read_tokens", 0) + o.get("total_cache_write_tokens", 0)
+        if cache_total > 0:
+            lines.append(f"  Cache read:        {o['total_cache_read_tokens']:<12,}  Cache write:     {o['total_cache_write_tokens']:,}")
        cost_str = f"${o['estimated_cost']:.2f}"
        if o.get("models_without_pricing"):
            cost_str += " *"
@ -746,7 +749,11 @@ class InsightsEngine:

        # Overview
        lines.append(f"**Sessions:** {o['total_sessions']} | **Messages:** {o['total_messages']:,} | **Tool calls:** {o['total_tool_calls']:,}")
-        lines.append(f"**Tokens:** {o['total_tokens']:,} (in: {o['total_input_tokens']:,} / out: {o['total_output_tokens']:,})")
+        cache_total = o.get("total_cache_read_tokens", 0) + o.get("total_cache_write_tokens", 0)
+        if cache_total > 0:
+            lines.append(f"**Tokens:** {o['total_tokens']:,} (in: {o['total_input_tokens']:,} / out: {o['total_output_tokens']:,} / cache: {cache_total:,})")
+        else:
+            lines.append(f"**Tokens:** {o['total_tokens']:,} (in: {o['total_input_tokens']:,} / out: {o['total_output_tokens']:,})")
        cost_note = ""
        if o.get("models_without_pricing"):
            cost_note = " _(excludes custom/self-hosted models)_"
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@ -176,6 +176,7 @@ _URL_TO_PROVIDER: Dict[str, str] = {
    "api.deepseek.com": "deepseek",
    "api.githubcopilot.com": "copilot",
    "models.github.ai": "copilot",
+    "api.fireworks.ai": "fireworks",
 }


--- a/agent/models_dev.py
+++ b/agent/models_dev.py
@ -43,6 +43,7 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
    "opencode-zen": "opencode",
    "opencode-go": "opencode-go",
    "kilocode": "kilo",
+    "fireworks": "fireworks-ai",
 }


--- a/agent/prompt_builder.py
+++ b/agent/prompt_builder.py
@ -189,6 +189,13 @@ TOOL_USE_ENFORCEMENT_GUIDANCE = (
 # Add new patterns here when a model family needs explicit steering.
 TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex")

+# Model name substrings that should use the 'developer' role instead of
+# 'system' for the system prompt.  OpenAI's newer models (GPT-5, Codex)
+# give stronger instruction-following weight to the 'developer' role.
+# The swap happens at the API boundary in _build_api_kwargs() so internal
+# message representation stays consistent ("system" everywhere).
+DEVELOPER_ROLE_MODELS = ("gpt-5", "codex")
+
 PLATFORM_HINTS = {
    "whatsapp": (
        "You are on a text messaging communication platform, WhatsApp. "
--- a/agent/redact.py
+++ b/agent/redact.py
@ -13,11 +13,19 @@ import re

 logger = logging.getLogger(__name__)

+# Snapshot at import time so runtime env mutations (e.g. LLM-generated
+# `export HERMES_REDACT_SECRETS=false`) cannot disable redaction mid-session.
+_REDACT_ENABLED = os.getenv("HERMES_REDACT_SECRETS", "").lower() not in ("0", "false", "no", "off")
+
 # Known API key prefixes -- match the prefix + contiguous token chars
 _PREFIX_PATTERNS = [
    r"sk-[A-Za-z0-9_-]{10,}",           # OpenAI / OpenRouter / Anthropic (sk-ant-*)
    r"ghp_[A-Za-z0-9]{10,}",            # GitHub PAT (classic)
    r"github_pat_[A-Za-z0-9_]{10,}",    # GitHub PAT (fine-grained)
+    r"gho_[A-Za-z0-9]{10,}",            # GitHub OAuth access token
+    r"ghu_[A-Za-z0-9]{10,}",            # GitHub user-to-server token
+    r"ghs_[A-Za-z0-9]{10,}",            # GitHub server-to-server token
+    r"ghr_[A-Za-z0-9]{10,}",            # GitHub refresh token
    r"xox[baprs]-[A-Za-z0-9-]{10,}",    # Slack tokens
    r"AIza[A-Za-z0-9_-]{30,}",          # Google API keys
    r"pplx-[A-Za-z0-9]{10,}",           # Perplexity
@ -109,7 +117,7 @@ def redact_sensitive_text(text: str) -> str:
        text = str(text)
    if not text:
        return text
-    if os.getenv("HERMES_REDACT_SECRETS", "").lower() in ("0", "false", "no", "off"):
+    if not _REDACT_ENABLED:
        return text

    # Known prefixes (sk-, ghp_, etc.)
--- a/agent/skill_utils.py
+++ b/agent/skill_utils.py
@ -230,7 +230,13 @@ def get_all_skills_dirs() -> List[Path]:

 def extract_skill_conditions(frontmatter: Dict[str, Any]) -> Dict[str, List]:
    """Extract conditional activation fields from parsed frontmatter."""
-    hermes = (frontmatter.get("metadata") or {}).get("hermes") or {}
+    metadata = frontmatter.get("metadata")
+    # Handle cases where metadata is not a dict (e.g., a string from malformed YAML)
+    if not isinstance(metadata, dict):
+        metadata = {}
+    hermes = metadata.get("hermes") or {}
+    if not isinstance(hermes, dict):
+        hermes = {}
    return {
        "fallback_for_toolsets": hermes.get("fallback_for_toolsets", []),
        "requires_toolsets": hermes.get("requires_toolsets", []),
--- a/agent/smart_model_routing.py
+++ b/agent/smart_model_routing.py
@ -123,6 +123,7 @@ def resolve_turn_route(user_message: str, routing_config: Optional[Dict[str, Any
                "api_mode": primary.get("api_mode"),
                "command": primary.get("command"),
                "args": list(primary.get("args") or []),
+                "credential_pool": primary.get("credential_pool"),
            },
            "label": None,
            "signature": (
@ -158,6 +159,7 @@ def resolve_turn_route(user_message: str, routing_config: Optional[Dict[str, Any
                "api_mode": primary.get("api_mode"),
                "command": primary.get("command"),
                "args": list(primary.get("args") or []),
+                "credential_pool": primary.get("credential_pool"),
            },
            "label": None,
            "signature": (
--- a/cli.py
+++ b/cli.py
@ -144,8 +144,8 @@ def load_cli_config() -> Dict[str, Any]:
    # Default configuration
    defaults = {
        "model": {
-            "default": "anthropic/claude-opus-4.6",
-            "base_url": OPENROUTER_BASE_URL,
+            "default": "",
+            "base_url": "",
            "provider": "auto",
        },
        "terminal": {
@ -262,18 +262,29 @@ def load_cli_config() -> Dict[str, Any]:
                elif isinstance(file_config["model"], dict):
                    # Old format: model is a dict with default/base_url
                    defaults["model"].update(file_config["model"])
+                    # If the user config sets model.model but not model.default,
+                    # promote model.model to model.default so the user's explicit
+                    # choice isn't shadowed by the hardcoded default.  Without this,
+                    # profile configs that only set "model:" (not "default:") silently
+                    # fall back to claude-opus because the merge preserves the
+                    # hardcoded default and HermesCLI.__init__ checks "default" first.
+                    if "model" in file_config["model"] and "default" not in file_config["model"]:
+                        defaults["model"]["default"] = file_config["model"]["model"]

-            # Root-level provider and base_url override model config.
-            # Users may write:
-            #   model: kimi-k2.5:cloud
-            #   provider: custom
-            #   base_url: http://localhost:11434/v1
-            # These root-level keys must be merged into defaults["model"] so
-            # they are picked up by CLI provider resolution.
-            if "provider" in file_config and file_config["provider"]:
-                defaults["model"]["provider"] = file_config["provider"]
-            if "base_url" in file_config and file_config["base_url"]:
-                defaults["model"]["base_url"] = file_config["base_url"]
+            # Legacy root-level provider/base_url fallback.
+            # Some users (or old code) put provider: / base_url: at the
+            # config root instead of inside the model: section.  These are
+            # only used as a FALLBACK when model.provider / model.base_url
+            # is not already set — never as an override.  The canonical
+            # location is model.provider (written by `hermes model`).
+            if not defaults["model"].get("provider"):
+                root_provider = file_config.get("provider")
+                if root_provider:
+                    defaults["model"]["provider"] = root_provider
+            if not defaults["model"].get("base_url"):
+                root_base_url = file_config.get("base_url")
+                if root_base_url:
+                    defaults["model"]["base_url"] = root_base_url
            
            # Deep merge file_config into defaults.
            # First: merge keys that exist in both (deep-merge dicts, overwrite scalars)
@ -991,9 +1002,10 @@ def save_config_value(key_path: str, value: any) -> bool:
            current = current[key]
        current[keys[-1]] = value
        
-        # Save back
-        with open(config_path, 'w') as f:
-            yaml.dump(config, f, default_flow_style=False, sort_keys=False)
+        # Save back atomically — write to temp file + fsync + os.replace
+        # so an interrupt never leaves config.yaml truncated or empty.
+        from utils import atomic_yaml_write
+        atomic_yaml_write(config_path, config)
        
        # Enforce owner-only permissions on config files (contain API keys)
        try:
@ -1073,12 +1085,16 @@ class HermesCLI:
        # streaming: stream tokens to the terminal as they arrive (display.streaming in config.yaml)
        self.streaming_enabled = CLI_CONFIG["display"].get("streaming", False)

+        # Inline diff previews for write actions (display.inline_diffs in config.yaml)
+        self._inline_diffs_enabled = CLI_CONFIG["display"].get("inline_diffs", True)
+
        # Streaming display state
        self._stream_buf = ""        # Partial line buffer for line-buffered rendering
        self._stream_started = False  # True once first delta arrives
        self._stream_box_opened = False  # True once the response box header is printed
        self._reasoning_stream_started = False  # True once live reasoning starts streaming
        self._reasoning_preview_buf = ""  # Coalesce tiny reasoning chunks for [thinking] output
+        self._pending_edit_snapshots = {}
        
        # Configuration - priority: CLI args > env vars > config file
        # Model comes from: CLI arg or config.yaml (single source of truth).
@ -1087,7 +1103,7 @@ class HermesCLI:
        # env vars would stomp each other.
        _model_config = CLI_CONFIG.get("model", {})
        _config_model = (_model_config.get("default") or _model_config.get("model") or "") if isinstance(_model_config, dict) else (_model_config or "")
-        _DEFAULT_CONFIG_MODEL = "anthropic/claude-opus-4.6"
+        _DEFAULT_CONFIG_MODEL = ""
        self.model = model or _config_model or _DEFAULT_CONFIG_MODEL
        # Auto-detect model from local server if still on default
        if self.model == _DEFAULT_CONFIG_MODEL:
@ -1124,9 +1140,9 @@ class HermesCLI:
        self.acp_args: list[str] = []
        self.base_url = (
            base_url
-            or os.getenv("OPENAI_BASE_URL")
-            or os.getenv("OPENROUTER_BASE_URL", CLI_CONFIG["model"]["base_url"])
-        )
+            or CLI_CONFIG["model"].get("base_url", "")
+            or os.getenv("OPENROUTER_BASE_URL", "")
+        ) or None
        # Match key to resolved base_url: OpenRouter URL → prefer OPENROUTER_API_KEY,
        # custom endpoint → prefer OPENAI_API_KEY (issue #560).
        # Note: _ensure_runtime_credentials() re-resolves this before first use.
@ -1955,6 +1971,7 @@ class HermesCLI:
        resolved_api_mode = runtime.get("api_mode", self.api_mode)
        resolved_acp_command = runtime.get("command")
        resolved_acp_args = list(runtime.get("args") or [])
+        resolved_credential_pool = runtime.get("credential_pool")
        if not isinstance(api_key, str) or not api_key:
            # Custom / local endpoints (llama.cpp, ollama, vLLM, etc.) often
            # don't require authentication.  When a base_url IS configured but
@ -1970,10 +1987,12 @@ class HermesCLI:
                    base_url, _source,
                )
            else:
-                self.console.print("[bold red]Provider resolver returned an empty API key.[/]")
+                print("\n⚠️  Provider resolver returned an empty API key. "
+                      "Set OPENROUTER_API_KEY or run: hermes setup")
                return False
        if not isinstance(base_url, str) or not base_url:
-            self.console.print("[bold red]Provider resolver returned an empty base URL.[/]")
+            print("\n⚠️  Provider resolver returned an empty base URL. "
+                  "Check your provider config or run: hermes setup")
            return False

        credentials_changed = api_key != self.api_key or base_url != self.base_url
@ -1987,6 +2006,7 @@ class HermesCLI:
        self.api_mode = resolved_api_mode
        self.acp_command = resolved_acp_command
        self.acp_args = resolved_acp_args
+        self._credential_pool = resolved_credential_pool
        self._provider_source = runtime.get("source")
        self.api_key = api_key
        self.base_url = base_url
@ -2018,6 +2038,7 @@ class HermesCLI:
                "api_mode": self.api_mode,
                "command": self.acp_command,
                "args": list(self.acp_args or []),
+                "credential_pool": getattr(self, "_credential_pool", None),
            },
        )

@ -2088,6 +2109,7 @@ class HermesCLI:
                "api_mode": self.api_mode,
                "command": self.acp_command,
                "args": list(self.acp_args or []),
+                "credential_pool": getattr(self, "_credential_pool", None),
            }
            effective_model = model_override or self.model
            self.agent = AIAgent(
@ -2098,6 +2120,7 @@ class HermesCLI:
                api_mode=runtime.get("api_mode"),
                acp_command=runtime.get("command"),
                acp_args=runtime.get("args"),
+                credential_pool=runtime.get("credential_pool"),
                max_iterations=self.max_turns,
                enabled_toolsets=self.enabled_toolsets,
                verbose_logging=self.verbose,
@ -2123,6 +2146,8 @@ class HermesCLI:
                checkpoint_max_snapshots=self.checkpoint_max_snapshots,
                pass_session_id=self.pass_session_id,
                tool_progress_callback=self._on_tool_progress,
+                tool_start_callback=self._on_tool_start if self._inline_diffs_enabled else None,
+                tool_complete_callback=self._on_tool_complete if self._inline_diffs_enabled else None,
                stream_delta_callback=self._stream_delta if self.streaming_enabled else None,
                tool_gen_callback=self._on_tool_gen_start if self.streaming_enabled else None,
            )
@ -2155,6 +2180,12 @@ class HermesCLI:
        """Display the welcome banner in Claude Code style."""
        self.console.clear()

+        # Get context length for display before branching so it remains
+        # available to the low-context warning logic in compact mode too.
+        ctx_len = None
+        if hasattr(self, 'agent') and self.agent and hasattr(self.agent, 'context_compressor'):
+            ctx_len = self.agent.context_compressor.context_length
+        
        # Auto-compact for narrow terminals — the full banner with caduceus
        # + tool list needs ~80 columns minimum to render without wrapping.
        term_width = shutil.get_terminal_size().columns
@ -2170,11 +2201,6 @@ class HermesCLI:
            # Get terminal working directory (where commands will execute)
            cwd = os.getenv("TERMINAL_CWD", os.getcwd())
            
-            # Get context length for display
-            ctx_len = None
-            if hasattr(self, 'agent') and self.agent and hasattr(self.agent, 'context_compressor'):
-                ctx_len = self.agent.context_compressor.context_length
-            
            # Build and display the banner
            build_welcome_banner(
                console=self.console,
@ -2189,6 +2215,30 @@ class HermesCLI:
        # Show tool availability warnings if any tools are disabled
        self._show_tool_availability_warnings()

+        # Warn about very low context lengths (common with local servers)
+        if ctx_len and ctx_len <= 8192:
+            self.console.print()
+            self.console.print(
+                f"[yellow]⚠️  Context length is only {ctx_len:,} tokens — "
+                f"this is likely too low for agent use with tools.[/]"
+            )
+            self.console.print(
+                "[dim]   Hermes needs 16k–32k minimum. Tool schemas + system prompt alone use ~4k–8k.[/]"
+            )
+            base_url = getattr(self, "base_url", "") or ""
+            if "11434" in base_url or "ollama" in base_url.lower():
+                self.console.print(
+                    "[dim]   Ollama fix: OLLAMA_CONTEXT_LENGTH=32768 ollama serve[/]"
+                )
+            elif "1234" in base_url:
+                self.console.print(
+                    "[dim]   LM Studio fix: Set context length in model settings → reload model[/]"
+                )
+            else:
+                self.console.print(
+                    "[dim]   Fix: Set model.context_length in config.yaml, or increase your server's context setting[/]"
+                )
+
        self.console.print()

    def _preload_resumed_session(self) -> bool:
@ -3239,7 +3289,7 @@ class HermesCLI:
                        print(f"      {mid}{current_marker}")
                elif p["id"] == "custom":
                    from hermes_cli.models import _get_custom_base_url
-                    custom_url = _get_custom_base_url() or os.getenv("OPENAI_BASE_URL", "")
+                    custom_url = _get_custom_base_url()
                    if custom_url:
                        print(f"      endpoint: {custom_url}")
                    if is_active:
@ -3904,6 +3954,8 @@ class HermesCLI:
            self._handle_stop_command()
        elif canonical == "background":
            self._handle_background_command(cmd_original)
+        elif canonical == "btw":
+            self._handle_btw_command(cmd_original)
        elif canonical == "queue":
            # Extract prompt after "/queue " or "/q "
            parts = cmd_original.split(None, 1)
@ -4190,6 +4242,121 @@ class HermesCLI:
        self._background_tasks[task_id] = thread
        thread.start()

+    def _handle_btw_command(self, cmd: str):
+        """Handle /btw <question> — ephemeral side question using session context.
+
+        Snapshots the current conversation history, spawns a no-tools agent in
+        a background thread, and prints the answer without persisting anything
+        to the main session.
+        """
+        parts = cmd.strip().split(maxsplit=1)
+        if len(parts) < 2 or not parts[1].strip():
+            _cprint("  Usage: /btw <question>")
+            _cprint("  Example: /btw what module owns session title sanitization?")
+            _cprint("  Answers using session context. No tools, not persisted.")
+            return
+
+        question = parts[1].strip()
+        task_id = f"btw_{datetime.now().strftime('%H%M%S')}_{uuid.uuid4().hex[:6]}"
+
+        if not self._ensure_runtime_credentials():
+            _cprint("  (>_<) Cannot start /btw: no valid credentials.")
+            return
+
+        turn_route = self._resolve_turn_agent_config(question)
+        history_snapshot = list(self.conversation_history)
+
+        preview = question[:60] + ("..." if len(question) > 60 else "")
+        _cprint(f'  💬 /btw: "{preview}"')
+
+        def run_btw():
+            try:
+                btw_agent = AIAgent(
+                    model=turn_route["model"],
+                    api_key=turn_route["runtime"].get("api_key"),
+                    base_url=turn_route["runtime"].get("base_url"),
+                    provider=turn_route["runtime"].get("provider"),
+                    api_mode=turn_route["runtime"].get("api_mode"),
+                    acp_command=turn_route["runtime"].get("command"),
+                    acp_args=turn_route["runtime"].get("args"),
+                    max_iterations=8,
+                    enabled_toolsets=[],
+                    quiet_mode=True,
+                    verbose_logging=False,
+                    session_id=task_id,
+                    platform="cli",
+                    reasoning_config=self.reasoning_config,
+                    providers_allowed=self._providers_only,
+                    providers_ignored=self._providers_ignore,
+                    providers_order=self._providers_order,
+                    provider_sort=self._provider_sort,
+                    provider_require_parameters=self._provider_require_params,
+                    provider_data_collection=self._provider_data_collection,
+                    fallback_model=self._fallback_model,
+                    session_db=None,
+                    skip_memory=True,
+                    skip_context_files=True,
+                    persist_session=False,
+                )
+
+                btw_prompt = (
+                    "[Ephemeral /btw side question. Answer using the conversation "
+                    "context. No tools available. Be direct and concise.]\n\n"
+                    + question
+                )
+                result = btw_agent.run_conversation(
+                    user_message=btw_prompt,
+                    conversation_history=history_snapshot,
+                    task_id=task_id,
+                    sync_honcho=False,
+                )
+
+                response = (result.get("final_response") or "") if result else ""
+                if not response and result and result.get("error"):
+                    response = f"Error: {result['error']}"
+
+                # TUI refresh before printing
+                if self._app:
+                    self._app.invalidate()
+                    time.sleep(0.05)
+                print()
+
+                if response:
+                    try:
+                        from hermes_cli.skin_engine import get_active_skin
+                        _skin = get_active_skin()
+                        _resp_color = _skin.get_color("response_border", "#4F6D4A")
+                    except Exception:
+                        _resp_color = "#4F6D4A"
+
+                    ChatConsole().print(Panel(
+                        _rich_text_from_ansi(response),
+                        title=f"[{_resp_color} bold]⚕ /btw[/]",
+                        title_align="left",
+                        border_style=_resp_color,
+                        box=rich_box.HORIZONTALS,
+                        padding=(1, 2),
+                    ))
+                else:
+                    _cprint("  💬 /btw: (no response)")
+
+                if self.bell_on_complete:
+                    sys.stdout.write("\a")
+                    sys.stdout.flush()
+
+            except Exception as e:
+                if self._app:
+                    self._app.invalidate()
+                    time.sleep(0.05)
+                print()
+                _cprint(f"  ❌ /btw failed: {e}")
+            finally:
+                if self._app:
+                    self._invalidate(min_interval=0)
+
+        thread = threading.Thread(target=run_btw, daemon=True, name=f"btw-{task_id}")
+        thread.start()
+
    @staticmethod
    def _try_launch_chrome_debug(port: int, system: str) -> bool:
        """Try to launch Chrome/Chromium with remote debugging enabled.
@ -4883,6 +5050,33 @@ class HermesCLI:
        except Exception:
            pass

+    def _on_tool_start(self, tool_call_id: str, function_name: str, function_args: dict):
+        """Capture local before-state for write-capable tools."""
+        try:
+            from agent.display import capture_local_edit_snapshot
+
+            snapshot = capture_local_edit_snapshot(function_name, function_args)
+            if snapshot is not None:
+                self._pending_edit_snapshots[tool_call_id] = snapshot
+        except Exception:
+            logger.debug("Edit snapshot capture failed for %s", function_name, exc_info=True)
+
+    def _on_tool_complete(self, tool_call_id: str, function_name: str, function_args: dict, function_result: str):
+        """Render file edits with inline diff after write-capable tools complete."""
+        snapshot = self._pending_edit_snapshots.pop(tool_call_id, None)
+        try:
+            from agent.display import render_edit_diff_with_delta
+
+            render_edit_diff_with_delta(
+                function_name,
+                function_result,
+                function_args=function_args,
+                snapshot=snapshot,
+                print_fn=_cprint,
+            )
+        except Exception:
+            logger.debug("Edit diff preview failed for %s", function_name, exc_info=True)
+
    # ====================================================================
    # Voice mode methods
    # ====================================================================
@ -5597,6 +5791,8 @@ class HermesCLI:
            self.agent = None

        # Initialize agent if needed
+        if self.agent is None:
+            _cprint(f"{_DIM}Initializing agent...{_RST}")
        if not self._init_agent(
            model_override=turn_route["model"],
            runtime_override=turn_route["runtime"],
@ -6192,6 +6388,17 @@ class HermesCLI:

    def run(self):
        """Run the interactive CLI loop with persistent input at bottom."""
+        # Push the entire TUI to the bottom of the terminal so the banner,
+        # responses, and prompt all appear pinned to the bottom — empty
+        # space stays above, not below.  This prints enough blank lines to
+        # scroll the cursor to the last row before any content is rendered.
+        try:
+            _term_lines = shutil.get_terminal_size().lines
+            if _term_lines > 2:
+                print("\n" * (_term_lines - 1), end="", flush=True)
+        except Exception:
+            pass
+
        self.show_banner()

        # One-line Honcho session indicator (TTY-only, not captured by agent).
@ -7417,6 +7624,7 @@ class HermesCLI:
                    finally:
                        self._agent_running = False
                        self._spinner_text = ""
+
                        app.invalidate()  # Refresh status line

                        # Continuous voice: auto-restart recording after agent responds.
@ -7445,6 +7653,20 @@ class HermesCLI:
        # Register atexit cleanup so resources are freed even on unexpected exit
        atexit.register(_run_cleanup)
        
+        # Register signal handlers for graceful shutdown on SSH disconnect / SIGTERM
+        def _signal_handler(signum, frame):
+            """Handle SIGHUP/SIGTERM by triggering graceful cleanup."""
+            logger.debug("Received signal %s, triggering graceful shutdown", signum)
+            raise KeyboardInterrupt()
+        
+        try:
+            import signal as _signal
+            _signal.signal(_signal.SIGTERM, _signal_handler)
+            if hasattr(_signal, 'SIGHUP'):
+                _signal.signal(_signal.SIGHUP, _signal_handler)
+        except Exception:
+            pass  # Signal handlers may fail in restricted environments
+        
        # Install a custom asyncio exception handler that suppresses the
        # "Event loop is closed" RuntimeError from httpx transport cleanup.
        # This is defense-in-depth — the primary fix is neuter_async_httpx_del
@ -7468,7 +7690,7 @@ class HermesCLI:
                except Exception:
                    pass
                app.run()
-        except (EOFError, KeyboardInterrupt):
+        except (EOFError, KeyboardInterrupt, BrokenPipeError):
            pass
        finally:
            self._should_exit = True
@ -7507,6 +7729,23 @@ class HermesCLI:
                    self._session_db.end_session(self.agent.session_id, "cli_close")
                except (Exception, KeyboardInterrupt) as e:
                    logger.debug("Could not close session in DB: %s", e)
+            # Plugin hook: on_session_end — safety net for interrupted exits.
+            # run_conversation() already fires this per-turn on normal completion,
+            # so only fire here if the agent was mid-turn (_agent_running) when
+            # the exit occurred, meaning run_conversation's hook didn't fire.
+            if self.agent and getattr(self, '_agent_running', False):
+                try:
+                    from hermes_cli.plugins import invoke_hook as _invoke_hook
+                    _invoke_hook(
+                        "on_session_end",
+                        session_id=self.agent.session_id,
+                        completed=False,
+                        interrupted=True,
+                        model=getattr(self.agent, 'model', None),
+                        platform=getattr(self.agent, 'platform', None) or "cli",
+                    )
+                except Exception:
+                    pass
            _run_cleanup()
            self._print_exit_summary()

--- a/gateway/config.py
+++ b/gateway/config.py
@ -547,6 +547,8 @@ def load_gateway_config() -> GatewayConfig:
                    os.environ["DISCORD_FREE_RESPONSE_CHANNELS"] = str(frc)
                if "auto_thread" in discord_cfg and not os.getenv("DISCORD_AUTO_THREAD"):
                    os.environ["DISCORD_AUTO_THREAD"] = str(discord_cfg["auto_thread"]).lower()
+                if "reactions" in discord_cfg and not os.getenv("DISCORD_REACTIONS"):
+                    os.environ["DISCORD_REACTIONS"] = str(discord_cfg["reactions"]).lower()

            # Telegram settings → env vars (env vars take precedence)
            telegram_cfg = yaml_cfg.get("telegram", {})
--- a/gateway/delivery.py
+++ b/gateway/delivery.py
@ -70,12 +70,15 @@ class DeliveryTarget:
        if target == "local":
            return cls(platform=Platform.LOCAL)
        
-        # Check for platform:chat_id format
+        # Check for platform:chat_id or platform:chat_id:thread_id format
        if ":" in target:
-            platform_str, chat_id = target.split(":", 1)
+            parts = target.split(":", 2)
+            platform_str = parts[0]
+            chat_id = parts[1] if len(parts) > 1 else None
+            thread_id = parts[2] if len(parts) > 2 else None
            try:
                platform = Platform(platform_str)
-                return cls(platform=platform, chat_id=chat_id, is_explicit=True)
+                return cls(platform=platform, chat_id=chat_id, thread_id=thread_id, is_explicit=True)
            except ValueError:
                # Unknown platform, treat as local
                return cls(platform=Platform.LOCAL)
@ -94,6 +97,8 @@ class DeliveryTarget:
            return "origin"
        if self.platform == Platform.LOCAL:
            return "local"
+        if self.chat_id and self.thread_id:
+            return f"{self.platform.value}:{self.chat_id}:{self.thread_id}"
        if self.chat_id:
            return f"{self.platform.value}:{self.chat_id}"
        return self.platform.value
--- a/gateway/platforms/api_server.py
+++ b/gateway/platforms/api_server.py
@ -2,7 +2,7 @@
 OpenAI-compatible API server platform adapter.

 Exposes an HTTP server with endpoints:
- POST /v1/chat/completions        — OpenAI Chat Completions format (stateless)
+- POST /v1/chat/completions        — OpenAI Chat Completions format (stateless; opt-in session continuity via X-Hermes-Session-Id header)
 - POST /v1/responses               — OpenAI Responses API format (stateful via previous_response_id)
 - GET  /v1/responses/{response_id} — Retrieve a stored response
 - DELETE /v1/responses/{response_id} — Delete a stored response
@ -300,6 +300,7 @@ class APIServerAdapter(BasePlatformAdapter):
        self._runner: Optional["web.AppRunner"] = None
        self._site: Optional["web.TCPSite"] = None
        self._response_store = ResponseStore()
+        self._session_db: Optional[Any] = None  # Lazy-init SessionDB for session continuity

    @staticmethod
    def _parse_cors_origins(value: Any) -> tuple[str, ...]:
@ -380,6 +381,7 @@ class APIServerAdapter(BasePlatformAdapter):
        ephemeral_system_prompt: Optional[str] = None,
        session_id: Optional[str] = None,
        stream_delta_callback=None,
+        tool_progress_callback=None,
    ) -> Any:
        """
        Create an AIAgent instance using the gateway's runtime config.
@ -412,6 +414,7 @@ class APIServerAdapter(BasePlatformAdapter):
            session_id=session_id,
            platform="api_server",
            stream_delta_callback=stream_delta_callback,
+            tool_progress_callback=tool_progress_callback,
        )
        return agent

@ -494,7 +497,23 @@ class APIServerAdapter(BasePlatformAdapter):
                status=400,
            )

-        session_id = str(uuid.uuid4())
+        # Allow caller to continue an existing session by passing X-Hermes-Session-Id.
+        # When provided, history is loaded from state.db instead of from the request body.
+        provided_session_id = request.headers.get("X-Hermes-Session-Id", "").strip()
+        if provided_session_id:
+            session_id = provided_session_id
+            try:
+                if self._session_db is None:
+                    from hermes_state import SessionDB
+                    self._session_db = SessionDB()
+                history = self._session_db.get_messages_as_conversation(session_id)
+            except Exception as e:
+                logger.warning("Failed to load session history for %s: %s", session_id, e)
+                history = []
+        else:
+            session_id = str(uuid.uuid4())
+            # history already set from request body above
+
        completion_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"
        model_name = body.get("model", "hermes-agent")
        created = int(time.time())
@ -514,6 +533,15 @@ class APIServerAdapter(BasePlatformAdapter):
                if delta is not None:
                    _stream_q.put(delta)

+            def _on_tool_progress(name, preview, args):
+                """Inject tool progress into the SSE stream for Open WebUI."""
+                if name.startswith("_"):
+                    return  # Skip internal events (_thinking)
+                from agent.display import get_tool_emoji
+                emoji = get_tool_emoji(name)
+                label = preview or name
+                _stream_q.put(f"\n`{emoji} {label}`\n")
+
            # Start agent in background.  agent_ref is a mutable container
            # so the SSE writer can interrupt the agent on client disconnect.
            agent_ref = [None]
@ -523,12 +551,13 @@ class APIServerAdapter(BasePlatformAdapter):
                ephemeral_system_prompt=system_prompt,
                session_id=session_id,
                stream_delta_callback=_on_delta,
+                tool_progress_callback=_on_tool_progress,
                agent_ref=agent_ref,
            ))

            return await self._write_sse_chat_completion(
                request, completion_id, model_name, created, _stream_q,
-                agent_task, agent_ref,
+                agent_task, agent_ref, session_id=session_id,
            )

        # Non-streaming: run the agent (with optional Idempotency-Key)
@ -587,11 +616,11 @@ class APIServerAdapter(BasePlatformAdapter):
            },
        }

-        return web.json_response(response_data)
+        return web.json_response(response_data, headers={"X-Hermes-Session-Id": session_id})

    async def _write_sse_chat_completion(
        self, request: "web.Request", completion_id: str, model: str,
-        created: int, stream_q, agent_task, agent_ref=None,
+        created: int, stream_q, agent_task, agent_ref=None, session_id: str = None,
    ) -> "web.StreamResponse":
        """Write real streaming SSE from agent's stream_delta_callback queue.

@ -608,6 +637,8 @@ class APIServerAdapter(BasePlatformAdapter):
        cors = self._cors_headers_for_origin(origin) if origin else None
        if cors:
            sse_headers.update(cors)
+        if session_id:
+            sse_headers["X-Hermes-Session-Id"] = session_id
        response = web.StreamResponse(status=200, headers=sse_headers)
        await response.prepare(request)

@ -1194,6 +1225,7 @@ class APIServerAdapter(BasePlatformAdapter):
        ephemeral_system_prompt: Optional[str] = None,
        session_id: Optional[str] = None,
        stream_delta_callback=None,
+        tool_progress_callback=None,
        agent_ref: Optional[list] = None,
    ) -> tuple:
        """
@ -1214,6 +1246,7 @@ class APIServerAdapter(BasePlatformAdapter):
                ephemeral_system_prompt=ephemeral_system_prompt,
                session_id=session_id,
                stream_delta_callback=stream_delta_callback,
+                tool_progress_callback=tool_progress_callback,
            )
            if agent_ref is not None:
                agent_ref[0] = agent
--- a/gateway/platforms/discord.py
+++ b/gateway/platforms/discord.py
@ -548,6 +548,10 @@ class DiscordAdapter(BasePlatformAdapter):
                if message.type not in (discord.MessageType.default, discord.MessageType.reply):
                    return

+                # Check if the message author is in the allowed user list
+                if not self._is_allowed_user(str(message.author.id)):
+                    return
+
                # Bot message filtering (DISCORD_ALLOW_BOTS):
                #   "none"     — ignore all other bots (default)
                #   "mentions" — accept bot messages only when they @mention us
@ -683,14 +687,22 @@ class DiscordAdapter(BasePlatformAdapter):
            logger.debug("[%s] remove_reaction failed (%s): %s", self.name, emoji, e)
            return False

+    def _reactions_enabled(self) -> bool:
+        """Check if message reactions are enabled via config/env."""
+        return os.getenv("DISCORD_REACTIONS", "true").lower() not in ("false", "0", "no")
+
    async def on_processing_start(self, event: MessageEvent) -> None:
        """Add an in-progress reaction for normal Discord message events."""
+        if not self._reactions_enabled():
+            return
        message = event.raw_message
        if hasattr(message, "add_reaction"):
            await self._add_reaction(message, "👀")

    async def on_processing_complete(self, event: MessageEvent, success: bool) -> None:
        """Swap the in-progress reaction for a final success/failure reaction."""
+        if not self._reactions_enabled():
+            return
        message = event.raw_message
        if hasattr(message, "add_reaction"):
            await self._remove_reaction(message, "👀")
--- a/gateway/platforms/matrix.py
+++ b/gateway/platforms/matrix.py
@ -49,6 +49,14 @@ _STORE_DIR = _get_hermes_dir("platforms/matrix/store", "matrix/store")
 # Grace period: ignore messages older than this many seconds before startup.
 _STARTUP_GRACE_SECONDS = 5

+# E2EE key export file for persistence across restarts.
+_KEY_EXPORT_FILE = _STORE_DIR / "exported_keys.txt"
+_KEY_EXPORT_PASSPHRASE = "hermes-matrix-e2ee-keys"
+
+# Pending undecrypted events: cap and TTL for retry buffer.
+_MAX_PENDING_EVENTS = 100
+_PENDING_EVENT_TTL = 300  # seconds — stop retrying after 5 min
+

 def check_matrix_requirements() -> bool:
    """Return True if the Matrix adapter can be used."""
@ -111,6 +119,10 @@ class MatrixAdapter(BasePlatformAdapter):
        self._processed_events: deque = deque(maxlen=1000)
        self._processed_events_set: set = set()

+        # Buffer for undecrypted events pending key receipt.
+        # Each entry: (room, event, timestamp)
+        self._pending_megolm: list = []
+
    def _is_duplicate_event(self, event_id) -> bool:
        """Return True if this event was already processed. Tracks the ID otherwise."""
        if not event_id:
@ -232,6 +244,16 @@ class MatrixAdapter(BasePlatformAdapter):
                logger.info("Matrix: E2EE crypto initialized")
            except Exception as exc:
                logger.warning("Matrix: crypto init issue: %s", exc)
+
+            # Import previously exported Megolm keys (survives restarts).
+            if _KEY_EXPORT_FILE.exists():
+                try:
+                    await client.import_keys(
+                        str(_KEY_EXPORT_FILE), _KEY_EXPORT_PASSPHRASE,
+                    )
+                    logger.info("Matrix: imported Megolm keys from backup")
+                except Exception as exc:
+                    logger.debug("Matrix: could not import keys: %s", exc)
        elif self._encryption:
            logger.warning(
                "Matrix: E2EE requested but crypto store is not loaded; "
@ -286,6 +308,18 @@ class MatrixAdapter(BasePlatformAdapter):
            except (asyncio.CancelledError, Exception):
                pass

+        # Export Megolm keys before closing so the next restart can decrypt
+        # events that used sessions from this run.
+        if self._client and self._encryption and getattr(self._client, "olm", None):
+            try:
+                _STORE_DIR.mkdir(parents=True, exist_ok=True)
+                await self._client.export_keys(
+                    str(_KEY_EXPORT_FILE), _KEY_EXPORT_PASSPHRASE,
+                )
+                logger.info("Matrix: exported Megolm keys for next restart")
+            except Exception as exc:
+                logger.debug("Matrix: could not export keys on disconnect: %s", exc)
+
        if self._client:
            await self._client.close()
            self._client = None
@ -665,17 +699,22 @@ class MatrixAdapter(BasePlatformAdapter):
        Hermes uses a custom sync loop instead of matrix-nio's sync_forever(),
        so we need to explicitly drive the key management work that sync_forever()
        normally handles for encrypted rooms.
+
+        Also auto-trusts all devices (so senders share session keys with us)
+        and retries decryption for any buffered MegolmEvents.
        """
        client = self._client
        if not client or not self._encryption or not getattr(client, "olm", None):
            return

+        did_query_keys = client.should_query_keys
+
        tasks = [asyncio.create_task(client.send_to_device_messages())]

        if client.should_upload_keys:
            tasks.append(asyncio.create_task(client.keys_upload()))

-        if client.should_query_keys:
+        if did_query_keys:
            tasks.append(asyncio.create_task(client.keys_query()))

        if client.should_claim_keys:
@ -691,6 +730,111 @@ class MatrixAdapter(BasePlatformAdapter):
            except Exception as exc:
                logger.warning("Matrix: E2EE maintenance task failed: %s", exc)

+        # After key queries, auto-trust all devices so senders share keys with
+        # us.  For a bot this is the right default — we want to decrypt
+        # everything, not enforce manual verification.
+        if did_query_keys:
+            self._auto_trust_devices()
+
+        # Retry any buffered undecrypted events now that new keys may have
+        # arrived (from key requests, key queries, or to-device forwarding).
+        if self._pending_megolm:
+            await self._retry_pending_decryptions()
+
+    def _auto_trust_devices(self) -> None:
+        """Trust/verify all unverified devices we know about.
+
+        When other clients see our device as verified, they proactively share
+        Megolm session keys with us.  Without this, many clients will refuse
+        to include an unverified device in key distributions.
+        """
+        client = self._client
+        if not client:
+            return
+
+        device_store = getattr(client, "device_store", None)
+        if not device_store:
+            return
+
+        own_device = getattr(client, "device_id", None)
+        trusted_count = 0
+
+        try:
+            # DeviceStore.__iter__ yields OlmDevice objects directly.
+            for device in device_store:
+                if getattr(device, "device_id", None) == own_device:
+                    continue
+                if not getattr(device, "verified", False):
+                    client.verify_device(device)
+                    trusted_count += 1
+        except Exception as exc:
+            logger.debug("Matrix: auto-trust error: %s", exc)
+
+        if trusted_count:
+            logger.info("Matrix: auto-trusted %d new device(s)", trusted_count)
+
+    async def _retry_pending_decryptions(self) -> None:
+        """Retry decrypting buffered MegolmEvents after new keys arrive."""
+        import nio
+
+        client = self._client
+        if not client or not self._pending_megolm:
+            return
+
+        now = time.time()
+        still_pending: list = []
+
+        for room, event, ts in self._pending_megolm:
+            # Drop events that have aged past the TTL.
+            if now - ts > _PENDING_EVENT_TTL:
+                logger.debug(
+                    "Matrix: dropping expired pending event %s (age %.0fs)",
+                    getattr(event, "event_id", "?"), now - ts,
+                )
+                continue
+
+            try:
+                decrypted = client.decrypt_event(event)
+            except Exception:
+                # Still missing the key — keep in buffer.
+                still_pending.append((room, event, ts))
+                continue
+
+            if isinstance(decrypted, nio.MegolmEvent):
+                # decrypt_event returned the same undecryptable event.
+                still_pending.append((room, event, ts))
+                continue
+
+            logger.info(
+                "Matrix: decrypted buffered event %s (%s)",
+                getattr(event, "event_id", "?"),
+                type(decrypted).__name__,
+            )
+
+            # Route to the appropriate handler based on decrypted type.
+            try:
+                if isinstance(decrypted, nio.RoomMessageText):
+                    await self._on_room_message(room, decrypted)
+                elif isinstance(
+                    decrypted,
+                    (nio.RoomMessageImage, nio.RoomMessageAudio,
+                     nio.RoomMessageVideo, nio.RoomMessageFile),
+                ):
+                    await self._on_room_message_media(room, decrypted)
+                else:
+                    logger.debug(
+                        "Matrix: decrypted event %s has unhandled type %s",
+                        getattr(event, "event_id", "?"),
+                        type(decrypted).__name__,
+                    )
+            except Exception as exc:
+                logger.warning(
+                    "Matrix: error processing decrypted event %s: %s",
+                    getattr(event, "event_id", "?"), exc,
+                )
+
+        self._pending_megolm = still_pending
+
    # ------------------------------------------------------------------
    # Event callbacks
    # ------------------------------------------------------------------
@ -712,13 +856,29 @@ class MatrixAdapter(BasePlatformAdapter):
        if event_ts and event_ts < self._startup_ts - _STARTUP_GRACE_SECONDS:
            return

-        # Handle decrypted MegolmEvents — extract the inner event.
+        # Handle undecryptable MegolmEvents: request the missing session key
+        # and buffer the event for retry once the key arrives.
        if isinstance(event, nio.MegolmEvent):
-            # Failed to decrypt.
            logger.warning(
-                "Matrix: could not decrypt event %s in %s",
+                "Matrix: could not decrypt event %s in %s — requesting key",
                event.event_id, room.room_id,
            )
+
+            # Ask other devices in the room to forward the session key.
+            try:
+                resp = await self._client.request_room_key(event)
+                if hasattr(resp, "event_id") or not isinstance(resp, Exception):
+                    logger.debug(
+                        "Matrix: room key request sent for session %s",
+                        getattr(event, "session_id", "?"),
+                    )
+            except Exception as exc:
+                logger.debug("Matrix: room key request failed: %s", exc)
+
+            # Buffer for retry on next maintenance cycle.
+            self._pending_megolm.append((room, event, time.time()))
+            if len(self._pending_megolm) > _MAX_PENDING_EVENTS:
+                self._pending_megolm = self._pending_megolm[-_MAX_PENDING_EVENTS:]
            return

        # Skip edits (m.replace relation).
--- a/gateway/platforms/telegram.py
+++ b/gateway/platforms/telegram.py
@ -742,6 +742,10 @@ class TelegramAdapter(BasePlatformAdapter):
        if not self._bot:
            return SendResult(success=False, error="Not connected")
        
+        # Skip whitespace-only text to prevent Telegram 400 empty-text errors.
+        if not content or not content.strip():
+            return SendResult(success=True, message_id=None)
+        
        try:
            # Format and split message if needed
            formatted = self.format_message(content)
--- a/gateway/platforms/telegram_network.py
+++ b/gateway/platforms/telegram_network.py
@ -135,6 +135,9 @@ def _normalize_fallback_ips(values: Iterable[str]) -> list[str]:
        if addr.version != 4:
            logger.warning("Ignoring non-IPv4 Telegram fallback IP: %s", raw)
            continue
+        if addr.is_private or addr.is_loopback or addr.is_link_local or addr.is_unspecified:
+            logger.warning("Ignoring private/internal Telegram fallback IP: %s", raw)
+            continue
        normalized.append(str(addr))
    return normalized

--- a/gateway/run.py
+++ b/gateway/run.py
@ -24,6 +24,7 @@ import signal
 import tempfile
 import threading
 import time
+import uuid
 from logging.handlers import RotatingFileHandler
 from pathlib import Path
 from datetime import datetime
@ -298,6 +299,7 @@ def _resolve_runtime_agent_kwargs() -> dict:
        "api_mode": runtime.get("api_mode"),
        "command": runtime.get("command"),
        "args": list(runtime.get("args") or []),
+        "credential_pool": runtime.get("credential_pool"),
    }


@ -325,9 +327,9 @@ def _check_unavailable_skill(command_name: str) -> str | None:
                )

        # Check optional skills (shipped with repo but not installed)
-        from hermes_constants import get_hermes_home
+        from hermes_constants import get_hermes_home, get_optional_skills_dir
        repo_root = Path(__file__).resolve().parent.parent
-        optional_dir = repo_root / "optional-skills"
+        optional_dir = get_optional_skills_dir(repo_root / "optional-skills")
        if optional_dir.exists():
            for skill_md in optional_dir.rglob("SKILL.md"):
                name = skill_md.parent.name.lower().replace("_", "-")
@ -364,20 +366,19 @@ def _load_gateway_config() -> dict:


 def _resolve_gateway_model(config: dict | None = None) -> str:
-    """Read model from env/config — mirrors the resolution in _run_agent_sync.
+    """Read model from config.yaml — single source of truth.

    Without this, temporary AIAgent instances (memory flush, /compress) fall
    back to the hardcoded default which fails when the active provider is
    openai-codex.
    """
-    model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or ""
    cfg = config if config is not None else _load_gateway_config()
    model_cfg = cfg.get("model", {})
    if isinstance(model_cfg, str):
-        model = model_cfg
+        return model_cfg
    elif isinstance(model_cfg, dict):
-        model = model_cfg.get("default") or model_cfg.get("model") or model
-    return model
+        return model_cfg.get("default") or model_cfg.get("model") or ""
+    return ""


 def _resolve_hermes_bin() -> Optional[list[str]]:
@ -476,12 +477,7 @@ class GatewayRunner:
        self._honcho_managers: Dict[str, Any] = {}
        self._honcho_configs: Dict[str, Any] = {}

-        # Rate-limit compression warning messages sent to users.
-        # Keyed by chat_id — value is the timestamp of the last warning sent.
-        # Prevents the warning from firing on every message when a session
-        # remains above the threshold after compression.
-        self._compression_warn_sent: Dict[str, float] = {}
-        self._compression_warn_cooldown: int = 3600  # seconds (1 hour)
+

        # Ensure tirith security scanner is available (downloads if needed)
        try:
@ -793,6 +789,7 @@ class GatewayRunner:
            "api_mode": runtime_kwargs.get("api_mode"),
            "command": runtime_kwargs.get("command"),
            "args": list(runtime_kwargs.get("args") or []),
+            "credential_pool": runtime_kwargs.get("credential_pool"),
        }
        return resolve_turn_route(user_message, getattr(self, "_smart_model_routing", {}), primary)

@ -1283,8 +1280,8 @@ class GatewayRunner:
            try:
                self.session_store._ensure_loaded()
                for key, entry in list(self.session_store._entries.items()):
-                    if entry.session_id in self.session_store._pre_flushed_sessions:
-                        continue  # already flushed this session
+                    if entry.memory_flushed:
+                        continue  # already flushed this session (persisted to disk)
                    if not self.session_store._is_session_expired(entry):
                        continue  # session still active
                    # Session has expired — flush memories in the background
@ -1295,7 +1292,15 @@ class GatewayRunner:
                    try:
                        await self._async_flush_memories(entry.session_id, key)
                        self._shutdown_gateway_honcho(key)
-                        self.session_store._pre_flushed_sessions.add(entry.session_id)
+                        # Mark as flushed and persist to disk so the flag
+                        # survives gateway restarts.
+                        with self.session_store._lock:
+                            entry.memory_flushed = True
+                            self.session_store._save()
+                        logger.info(
+                            "Pre-reset memory flush completed for session %s",
+                            entry.session_id,
+                        )
                    except Exception as e:
                        logger.debug("Proactive memory flush failed for %s: %s", entry.session_id, e)
            except Exception as e:
@ -1655,6 +1660,11 @@ class GatewayRunner:
        if global_allowlist:
            allowed_ids.update(uid.strip() for uid in global_allowlist.split(",") if uid.strip())

+        # "*" in any allowlist means allow everyone (consistent with
+        # SIGNAL_GROUP_ALLOWED_USERS precedent)
+        if "*" in allowed_ids:
+            return True
+
        check_ids = {user_id}
        if "@" in user_id:
            check_ids.add(user_id.split("@")[0])
@ -1967,6 +1977,9 @@ class GatewayRunner:
        if canonical == "background":
            return await self._handle_background_command(event)

+        if canonical == "btw":
+            return await self._handle_btw_command(event)
+
        if canonical == "voice":
            return await self._handle_voice_command(event)

@ -2284,6 +2297,29 @@ class GatewayRunner:
                        _hyg_api_key = _hyg_runtime.get("api_key")
                    except Exception:
                        pass
+
+                # Check custom_providers per-model context_length
+                # (same fallback as run_agent.py lines 1171-1189).
+                # Must run after runtime resolution so _hyg_base_url is set.
+                if _hyg_config_context_length is None and _hyg_base_url:
+                    try:
+                        _hyg_custom_providers = _hyg_data.get("custom_providers")
+                        if isinstance(_hyg_custom_providers, list):
+                            for _cp in _hyg_custom_providers:
+                                if not isinstance(_cp, dict):
+                                    continue
+                                _cp_url = (_cp.get("base_url") or "").rstrip("/")
+                                if _cp_url and _cp_url == _hyg_base_url.rstrip("/"):
+                                    _cp_models = _cp.get("models", {})
+                                    if isinstance(_cp_models, dict):
+                                        _cp_model_cfg = _cp_models.get(_hyg_model, {})
+                                        if isinstance(_cp_model_cfg, dict):
+                                            _cp_ctx = _cp_model_cfg.get("context_length")
+                                            if _cp_ctx is not None:
+                                                _hyg_config_context_length = int(_cp_ctx)
+                                    break
+                    except (TypeError, ValueError):
+                        pass
            except Exception:
                pass

@ -2331,18 +2367,7 @@ class GatewayRunner:
                        f"{_compress_token_threshold:,}",
                    )

-                    _hyg_adapter = self.adapters.get(source.platform)
                    _hyg_meta = {"thread_id": source.thread_id} if source.thread_id else None
-                    if _hyg_adapter:
-                        try:
-                            await _hyg_adapter.send(
-                                source.chat_id,
-                                f"🗜️ Session is large ({_msg_count} messages, "
-                                f"~{_approx_tokens:,} tokens). Auto-compressing...",
-                                metadata=_hyg_meta,
-                            )
-                        except Exception:
-                            pass

                    try:
                        from run_agent import AIAgent
@ -2403,70 +2428,17 @@ class GatewayRunner:
                                    f"{_approx_tokens:,}", f"{_new_tokens:,}",
                                )

-                                if _hyg_adapter:
-                                    try:
-                                        await _hyg_adapter.send(
-                                            source.chat_id,
-                                            f"🗜️ Compressed: {_msg_count} → "
-                                            f"{_new_count} messages, "
-                                            f"~{_approx_tokens:,} → "
-                                            f"~{_new_tokens:,} tokens",
-                                            metadata=_hyg_meta,
-                                        )
-                                    except Exception:
-                                        pass
-
-                                # Still too large after compression — warn user
-                                # Rate-limited to once per cooldown period per
-                                # chat to avoid spamming on every message.
                                if _new_tokens >= _warn_token_threshold:
                                    logger.warning(
                                        "Session hygiene: still ~%s tokens after "
-                                        "compression — suggesting /reset",
+                                        "compression",
                                        f"{_new_tokens:,}",
                                    )
-                                    _now = time.time()
-                                    _last_warn = self._compression_warn_sent.get(source.chat_id, 0)
-                                    if _hyg_adapter and _now - _last_warn >= self._compression_warn_cooldown:
-                                        self._compression_warn_sent[source.chat_id] = _now
-                                        try:
-                                            await _hyg_adapter.send(
-                                                source.chat_id,
-                                                "⚠️ Session is still very large "
-                                                "after compression "
-                                                f"(~{_new_tokens:,} tokens). "
-                                                "Consider using /reset to start "
-                                                "fresh if you experience issues.",
-                                                metadata=_hyg_meta,
-                                            )
-                                        except Exception:
-                                            pass

                    except Exception as e:
                        logger.warning(
                            "Session hygiene auto-compress failed: %s", e
                        )
-                        # Compression failed and session is dangerously large
-                        if _approx_tokens >= _warn_token_threshold:
-                            _hyg_adapter = self.adapters.get(source.platform)
-                            _hyg_meta = {"thread_id": source.thread_id} if source.thread_id else None
-                            _now = time.time()
-                            _last_warn = self._compression_warn_sent.get(source.chat_id, 0)
-                            if _hyg_adapter and _now - _last_warn >= self._compression_warn_cooldown:
-                                self._compression_warn_sent[source.chat_id] = _now
-                                try:
-                                    await _hyg_adapter.send(
-                                        source.chat_id,
-                                        f"⚠️ Session is very large "
-                                        f"({_msg_count} messages, "
-                                        f"~{_approx_tokens:,} tokens) and "
-                                        "auto-compression failed. Consider "
-                                        "using /compress or /reset to avoid "
-                                        "issues.",
-                                        metadata=_hyg_meta,
-                                    )
-                                except Exception:
-                                    pass

        # First-message onboarding -- only on the very first interaction ever
        if not history and not self.session_store.has_any_sessions():
@ -2805,7 +2777,7 @@ class GatewayRunner:
                    {
                        "role": "session_meta",
                        "tools": tool_defs or [],
-                        "model": os.getenv("HERMES_MODEL", ""),
+                        "model": _resolve_gateway_model(),
                        "platform": source.platform.value if source.platform else "",
                        "timestamp": ts,
                    }
@ -3270,9 +3242,11 @@ class GatewayRunner:
            except Exception:
                current_provider = "openrouter"

-        # Detect custom endpoint
-        if current_provider == "openrouter" and os.getenv("OPENAI_BASE_URL", "").strip():
-            current_provider = "custom"
+        # Detect custom endpoint from config base_url
+        if current_provider == "openrouter":
+            _cfg_base = model_cfg.get("base_url", "") if isinstance(model_cfg, dict) else ""
+            if _cfg_base and "openrouter.ai" not in _cfg_base:
+                current_provider = "custom"

        current_label = _PROVIDER_LABELS.get(current_provider, current_provider)

@ -4084,6 +4058,167 @@ class GatewayRunner:
            except Exception:
                pass

+    async def _handle_btw_command(self, event: MessageEvent) -> str:
+        """Handle /btw <question> — ephemeral side question in the same chat."""
+        question = event.get_command_args().strip()
+        if not question:
+            return (
+                "Usage: /btw <question>\n"
+                "Example: /btw what module owns session title sanitization?\n\n"
+                "Answers using session context. No tools, not persisted."
+            )
+
+        source = event.source
+        session_key = self._session_key_for_source(source)
+
+        # Guard: one /btw at a time per session
+        existing = getattr(self, "_active_btw_tasks", {}).get(session_key)
+        if existing and not existing.done():
+            return "A /btw is already running for this chat. Wait for it to finish."
+
+        if not hasattr(self, "_active_btw_tasks"):
+            self._active_btw_tasks: dict = {}
+
+        import uuid as _uuid
+        task_id = f"btw_{datetime.now().strftime('%H%M%S')}_{_uuid.uuid4().hex[:6]}"
+        _task = asyncio.create_task(self._run_btw_task(question, source, session_key, task_id))
+        self._background_tasks.add(_task)
+        self._active_btw_tasks[session_key] = _task
+
+        def _cleanup(task):
+            self._background_tasks.discard(task)
+            if self._active_btw_tasks.get(session_key) is task:
+                self._active_btw_tasks.pop(session_key, None)
+
+        _task.add_done_callback(_cleanup)
+
+        preview = question[:60] + ("..." if len(question) > 60 else "")
+        return f'💬 /btw: "{preview}"\nReply will appear here shortly.'
+
+    async def _run_btw_task(
+        self, question: str, source, session_key: str, task_id: str,
+    ) -> None:
+        """Execute an ephemeral /btw side question and deliver the answer."""
+        from run_agent import AIAgent
+
+        adapter = self.adapters.get(source.platform)
+        if not adapter:
+            logger.warning("No adapter for platform %s in /btw task %s", source.platform, task_id)
+            return
+
+        _thread_meta = {"thread_id": source.thread_id} if source.thread_id else None
+
+        try:
+            runtime_kwargs = _resolve_runtime_agent_kwargs()
+            if not runtime_kwargs.get("api_key"):
+                await adapter.send(
+                    source.chat_id,
+                    "❌ /btw failed: no provider credentials configured.",
+                    metadata=_thread_meta,
+                )
+                return
+
+            user_config = _load_gateway_config()
+            model = _resolve_gateway_model(user_config)
+            platform_key = _platform_config_key(source.platform)
+            reasoning_config = self._load_reasoning_config()
+            turn_route = self._resolve_turn_agent_config(question, model, runtime_kwargs)
+            pr = self._provider_routing
+
+            # Snapshot history from running agent or stored transcript
+            running_agent = self._running_agents.get(session_key)
+            if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
+                history_snapshot = list(getattr(running_agent, "_session_messages", []) or [])
+            else:
+                session_entry = self.session_store.get_or_create_session(source)
+                history_snapshot = self.session_store.load_transcript(session_entry.session_id)
+
+            btw_prompt = (
+                "[Ephemeral /btw side question. Answer using the conversation "
+                "context. No tools available. Be direct and concise.]\n\n"
+                + question
+            )
+
+            def run_sync():
+                agent = AIAgent(
+                    model=turn_route["model"],
+                    **turn_route["runtime"],
+                    max_iterations=8,
+                    quiet_mode=True,
+                    verbose_logging=False,
+                    enabled_toolsets=[],
+                    reasoning_config=reasoning_config,
+                    providers_allowed=pr.get("only"),
+                    providers_ignored=pr.get("ignore"),
+                    providers_order=pr.get("order"),
+                    provider_sort=pr.get("sort"),
+                    provider_require_parameters=pr.get("require_parameters", False),
+                    provider_data_collection=pr.get("data_collection"),
+                    session_id=task_id,
+                    platform=platform_key,
+                    session_db=None,
+                    fallback_model=self._fallback_model,
+                    skip_memory=True,
+                    skip_context_files=True,
+                    persist_session=False,
+                )
+                return agent.run_conversation(
+                    user_message=btw_prompt,
+                    conversation_history=history_snapshot,
+                    task_id=task_id,
+                    sync_honcho=False,
+                )
+
+            loop = asyncio.get_event_loop()
+            result = await loop.run_in_executor(None, run_sync)
+
+            response = (result.get("final_response") or "") if result else ""
+            if not response and result and result.get("error"):
+                response = f"Error: {result['error']}"
+            if not response:
+                response = "(No response generated)"
+
+            media_files, response = adapter.extract_media(response)
+            images, text_content = adapter.extract_images(response)
+            preview = question[:60] + ("..." if len(question) > 60 else "")
+            header = f'💬 /btw: "{preview}"\n\n'
+
+            if text_content:
+                await adapter.send(
+                    chat_id=source.chat_id,
+                    content=header + text_content,
+                    metadata=_thread_meta,
+                )
+            elif not images and not media_files:
+                await adapter.send(
+                    chat_id=source.chat_id,
+                    content=header + "(No response generated)",
+                    metadata=_thread_meta,
+                )
+
+            for image_url, alt_text in (images or []):
+                try:
+                    await adapter.send_image(chat_id=source.chat_id, image_url=image_url, caption=alt_text)
+                except Exception:
+                    pass
+
+            for media_path in (media_files or []):
+                try:
+                    await adapter.send_file(chat_id=source.chat_id, file_path=media_path)
+                except Exception:
+                    pass
+
+        except Exception as e:
+            logger.exception("/btw task %s failed", task_id)
+            try:
+                await adapter.send(
+                    chat_id=source.chat_id,
+                    content=f"❌ /btw failed: {e}",
+                    metadata=_thread_meta,
+                )
+            except Exception:
+                pass
+
    async def _handle_reasoning_command(self, event: MessageEvent) -> str:
        """Handle /reasoning command — manage reasoning effort and display toggle.

@ -4594,9 +4729,13 @@ class GatewayRunner:

    _APPROVAL_TIMEOUT_SECONDS = 300  # 5 minutes

-    async def _handle_approve_command(self, event: MessageEvent) -> str:
+    async def _handle_approve_command(self, event: MessageEvent) -> Optional[str]:
        """Handle /approve command — execute a pending dangerous command.

+        After execution, re-invokes the agent with the command result so it
+        can continue its multi-step task (fixes the "dead agent" bug where
+        the agent loop exited on approval_required and never resumed).
+
        Usage:
            /approve          — approve and execute the pending command
            /approve session  — approve and remember for this session
@ -4645,8 +4784,57 @@ class GatewayRunner:

        logger.info("User approved dangerous command via /approve: %s...%s", cmd[:60], scope_msg)
        from tools.terminal_tool import terminal_tool
-        result = terminal_tool(command=cmd, force=True)
-        return f"✅ Command approved and executed{scope_msg}.\n\n```\n{result[:3500]}\n```"
+        result = await asyncio.to_thread(terminal_tool, command=cmd, force=True)
+
+        # Send immediate feedback so the user sees the command output right away
+        immediate_msg = f"✅ Command approved and executed{scope_msg}.\n\n```\n{result[:3500]}\n```"
+        adapter = self.adapters.get(source.platform)
+        if adapter:
+            try:
+                await adapter.send(source.chat_id, immediate_msg)
+            except Exception as e:
+                logger.warning("Failed to send approval feedback: %s", e)
+
+        # Re-invoke the agent with the command result so it can continue its task.
+        # The agent's conversation history (persisted in SQLite) already contains
+        # the tool call that returned approval_required — the continuation message
+        # provides the actual execution output so the agent can pick up where it
+        # left off.
+        continuation_text = (
+            f"[System: The user approved the previously blocked command and it has been executed.\n"
+            f"Command: {cmd}\n"
+            f"<command_output>\n{result[:3500]}\n</command_output>\n\n"
+            f"Continue with the task you were working on.]"
+        )
+
+        synthetic_event = MessageEvent(
+            text=continuation_text,
+            source=source,
+            message_id=f"approve-continuation-{uuid.uuid4().hex}",
+        )
+
+        async def _continue_agent():
+            try:
+                response = await self._handle_message(synthetic_event)
+                if response and adapter:
+                    await adapter.send(source.chat_id, response)
+            except Exception as e:
+                logger.error("Failed to continue agent after /approve: %s", e)
+                if adapter:
+                    try:
+                        await adapter.send(
+                            source.chat_id,
+                            f"⚠️ Failed to resume agent after approval: {e}"
+                        )
+                    except Exception:
+                        pass
+
+        _task = asyncio.create_task(_continue_agent())
+        self._background_tasks.add(_task)
+        _task.add_done_callback(self._background_tasks.discard)
+        # Return None — we already sent the immediate feedback and the agent
+        # continuation is running in the background.
+        return None

    async def _handle_deny_command(self, event: MessageEvent) -> str:
        """Handle /deny command — reject a pending dangerous command."""
@ -4663,8 +4851,8 @@ class GatewayRunner:
    async def _handle_update_command(self, event: MessageEvent) -> str:
        """Handle /update command — update Hermes Agent to the latest version.

-        Spawns ``hermes update`` in a separate systemd scope so it survives the
-        gateway restart that ``hermes update`` may trigger at the end. Marker
+        Spawns ``hermes update`` in a detached session (via ``setsid``) so it
+        survives the gateway restart that ``hermes update`` may trigger. Marker
        files are written so either the current gateway process or the next one
        can notify the user when the update finishes.
        """
@ -4672,6 +4860,10 @@ class GatewayRunner:
        import shutil
        import subprocess
        from datetime import datetime
+        from hermes_cli.config import is_managed, format_managed_message
+
+        if is_managed():
+            return f"✗ {format_managed_message('update Hermes Agent')}"

        project_root = Path(__file__).parent.parent.resolve()
        git_dir = project_root / '.git'
@ -4700,28 +4892,28 @@ class GatewayRunner:
        pending_path.write_text(json.dumps(pending))
        exit_code_path.unlink(missing_ok=True)

-        # Spawn `hermes update` in a separate cgroup so it survives gateway
-        # restart. systemd-run --user --scope creates a transient scope unit.
+        # Spawn `hermes update` detached so it survives gateway restart.
+        # Use setsid for portable session detach (works under system services
+        # where systemd-run --user fails due to missing D-Bus session).
        hermes_cmd_str = " ".join(shlex.quote(part) for part in hermes_cmd)
        update_cmd = (
            f"{hermes_cmd_str} update > {shlex.quote(str(output_path))} 2>&1; "
            f"status=$?; printf '%s' \"$status\" > {shlex.quote(str(exit_code_path))}"
        )
        try:
-            systemd_run = shutil.which("systemd-run")
-            if systemd_run:
+            setsid_bin = shutil.which("setsid")
+            if setsid_bin:
+                # Preferred: setsid creates a new session, fully detached
                subprocess.Popen(
-                    [systemd_run, "--user", "--scope",
-                     "--unit=hermes-update", "--",
-                     "bash", "-c", update_cmd],
+                    [setsid_bin, "bash", "-c", update_cmd],
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.DEVNULL,
                    start_new_session=True,
                )
            else:
-                # Fallback: best-effort detach with start_new_session
+                # Fallback: start_new_session=True calls os.setsid() in child
                subprocess.Popen(
-                    ["bash", "-c", f"nohup {update_cmd} &"],
+                    ["bash", "-c", update_cmd],
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.DEVNULL,
                    start_new_session=True,
@ -5712,7 +5904,9 @@ class GatewayRunner:
            # If so, update the session store entry so the NEXT message loads
            # the compressed transcript, not the stale pre-compression one.
            agent = agent_holder[0]
+            _session_was_split = False
            if agent and session_key and hasattr(agent, 'session_id') and agent.session_id != session_id:
+                _session_was_split = True
                logger.info(
                    "Session split detected: %s → %s (compression)",
                    session_id, agent.session_id,
@ -5724,6 +5918,13 @@ class GatewayRunner:

            effective_session_id = getattr(agent, 'session_id', session_id) if agent else session_id

+            # When compression created a new session, the messages list was
+            # shortened.  Using the original history offset would produce an
+            # empty new_messages slice, causing the gateway to write only a
+            # user/assistant pair — losing the compressed summary and tail.
+            # Reset to 0 so the gateway writes ALL compressed messages.
+            _effective_history_offset = 0 if _session_was_split else len(agent_history)
+
            # Auto-generate session title after first exchange (non-blocking)
            if final_response and self._session_db:
                try:
@ -5745,7 +5946,7 @@ class GatewayRunner:
                "messages": result_holder[0].get("messages", []) if result_holder[0] else [],
                "api_calls": result_holder[0].get("api_calls", 0) if result_holder[0] else 0,
                "tools": tools_holder[0] or [],
-                "history_offset": len(agent_history),
+                "history_offset": _effective_history_offset,
                "last_prompt_tokens": _last_prompt_toks,
                "input_tokens": _input_toks,
                "output_tokens": _output_toks,
@ -5993,7 +6194,7 @@ def _start_cron_ticker(stop_event: threading.Event, adapters=None, interval: int
    logger.info("Cron ticker stopped")


-async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = False) -> bool:
+async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = False, verbosity: Optional[int] = 0) -> bool:
    """
    Start the gateway and run until interrupted.
    
@ -6095,6 +6296,21 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
    logging.getLogger().addHandler(file_handler)
    logging.getLogger().setLevel(logging.INFO)

+    # Optional stderr handler — level driven by -v/-q flags on the CLI.
+    # verbosity=None (-q/--quiet): no stderr output
+    # verbosity=0    (default):    WARNING and above
+    # verbosity=1    (-v):         INFO and above
+    # verbosity=2+   (-vv/-vvv):   DEBUG
+    if verbosity is not None:
+        _stderr_level = {0: logging.WARNING, 1: logging.INFO}.get(verbosity, logging.DEBUG)
+        _stderr_handler = logging.StreamHandler()
+        _stderr_handler.setLevel(_stderr_level)
+        _stderr_handler.setFormatter(RedactingFormatter('%(levelname)s %(name)s: %(message)s'))
+        logging.getLogger().addHandler(_stderr_handler)
+        # Lower root logger level if needed so DEBUG records can reach the handler
+        if _stderr_level < logging.getLogger().level:
+            logging.getLogger().setLevel(_stderr_level)
+
    # Separate errors-only log for easy debugging
    error_handler = RotatingFileHandler(
        log_dir / 'errors.log',
--- a/gateway/session.py
+++ b/gateway/session.py
@ -364,6 +364,12 @@ class SessionEntry:
    auto_reset_reason: Optional[str] = None  # "idle" or "daily"
    reset_had_activity: bool = False  # whether the expired session had any messages
    
+    # Set by the background expiry watcher after it successfully flushes
+    # memories for this session.  Persisted to sessions.json so the flag
+    # survives gateway restarts (the old in-memory _pre_flushed_sessions
+    # set was lost on restart, causing redundant re-flushes).
+    memory_flushed: bool = False
+    
    def to_dict(self) -> Dict[str, Any]:
        result = {
            "session_key": self.session_key,
@ -381,6 +387,7 @@ class SessionEntry:
            "last_prompt_tokens": self.last_prompt_tokens,
            "estimated_cost_usd": self.estimated_cost_usd,
            "cost_status": self.cost_status,
+            "memory_flushed": self.memory_flushed,
        }
        if self.origin:
            result["origin"] = self.origin.to_dict()
@ -416,6 +423,7 @@ class SessionEntry:
            last_prompt_tokens=data.get("last_prompt_tokens", 0),
            estimated_cost_usd=data.get("estimated_cost_usd", 0.0),
            cost_status=data.get("cost_status", "unknown"),
+            memory_flushed=data.get("memory_flushed", False),
        )


@ -479,9 +487,6 @@ class SessionStore:
        self._loaded = False
        self._lock = threading.Lock()
        self._has_active_processes_fn = has_active_processes_fn
-        # on_auto_reset is deprecated — memory flush now runs proactively
-        # via the background session expiry watcher in GatewayRunner.
-        self._pre_flushed_sessions: set = set()  # session_ids already flushed by watcher
        
        # Initialize SQLite session database
        self._db = None
@ -684,15 +689,12 @@ class SessionStore:
                    self._save()
                    return entry
                else:
-                    # Session is being auto-reset.  The background expiry watcher
-                    # should have already flushed memories proactively; discard
-                    # the marker so it doesn't accumulate.
+                    # Session is being auto-reset.
                    was_auto_reset = True
                    auto_reset_reason = reset_reason
                    # Track whether the expired session had any real conversation
                    reset_had_activity = entry.total_tokens > 0
                    db_end_session_id = entry.session_id
-                    self._pre_flushed_sessions.discard(entry.session_id)
            else:
                was_auto_reset = False
                auto_reset_reason = None
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
@ -160,7 +160,7 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        id="alibaba",
        name="Alibaba Cloud (DashScope)",
        auth_type="api_key",
-        inference_base_url="https://coding-intl.dashscope.aliyuncs.com/v1",
+        inference_base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
        api_key_env_vars=("DASHSCOPE_API_KEY",),
        base_url_env_var="DASHSCOPE_BASE_URL",
    ),
@ -545,7 +545,11 @@ def _load_auth_store(auth_file: Optional[Path] = None) -> Dict[str, Any]:
    except Exception:
        return {"version": AUTH_STORE_VERSION, "providers": {}}

-    if isinstance(raw, dict) and isinstance(raw.get("providers"), dict):
+    if isinstance(raw, dict) and (
+        isinstance(raw.get("providers"), dict)
+        or isinstance(raw.get("credential_pool"), dict)
+    ):
+        raw.setdefault("providers", {})
        return raw

    # Migrate from PR's "systems" format if present
@ -613,6 +617,30 @@ def _save_provider_state(auth_store: Dict[str, Any], provider_id: str, state: Di
    auth_store["active_provider"] = provider_id


+def read_credential_pool(provider_id: Optional[str] = None) -> Dict[str, Any]:
+    """Return the persisted credential pool, or one provider slice."""
+    auth_store = _load_auth_store()
+    pool = auth_store.get("credential_pool")
+    if not isinstance(pool, dict):
+        pool = {}
+    if provider_id is None:
+        return dict(pool)
+    provider_entries = pool.get(provider_id)
+    return list(provider_entries) if isinstance(provider_entries, list) else []
+
+
+def write_credential_pool(provider_id: str, entries: List[Dict[str, Any]]) -> Path:
+    """Persist one provider's credential pool under auth.json."""
+    with _auth_store_lock():
+        auth_store = _load_auth_store()
+        pool = auth_store.get("credential_pool")
+        if not isinstance(pool, dict):
+            pool = {}
+            auth_store["credential_pool"] = pool
+        pool[provider_id] = list(entries)
+        return _save_auth_store(auth_store)
+
+
 def get_provider_auth_state(provider_id: str) -> Optional[Dict[str, Any]]:
    """Return persisted auth state for a provider, or None."""
    auth_store = _load_auth_store()
@ -638,10 +666,25 @@ def clear_provider_auth(provider_id: Optional[str] = None) -> bool:
            return False

        providers = auth_store.get("providers", {})
-        if target not in providers:
-            return False
+        if not isinstance(providers, dict):
+            providers = {}
+            auth_store["providers"] = providers

-        del providers[target]
+        pool = auth_store.get("credential_pool")
+        if not isinstance(pool, dict):
+            pool = {}
+            auth_store["credential_pool"] = pool
+
+        cleared = False
+        if target in providers:
+            del providers[target]
+            cleared = True
+        if target in pool:
+            del pool[target]
+            cleared = True
+
+        if not cleared:
+            return False
        if auth_store.get("active_provider") == target:
            auth_store["active_provider"] = None
        _save_auth_store(auth_store)
@ -898,15 +941,14 @@ def _save_codex_tokens(tokens: Dict[str, str], last_refresh: str = None) -> None
        _save_auth_store(auth_store)


-def _refresh_codex_auth_tokens(
-    tokens: Dict[str, str],
-    timeout_seconds: float,
-) -> Dict[str, str]:
-    """Refresh Codex access token using the refresh token.
-    
-    Saves the new tokens to Hermes auth store automatically.
-    """
-    refresh_token = tokens.get("refresh_token")
+def refresh_codex_oauth_pure(
+    access_token: str,
+    refresh_token: str,
+    *,
+    timeout_seconds: float = 20.0,
+) -> Dict[str, Any]:
+    """Refresh Codex OAuth tokens without mutating Hermes auth state."""
+    del access_token  # Access token is only used by callers to decide whether to refresh.
    if not isinstance(refresh_token, str) or not refresh_token.strip():
        raise AuthError(
            "Codex auth is missing refresh_token. Run `hermes login` to re-authenticate.",
@ -961,8 +1003,8 @@ def _refresh_codex_auth_tokens(
            relogin_required=True,
        ) from exc

-    access_token = refresh_payload.get("access_token")
-    if not isinstance(access_token, str) or not access_token.strip():
+    refreshed_access = refresh_payload.get("access_token")
+    if not isinstance(refreshed_access, str) or not refreshed_access.strip():
        raise AuthError(
            "Codex token refresh response was missing access_token.",
            provider="openai-codex",
@ -970,11 +1012,33 @@ def _refresh_codex_auth_tokens(
            relogin_required=True,
        )

-    updated_tokens = dict(tokens)
-    updated_tokens["access_token"] = access_token.strip()
+    updated = {
+        "access_token": refreshed_access.strip(),
+        "refresh_token": refresh_token.strip(),
+        "last_refresh": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
+    }
    next_refresh = refresh_payload.get("refresh_token")
    if isinstance(next_refresh, str) and next_refresh.strip():
-        updated_tokens["refresh_token"] = next_refresh.strip()
+        updated["refresh_token"] = next_refresh.strip()
+    return updated
+
+
+def _refresh_codex_auth_tokens(
+    tokens: Dict[str, str],
+    timeout_seconds: float,
+) -> Dict[str, str]:
+    """Refresh Codex access token using the refresh token.
+    
+    Saves the new tokens to Hermes auth store automatically.
+    """
+    refreshed = refresh_codex_oauth_pure(
+        str(tokens.get("access_token", "") or ""),
+        str(tokens.get("refresh_token", "") or ""),
+        timeout_seconds=timeout_seconds,
+    )
+    updated_tokens = dict(tokens)
+    updated_tokens["access_token"] = refreshed["access_token"]
+    updated_tokens["refresh_token"] = refreshed["refresh_token"]

    _save_codex_tokens(updated_tokens)
    return updated_tokens
@ -1396,6 +1460,122 @@ def resolve_nous_access_token(
        return state["access_token"]


+def refresh_nous_oauth_pure(
+    access_token: str,
+    refresh_token: str,
+    client_id: str,
+    portal_base_url: str,
+    inference_base_url: str,
+    *,
+    token_type: str = "Bearer",
+    scope: str = DEFAULT_NOUS_SCOPE,
+    obtained_at: Optional[str] = None,
+    expires_at: Optional[str] = None,
+    agent_key: Optional[str] = None,
+    agent_key_expires_at: Optional[str] = None,
+    min_key_ttl_seconds: int = DEFAULT_AGENT_KEY_MIN_TTL_SECONDS,
+    timeout_seconds: float = 15.0,
+    insecure: Optional[bool] = None,
+    ca_bundle: Optional[str] = None,
+    force_refresh: bool = False,
+    force_mint: bool = False,
+) -> Dict[str, Any]:
+    """Refresh Nous OAuth state without mutating auth.json."""
+    state: Dict[str, Any] = {
+        "access_token": access_token,
+        "refresh_token": refresh_token,
+        "client_id": client_id or DEFAULT_NOUS_CLIENT_ID,
+        "portal_base_url": (portal_base_url or DEFAULT_NOUS_PORTAL_URL).rstrip("/"),
+        "inference_base_url": (inference_base_url or DEFAULT_NOUS_INFERENCE_URL).rstrip("/"),
+        "token_type": token_type or "Bearer",
+        "scope": scope or DEFAULT_NOUS_SCOPE,
+        "obtained_at": obtained_at,
+        "expires_at": expires_at,
+        "agent_key": agent_key,
+        "agent_key_expires_at": agent_key_expires_at,
+        "tls": {
+            "insecure": bool(insecure),
+            "ca_bundle": ca_bundle,
+        },
+    }
+    verify = _resolve_verify(insecure=insecure, ca_bundle=ca_bundle, auth_state=state)
+    timeout = httpx.Timeout(timeout_seconds if timeout_seconds else 15.0)
+
+    with httpx.Client(timeout=timeout, headers={"Accept": "application/json"}, verify=verify) as client:
+        if force_refresh or _is_expiring(state.get("expires_at"), ACCESS_TOKEN_REFRESH_SKEW_SECONDS):
+            refreshed = _refresh_access_token(
+                client=client,
+                portal_base_url=state["portal_base_url"],
+                client_id=state["client_id"],
+                refresh_token=state["refresh_token"],
+            )
+            now = datetime.now(timezone.utc)
+            access_ttl = _coerce_ttl_seconds(refreshed.get("expires_in"))
+            state["access_token"] = refreshed["access_token"]
+            state["refresh_token"] = refreshed.get("refresh_token") or state["refresh_token"]
+            state["token_type"] = refreshed.get("token_type") or state.get("token_type") or "Bearer"
+            state["scope"] = refreshed.get("scope") or state.get("scope")
+            refreshed_url = _optional_base_url(refreshed.get("inference_base_url"))
+            if refreshed_url:
+                state["inference_base_url"] = refreshed_url
+            state["obtained_at"] = now.isoformat()
+            state["expires_in"] = access_ttl
+            state["expires_at"] = datetime.fromtimestamp(
+                now.timestamp() + access_ttl, tz=timezone.utc
+            ).isoformat()
+
+        if force_mint or not _agent_key_is_usable(state, max(60, int(min_key_ttl_seconds))):
+            mint_payload = _mint_agent_key(
+                client=client,
+                portal_base_url=state["portal_base_url"],
+                access_token=state["access_token"],
+                min_ttl_seconds=min_key_ttl_seconds,
+            )
+            now = datetime.now(timezone.utc)
+            state["agent_key"] = mint_payload.get("api_key")
+            state["agent_key_id"] = mint_payload.get("key_id")
+            state["agent_key_expires_at"] = mint_payload.get("expires_at")
+            state["agent_key_expires_in"] = mint_payload.get("expires_in")
+            state["agent_key_reused"] = bool(mint_payload.get("reused", False))
+            state["agent_key_obtained_at"] = now.isoformat()
+            minted_url = _optional_base_url(mint_payload.get("inference_base_url"))
+            if minted_url:
+                state["inference_base_url"] = minted_url
+
+    return state
+
+
+def refresh_nous_oauth_from_state(
+    state: Dict[str, Any],
+    *,
+    min_key_ttl_seconds: int = DEFAULT_AGENT_KEY_MIN_TTL_SECONDS,
+    timeout_seconds: float = 15.0,
+    force_refresh: bool = False,
+    force_mint: bool = False,
+) -> Dict[str, Any]:
+    """Refresh Nous OAuth from a state dict. Thin wrapper around refresh_nous_oauth_pure."""
+    tls = state.get("tls") or {}
+    return refresh_nous_oauth_pure(
+        state.get("access_token", ""),
+        state.get("refresh_token", ""),
+        state.get("client_id", "hermes-cli"),
+        state.get("portal_base_url", DEFAULT_NOUS_PORTAL_URL),
+        state.get("inference_base_url", DEFAULT_NOUS_INFERENCE_URL),
+        token_type=state.get("token_type", "Bearer"),
+        scope=state.get("scope", DEFAULT_NOUS_SCOPE),
+        obtained_at=state.get("obtained_at"),
+        expires_at=state.get("expires_at"),
+        agent_key=state.get("agent_key"),
+        agent_key_expires_at=state.get("agent_key_expires_at"),
+        min_key_ttl_seconds=min_key_ttl_seconds,
+        timeout_seconds=timeout_seconds,
+        insecure=tls.get("insecure"),
+        ca_bundle=tls.get("ca_bundle"),
+        force_refresh=force_refresh,
+        force_mint=force_mint,
+    )
+
+
 def resolve_nous_runtime_credentials(
    *,
    min_key_ttl_seconds: int = DEFAULT_AGENT_KEY_MIN_TTL_SECONDS,
@ -2263,34 +2443,36 @@ def _codex_device_code_login() -> Dict[str, Any]:
    }


-def _login_nous(args, pconfig: ProviderConfig) -> None:
-    """Nous Portal device authorization flow."""
+def _nous_device_code_login(
+    *,
+    portal_base_url: Optional[str] = None,
+    inference_base_url: Optional[str] = None,
+    client_id: Optional[str] = None,
+    scope: Optional[str] = None,
+    open_browser: bool = True,
+    timeout_seconds: float = 15.0,
+    insecure: bool = False,
+    ca_bundle: Optional[str] = None,
+    min_key_ttl_seconds: int = 5 * 60,
+) -> Dict[str, Any]:
+    """Run the Nous device-code flow and return full OAuth state without persisting."""
+    pconfig = PROVIDER_REGISTRY["nous"]
    portal_base_url = (
-        getattr(args, "portal_url", None)
+        portal_base_url
        or os.getenv("HERMES_PORTAL_BASE_URL")
        or os.getenv("NOUS_PORTAL_BASE_URL")
        or pconfig.portal_base_url
    ).rstrip("/")
    requested_inference_url = (
-        getattr(args, "inference_url", None)
+        inference_base_url
        or os.getenv("NOUS_INFERENCE_BASE_URL")
        or pconfig.inference_base_url
    ).rstrip("/")
-    client_id = getattr(args, "client_id", None) or pconfig.client_id
-    scope = getattr(args, "scope", None) or pconfig.scope
-    open_browser = not getattr(args, "no_browser", False)
-    timeout_seconds = getattr(args, "timeout", None) or 15.0
+    client_id = client_id or pconfig.client_id
+    scope = scope or pconfig.scope
    timeout = httpx.Timeout(timeout_seconds)
-
-    insecure = bool(getattr(args, "insecure", False))
-    ca_bundle = (
-        getattr(args, "ca_bundle", None)
-        or os.getenv("HERMES_CA_BUNDLE")
-        or os.getenv("SSL_CERT_FILE")
-    )
    verify: bool | str = False if insecure else (ca_bundle if ca_bundle else True)

-    # Skip browser open in SSH sessions
    if _is_remote_session():
        open_browser = False

@ -2301,74 +2483,109 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
    elif ca_bundle:
        print(f"TLS verification: custom CA bundle ({ca_bundle})")

-    try:
-        with httpx.Client(timeout=timeout, headers={"Accept": "application/json"}, verify=verify) as client:
-            device_data = _request_device_code(
-                client=client, portal_base_url=portal_base_url,
-                client_id=client_id, scope=scope,
-            )
-
-            verification_url = str(device_data["verification_uri_complete"])
-            user_code = str(device_data["user_code"])
-            expires_in = int(device_data["expires_in"])
-            interval = int(device_data["interval"])
-
-            print()
-            print("To continue:")
-            print(f"  1. Open: {verification_url}")
-            print(f"  2. If prompted, enter code: {user_code}")
-
-            if open_browser:
-                opened = webbrowser.open(verification_url)
-                if opened:
-                    print("  (Opened browser for verification)")
-                else:
-                    print("  Could not open browser automatically — use the URL above.")
-
-            effective_interval = max(1, min(interval, DEVICE_AUTH_POLL_INTERVAL_CAP_SECONDS))
-            print(f"Waiting for approval (polling every {effective_interval}s)...")
-
-            token_data = _poll_for_token(
-                client=client, portal_base_url=portal_base_url,
-                client_id=client_id, device_code=str(device_data["device_code"]),
-                expires_in=expires_in, poll_interval=interval,
-            )
-
-        # Process token response
-        now = datetime.now(timezone.utc)
-        token_expires_in = _coerce_ttl_seconds(token_data.get("expires_in", 0))
-        expires_at = now.timestamp() + token_expires_in
-        inference_base_url = (
-            _optional_base_url(token_data.get("inference_base_url"))
-            or requested_inference_url
+    with httpx.Client(timeout=timeout, headers={"Accept": "application/json"}, verify=verify) as client:
+        device_data = _request_device_code(
+            client=client,
+            portal_base_url=portal_base_url,
+            client_id=client_id,
+            scope=scope,
        )
-        if inference_base_url != requested_inference_url:
-            print(f"Using portal-provided inference URL: {inference_base_url}")

-        auth_state = {
-            "portal_base_url": portal_base_url,
-            "inference_base_url": inference_base_url,
-            "client_id": client_id,
-            "scope": token_data.get("scope") or scope,
-            "token_type": token_data.get("token_type", "Bearer"),
-            "access_token": token_data["access_token"],
-            "refresh_token": token_data.get("refresh_token"),
-            "obtained_at": now.isoformat(),
-            "expires_at": datetime.fromtimestamp(expires_at, tz=timezone.utc).isoformat(),
-            "expires_in": token_expires_in,
-            "tls": {
-                "insecure": verify is False,
-                "ca_bundle": verify if isinstance(verify, str) else None,
-            },
-            "agent_key": None,
-            "agent_key_id": None,
-            "agent_key_expires_at": None,
-            "agent_key_expires_in": None,
-            "agent_key_reused": None,
-            "agent_key_obtained_at": None,
-        }
+        verification_url = str(device_data["verification_uri_complete"])
+        user_code = str(device_data["user_code"])
+        expires_in = int(device_data["expires_in"])
+        interval = int(device_data["interval"])
+
+        print()
+        print("To continue:")
+        print(f"  1. Open: {verification_url}")
+        print(f"  2. If prompted, enter code: {user_code}")
+
+        if open_browser:
+            opened = webbrowser.open(verification_url)
+            if opened:
+                print("  (Opened browser for verification)")
+            else:
+                print("  Could not open browser automatically — use the URL above.")
+
+        effective_interval = max(1, min(interval, DEVICE_AUTH_POLL_INTERVAL_CAP_SECONDS))
+        print(f"Waiting for approval (polling every {effective_interval}s)...")
+
+        token_data = _poll_for_token(
+            client=client,
+            portal_base_url=portal_base_url,
+            client_id=client_id,
+            device_code=str(device_data["device_code"]),
+            expires_in=expires_in,
+            poll_interval=interval,
+        )
+
+    now = datetime.now(timezone.utc)
+    token_expires_in = _coerce_ttl_seconds(token_data.get("expires_in", 0))
+    expires_at = now.timestamp() + token_expires_in
+    resolved_inference_url = (
+        _optional_base_url(token_data.get("inference_base_url"))
+        or requested_inference_url
+    )
+    if resolved_inference_url != requested_inference_url:
+        print(f"Using portal-provided inference URL: {resolved_inference_url}")
+
+    auth_state = {
+        "portal_base_url": portal_base_url,
+        "inference_base_url": resolved_inference_url,
+        "client_id": client_id,
+        "scope": token_data.get("scope") or scope,
+        "token_type": token_data.get("token_type", "Bearer"),
+        "access_token": token_data["access_token"],
+        "refresh_token": token_data.get("refresh_token"),
+        "obtained_at": now.isoformat(),
+        "expires_at": datetime.fromtimestamp(expires_at, tz=timezone.utc).isoformat(),
+        "expires_in": token_expires_in,
+        "tls": {
+            "insecure": verify is False,
+            "ca_bundle": verify if isinstance(verify, str) else None,
+        },
+        "agent_key": None,
+        "agent_key_id": None,
+        "agent_key_expires_at": None,
+        "agent_key_expires_in": None,
+        "agent_key_reused": None,
+        "agent_key_obtained_at": None,
+    }
+    return refresh_nous_oauth_from_state(
+        auth_state,
+        min_key_ttl_seconds=min_key_ttl_seconds,
+        timeout_seconds=timeout_seconds,
+        force_refresh=False,
+        force_mint=True,
+    )
+
+
+def _login_nous(args, pconfig: ProviderConfig) -> None:
+    """Nous Portal device authorization flow."""
+    timeout_seconds = getattr(args, "timeout", None) or 15.0
+    insecure = bool(getattr(args, "insecure", False))
+    ca_bundle = (
+        getattr(args, "ca_bundle", None)
+        or os.getenv("HERMES_CA_BUNDLE")
+        or os.getenv("SSL_CERT_FILE")
+    )
+
+    try:
+        auth_state = _nous_device_code_login(
+            portal_base_url=getattr(args, "portal_url", None) or pconfig.portal_base_url,
+            inference_base_url=getattr(args, "inference_url", None) or pconfig.inference_base_url,
+            client_id=getattr(args, "client_id", None) or pconfig.client_id,
+            scope=getattr(args, "scope", None) or pconfig.scope,
+            open_browser=not getattr(args, "no_browser", False),
+            timeout_seconds=timeout_seconds,
+            insecure=insecure,
+            ca_bundle=ca_bundle,
+            min_key_ttl_seconds=5 * 60,
+        )
+        inference_base_url = auth_state["inference_base_url"]
+        verify: bool | str = False if insecure else (ca_bundle if ca_bundle else True)

-        # Save auth state
        with _auth_store_lock():
            auth_store = _load_auth_store()
            _save_provider_state(auth_store, "nous", auth_state)
@ -2380,18 +2597,14 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
        print(f"  Auth state: {saved_to}")
        print(f"  Config updated: {config_path} (model.provider=nous)")

-        # Mint an initial agent key and list available models
        try:
-            runtime_creds = resolve_nous_runtime_credentials(
-                min_key_ttl_seconds=5 * 60,
-                timeout_seconds=timeout_seconds,
-                insecure=insecure, ca_bundle=ca_bundle,
-            )
-            runtime_key = runtime_creds.get("api_key")
-            runtime_base_url = runtime_creds.get("base_url") or inference_base_url
+            runtime_key = auth_state.get("agent_key") or auth_state.get("access_token")
            if not isinstance(runtime_key, str) or not runtime_key:
-                raise AuthError("No runtime API key available to fetch models",
-                                provider="nous", code="invalid_token")
+                raise AuthError(
+                    "No runtime API key available to fetch models",
+                    provider="nous",
+                    code="invalid_token",
+                )

            # Use curated model list (same as OpenRouter defaults) instead
            # of the full /models dump which returns hundreds of models.
--- a/hermes_cli/auth_commands.py
+++ b/hermes_cli/auth_commands.py
@ -0,0 +1,470 @@
+"""Credential-pool auth subcommands."""
+
+from __future__ import annotations
+
+from getpass import getpass
+import math
+import time
+from types import SimpleNamespace
+import uuid
+
+from agent.credential_pool import (
+    AUTH_TYPE_API_KEY,
+    AUTH_TYPE_OAUTH,
+    CUSTOM_POOL_PREFIX,
+    SOURCE_MANUAL,
+    STATUS_EXHAUSTED,
+    STRATEGY_FILL_FIRST,
+    STRATEGY_ROUND_ROBIN,
+    STRATEGY_RANDOM,
+    STRATEGY_LEAST_USED,
+    SUPPORTED_POOL_STRATEGIES,
+    PooledCredential,
+    _normalize_custom_pool_name,
+    get_pool_strategy,
+    label_from_token,
+    list_custom_pool_providers,
+    load_pool,
+    _exhausted_ttl,
+)
+import hermes_cli.auth as auth_mod
+from hermes_cli.auth import PROVIDER_REGISTRY
+from hermes_constants import OPENROUTER_BASE_URL
+
+
+# Providers that support OAuth login in addition to API keys.
+_OAUTH_CAPABLE_PROVIDERS = {"anthropic", "nous", "openai-codex"}
+
+
+def _get_custom_provider_names() -> list:
+    """Return list of (display_name, pool_key) tuples for custom_providers in config."""
+    try:
+        from hermes_cli.config import load_config
+
+        config = load_config()
+    except Exception:
+        return []
+    custom_providers = config.get("custom_providers")
+    if not isinstance(custom_providers, list):
+        return []
+    result = []
+    for entry in custom_providers:
+        if not isinstance(entry, dict):
+            continue
+        name = entry.get("name")
+        if not isinstance(name, str) or not name.strip():
+            continue
+        pool_key = f"{CUSTOM_POOL_PREFIX}{_normalize_custom_pool_name(name)}"
+        result.append((name.strip(), pool_key))
+    return result
+
+
+def _resolve_custom_provider_input(raw: str) -> str | None:
+    """If raw input matches a custom_providers entry name (case-insensitive), return its pool key."""
+    normalized = (raw or "").strip().lower().replace(" ", "-")
+    if not normalized:
+        return None
+    # Direct match on 'custom:name' format
+    if normalized.startswith(CUSTOM_POOL_PREFIX):
+        return normalized
+    for display_name, pool_key in _get_custom_provider_names():
+        if _normalize_custom_pool_name(display_name) == normalized:
+            return pool_key
+    return None
+
+
+def _normalize_provider(provider: str) -> str:
+    normalized = (provider or "").strip().lower()
+    if normalized in {"or", "open-router"}:
+        return "openrouter"
+    # Check if it matches a custom provider name
+    custom_key = _resolve_custom_provider_input(normalized)
+    if custom_key:
+        return custom_key
+    return normalized
+
+
+def _provider_base_url(provider: str) -> str:
+    if provider == "openrouter":
+        return OPENROUTER_BASE_URL
+    if provider.startswith(CUSTOM_POOL_PREFIX):
+        from agent.credential_pool import _get_custom_provider_config
+
+        cp_config = _get_custom_provider_config(provider)
+        if cp_config:
+            return str(cp_config.get("base_url") or "").strip()
+        return ""
+    pconfig = PROVIDER_REGISTRY.get(provider)
+    return pconfig.inference_base_url if pconfig else ""
+
+
+def _oauth_default_label(provider: str, count: int) -> str:
+    return f"{provider}-oauth-{count}"
+
+
+def _api_key_default_label(count: int) -> str:
+    return f"api-key-{count}"
+
+
+def _display_source(source: str) -> str:
+    return source.split(":", 1)[1] if source.startswith("manual:") else source
+
+
+def _format_exhausted_status(entry) -> str:
+    if entry.last_status != STATUS_EXHAUSTED:
+        return ""
+    code = f" ({entry.last_error_code})" if entry.last_error_code else ""
+    if not entry.last_status_at:
+        return f" exhausted{code}"
+    remaining = max(0, int(math.ceil((entry.last_status_at + _exhausted_ttl(entry.last_error_code)) - time.time())))
+    if remaining <= 0:
+        return f" exhausted{code} (ready to retry)"
+    minutes, seconds = divmod(remaining, 60)
+    hours, minutes = divmod(minutes, 60)
+    if hours:
+        wait = f"{hours}h {minutes}m"
+    elif minutes:
+        wait = f"{minutes}m {seconds}s"
+    else:
+        wait = f"{seconds}s"
+    return f" exhausted{code} ({wait} left)"
+
+
+def auth_add_command(args) -> None:
+    provider = _normalize_provider(getattr(args, "provider", ""))
+    if provider not in PROVIDER_REGISTRY and provider != "openrouter" and not provider.startswith(CUSTOM_POOL_PREFIX):
+        raise SystemExit(f"Unknown provider: {provider}")
+
+    requested_type = str(getattr(args, "auth_type", "") or "").strip().lower()
+    if requested_type in {AUTH_TYPE_API_KEY, "api-key"}:
+        requested_type = AUTH_TYPE_API_KEY
+    if not requested_type:
+        if provider.startswith(CUSTOM_POOL_PREFIX):
+            requested_type = AUTH_TYPE_API_KEY
+        else:
+            requested_type = AUTH_TYPE_OAUTH if provider in {"anthropic", "nous", "openai-codex"} else AUTH_TYPE_API_KEY
+
+    pool = load_pool(provider)
+
+    if requested_type == AUTH_TYPE_API_KEY:
+        token = (getattr(args, "api_key", None) or "").strip()
+        if not token:
+            token = getpass("Paste your API key: ").strip()
+        if not token:
+            raise SystemExit("No API key provided.")
+        default_label = _api_key_default_label(len(pool.entries()) + 1)
+        label = (getattr(args, "label", None) or "").strip()
+        if not label:
+            label = input(f"Label (optional, default: {default_label}): ").strip() or default_label
+        entry = PooledCredential(
+            provider=provider,
+            id=uuid.uuid4().hex[:6],
+            label=label,
+            auth_type=AUTH_TYPE_API_KEY,
+            priority=0,
+            source=SOURCE_MANUAL,
+            access_token=token,
+            base_url=_provider_base_url(provider),
+        )
+        pool.add_entry(entry)
+        print(f'Added {provider} credential #{len(pool.entries())}: "{label}"')
+        return
+
+    if provider == "anthropic":
+        from agent import anthropic_adapter as anthropic_mod
+
+        creds = anthropic_mod.run_hermes_oauth_login_pure()
+        if not creds:
+            raise SystemExit("Anthropic OAuth login did not return credentials.")
+        label = (getattr(args, "label", None) or "").strip() or label_from_token(
+            creds["access_token"],
+            _oauth_default_label(provider, len(pool.entries()) + 1),
+        )
+        entry = PooledCredential(
+            provider=provider,
+            id=uuid.uuid4().hex[:6],
+            label=label,
+            auth_type=AUTH_TYPE_OAUTH,
+            priority=0,
+            source=f"{SOURCE_MANUAL}:hermes_pkce",
+            access_token=creds["access_token"],
+            refresh_token=creds.get("refresh_token"),
+            expires_at_ms=creds.get("expires_at_ms"),
+            base_url=_provider_base_url(provider),
+        )
+        pool.add_entry(entry)
+        print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
+        return
+
+    if provider == "nous":
+        creds = auth_mod._nous_device_code_login(
+            portal_base_url=getattr(args, "portal_url", None),
+            inference_base_url=getattr(args, "inference_url", None),
+            client_id=getattr(args, "client_id", None),
+            scope=getattr(args, "scope", None),
+            open_browser=not getattr(args, "no_browser", False),
+            timeout_seconds=getattr(args, "timeout", None) or 15.0,
+            insecure=bool(getattr(args, "insecure", False)),
+            ca_bundle=getattr(args, "ca_bundle", None),
+            min_key_ttl_seconds=max(60, int(getattr(args, "min_key_ttl_seconds", 5 * 60))),
+        )
+        label = (getattr(args, "label", None) or "").strip() or label_from_token(
+            creds.get("access_token", ""),
+            _oauth_default_label(provider, len(pool.entries()) + 1),
+        )
+        entry = PooledCredential.from_dict(provider, {
+            **creds,
+            "label": label,
+            "auth_type": AUTH_TYPE_OAUTH,
+            "source": f"{SOURCE_MANUAL}:device_code",
+            "base_url": creds.get("inference_base_url"),
+        })
+        pool.add_entry(entry)
+        print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
+        return
+
+    if provider == "openai-codex":
+        creds = auth_mod._codex_device_code_login()
+        label = (getattr(args, "label", None) or "").strip() or label_from_token(
+            creds["tokens"]["access_token"],
+            _oauth_default_label(provider, len(pool.entries()) + 1),
+        )
+        entry = PooledCredential(
+            provider=provider,
+            id=uuid.uuid4().hex[:6],
+            label=label,
+            auth_type=AUTH_TYPE_OAUTH,
+            priority=0,
+            source=f"{SOURCE_MANUAL}:device_code",
+            access_token=creds["tokens"]["access_token"],
+            refresh_token=creds["tokens"].get("refresh_token"),
+            base_url=creds.get("base_url"),
+            last_refresh=creds.get("last_refresh"),
+        )
+        pool.add_entry(entry)
+        print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
+        return
+
+    raise SystemExit(f"`hermes auth add {provider}` is not implemented for auth type {requested_type} yet.")
+
+
+def auth_list_command(args) -> None:
+    provider_filter = _normalize_provider(getattr(args, "provider", "") or "")
+    if provider_filter:
+        providers = [provider_filter]
+    else:
+        providers = sorted({
+            *PROVIDER_REGISTRY.keys(),
+            "openrouter",
+            *list_custom_pool_providers(),
+        })
+    for provider in providers:
+        pool = load_pool(provider)
+        entries = pool.entries()
+        if not entries:
+            continue
+        current = pool.peek()
+        print(f"{provider} ({len(entries)} credentials):")
+        for idx, entry in enumerate(entries, start=1):
+            marker = "  "
+            if current is not None and entry.id == current.id:
+                marker = "← "
+            status = _format_exhausted_status(entry)
+            source = _display_source(entry.source)
+            print(f"  #{idx}  {entry.label:<20} {entry.auth_type:<7} {source}{status} {marker}".rstrip())
+        print()
+
+
+def auth_remove_command(args) -> None:
+    provider = _normalize_provider(getattr(args, "provider", ""))
+    index = int(getattr(args, "index"))
+    pool = load_pool(provider)
+    removed = pool.remove_index(index)
+    if removed is None:
+        raise SystemExit(f"No credential #{index} for provider {provider}.")
+    print(f"Removed {provider} credential #{index} ({removed.label})")
+
+
+def auth_reset_command(args) -> None:
+    provider = _normalize_provider(getattr(args, "provider", ""))
+    pool = load_pool(provider)
+    count = pool.reset_statuses()
+    print(f"Reset status on {count} {provider} credentials")
+
+
+def _interactive_auth() -> None:
+    """Interactive credential pool management when `hermes auth` is called bare."""
+    # Show current pool status first
+    print("Credential Pool Status")
+    print("=" * 50)
+
+    auth_list_command(SimpleNamespace(provider=None))
+    print()
+
+    # Main menu
+    choices = [
+        "Add a credential",
+        "Remove a credential",
+        "Reset cooldowns for a provider",
+        "Set rotation strategy for a provider",
+        "Exit",
+    ]
+    print("What would you like to do?")
+    for i, choice in enumerate(choices, 1):
+        print(f"  {i}. {choice}")
+
+    try:
+        raw = input("\nChoice: ").strip()
+    except (EOFError, KeyboardInterrupt):
+        return
+
+    if not raw or raw == str(len(choices)):
+        return
+
+    if raw == "1":
+        _interactive_add()
+    elif raw == "2":
+        _interactive_remove()
+    elif raw == "3":
+        _interactive_reset()
+    elif raw == "4":
+        _interactive_strategy()
+
+
+def _pick_provider(prompt: str = "Provider") -> str:
+    """Prompt for a provider name with auto-complete hints."""
+    known = sorted(set(list(PROVIDER_REGISTRY.keys()) + ["openrouter"]))
+    custom_names = _get_custom_provider_names()
+    if custom_names:
+        custom_display = [name for name, _key in custom_names]
+        print(f"\nKnown providers: {', '.join(known)}")
+        print(f"Custom endpoints: {', '.join(custom_display)}")
+    else:
+        print(f"\nKnown providers: {', '.join(known)}")
+    try:
+        raw = input(f"{prompt}: ").strip()
+    except (EOFError, KeyboardInterrupt):
+        raise SystemExit()
+    return _normalize_provider(raw)
+
+
+def _interactive_add() -> None:
+    provider = _pick_provider("Provider to add credential for")
+    if provider not in PROVIDER_REGISTRY and provider != "openrouter" and not provider.startswith(CUSTOM_POOL_PREFIX):
+        raise SystemExit(f"Unknown provider: {provider}")
+
+    # For OAuth-capable providers, ask which type
+    if provider in _OAUTH_CAPABLE_PROVIDERS:
+        print(f"\n{provider} supports both API keys and OAuth login.")
+        print("  1. API key (paste a key from the provider dashboard)")
+        print("  2. OAuth login (authenticate via browser)")
+        try:
+            type_choice = input("Type [1/2]: ").strip()
+        except (EOFError, KeyboardInterrupt):
+            return
+        if type_choice == "2":
+            auth_type = "oauth"
+        else:
+            auth_type = "api_key"
+    else:
+        auth_type = "api_key"
+
+    auth_add_command(SimpleNamespace(
+        provider=provider, auth_type=auth_type, label=None, api_key=None,
+        portal_url=None, inference_url=None, client_id=None, scope=None,
+        no_browser=False, timeout=None, insecure=False, ca_bundle=None,
+    ))
+
+
+def _interactive_remove() -> None:
+    provider = _pick_provider("Provider to remove credential from")
+    pool = load_pool(provider)
+    if not pool.has_credentials():
+        print(f"No credentials for {provider}.")
+        return
+
+    # Show entries with indices
+    for i, e in enumerate(pool.entries(), 1):
+        exhausted = _format_exhausted_status(e)
+        print(f"  #{i}  {e.label:25s} {e.auth_type:10s} {e.source}{exhausted}")
+
+    try:
+        raw = input("Remove # (or blank to cancel): ").strip()
+    except (EOFError, KeyboardInterrupt):
+        return
+    if not raw:
+        return
+
+    try:
+        index = int(raw)
+    except ValueError:
+        print("Invalid number.")
+        return
+
+    auth_remove_command(SimpleNamespace(provider=provider, index=index))
+
+
+def _interactive_reset() -> None:
+    provider = _pick_provider("Provider to reset cooldowns for")
+
+    auth_reset_command(SimpleNamespace(provider=provider))
+
+
+def _interactive_strategy() -> None:
+    provider = _pick_provider("Provider to set strategy for")
+    current = get_pool_strategy(provider)
+    strategies = [STRATEGY_FILL_FIRST, STRATEGY_ROUND_ROBIN, STRATEGY_LEAST_USED, STRATEGY_RANDOM]
+
+    print(f"\nCurrent strategy for {provider}: {current}")
+    print()
+    descriptions = {
+        STRATEGY_FILL_FIRST: "Use first key until exhausted, then next",
+        STRATEGY_ROUND_ROBIN: "Cycle through keys evenly",
+        STRATEGY_LEAST_USED: "Always pick the least-used key",
+        STRATEGY_RANDOM: "Random selection",
+    }
+    for i, s in enumerate(strategies, 1):
+        marker = " ←" if s == current else ""
+        print(f"  {i}. {s:15s} — {descriptions.get(s, '')}{marker}")
+
+    try:
+        raw = input("\nStrategy [1-4]: ").strip()
+    except (EOFError, KeyboardInterrupt):
+        return
+    if not raw:
+        return
+
+    try:
+        idx = int(raw) - 1
+        strategy = strategies[idx]
+    except (ValueError, IndexError):
+        print("Invalid choice.")
+        return
+
+    from hermes_cli.config import load_config, save_config
+    cfg = load_config()
+    pool_strategies = cfg.get("credential_pool_strategies") or {}
+    if not isinstance(pool_strategies, dict):
+        pool_strategies = {}
+    pool_strategies[provider] = strategy
+    cfg["credential_pool_strategies"] = pool_strategies
+    save_config(cfg)
+    print(f"Set {provider} strategy to: {strategy}")
+
+
+def auth_command(args) -> None:
+    action = getattr(args, "auth_action", "")
+    if action == "add":
+        auth_add_command(args)
+        return
+    if action == "list":
+        auth_list_command(args)
+        return
+    if action == "remove":
+        auth_remove_command(args)
+        return
+    if action == "reset":
+        auth_reset_command(args)
+        return
+    # No subcommand — launch interactive mode
+    _interactive_auth()
--- a/hermes_cli/banner.py
+++ b/hermes_cli/banner.py
@ -432,10 +432,11 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
    try:
        behind = get_update_result(timeout=0.5)
        if behind and behind > 0:
+            from hermes_cli.config import recommended_update_command
            commits_word = "commit" if behind == 1 else "commits"
            right_lines.append(
                f"[bold yellow]⚠ {behind} {commits_word} behind[/]"
-                f"[dim yellow] — run [bold]hermes update[/bold] to update[/]"
+                f"[dim yellow] — run [bold]{recommended_update_command()}[/bold] to update[/]"
            )
    except Exception:
        pass  # Never break the banner over an update check
--- a/hermes_cli/claw.py
+++ b/hermes_cli/claw.py
@ -4,14 +4,19 @@ Usage:
    hermes claw migrate              # Interactive migration from ~/.openclaw
    hermes claw migrate --dry-run    # Preview what would be migrated
    hermes claw migrate --preset full --overwrite  # Full migration, overwrite conflicts
+    hermes claw cleanup              # Archive leftover OpenClaw directories
+    hermes claw cleanup --dry-run    # Preview what would be archived
 """

 import importlib.util
 import logging
+import shutil
 import sys
+from datetime import datetime
 from pathlib import Path

 from hermes_cli.config import get_hermes_home, get_config_path, load_config, save_config
+from hermes_constants import get_optional_skills_dir
 from hermes_cli.setup import (
    Colors,
    color,
@ -19,6 +24,7 @@ from hermes_cli.setup import (
    print_info,
    print_success,
    print_error,
+    print_warning,
    prompt_yes_no,
 )

@ -27,8 +33,7 @@ logger = logging.getLogger(__name__)
 PROJECT_ROOT = Path(__file__).parent.parent.resolve()

 _OPENCLAW_SCRIPT = (
-    PROJECT_ROOT
-    / "optional-skills"
+    get_optional_skills_dir(PROJECT_ROOT / "optional-skills")
    / "migration"
    / "openclaw-migration"
    / "scripts"
@ -45,6 +50,18 @@ _OPENCLAW_SCRIPT_INSTALLED = (
    / "openclaw_to_hermes.py"
 )

+# Known OpenClaw directory names (current + legacy)
+_OPENCLAW_DIR_NAMES = (".openclaw", ".clawdbot", ".moldbot")
+
+# State files commonly found in OpenClaw workspace directories that cause
+# confusion after migration (the agent discovers them and writes to them)
+_WORKSPACE_STATE_GLOBS = (
+    "*/todo.json",
+    "*/sessions/*",
+    "*/memory/*.json",
+    "*/logs/*",
+)
+

 def _find_migration_script() -> Path | None:
    """Find the openclaw_to_hermes.py script in known locations."""
@ -71,19 +88,88 @@ def _load_migration_module(script_path: Path):
    return mod


+def _find_openclaw_dirs() -> list[Path]:
+    """Find all OpenClaw directories on disk."""
+    found = []
+    for name in _OPENCLAW_DIR_NAMES:
+        candidate = Path.home() / name
+        if candidate.is_dir():
+            found.append(candidate)
+    return found
+
+
+def _scan_workspace_state(source_dir: Path) -> list[tuple[Path, str]]:
+    """Scan an OpenClaw directory for workspace state files that cause confusion.
+
+    Returns a list of (path, description) tuples.
+    """
+    findings: list[tuple[Path, str]] = []
+
+    # Direct state files in the root
+    for name in ("todo.json", "sessions", "logs"):
+        candidate = source_dir / name
+        if candidate.exists():
+            kind = "directory" if candidate.is_dir() else "file"
+            findings.append((candidate, f"Root {kind}: {name}"))
+
+    # State files inside workspace directories
+    for child in sorted(source_dir.iterdir()):
+        if not child.is_dir() or child.name.startswith("."):
+            continue
+        # Check for workspace-like subdirectories
+        for state_name in ("todo.json", "sessions", "logs", "memory"):
+            state_path = child / state_name
+            if state_path.exists():
+                kind = "directory" if state_path.is_dir() else "file"
+                rel = state_path.relative_to(source_dir)
+                findings.append((state_path, f"Workspace {kind}: {rel}"))
+
+    return findings
+
+
+def _archive_directory(source_dir: Path, dry_run: bool = False) -> Path:
+    """Rename an OpenClaw directory to .pre-migration.
+
+    Returns the archive path.
+    """
+    timestamp = datetime.now().strftime("%Y%m%d")
+    archive_name = f"{source_dir.name}.pre-migration"
+    archive_path = source_dir.parent / archive_name
+
+    # If archive already exists, add timestamp
+    if archive_path.exists():
+        archive_name = f"{source_dir.name}.pre-migration-{timestamp}"
+        archive_path = source_dir.parent / archive_name
+
+    # If still exists (multiple runs same day), add counter
+    counter = 2
+    while archive_path.exists():
+        archive_name = f"{source_dir.name}.pre-migration-{timestamp}-{counter}"
+        archive_path = source_dir.parent / archive_name
+        counter += 1
+
+    if not dry_run:
+        source_dir.rename(archive_path)
+
+    return archive_path
+
+
 def claw_command(args):
    """Route hermes claw subcommands."""
    action = getattr(args, "claw_action", None)

    if action == "migrate":
        _cmd_migrate(args)
+    elif action in ("cleanup", "clean"):
+        _cmd_cleanup(args)
    else:
-        print("Usage: hermes claw migrate [options]")
+        print("Usage: hermes claw <command> [options]")
        print()
        print("Commands:")
        print("  migrate          Migrate settings from OpenClaw to Hermes")
+        print("  cleanup          Archive leftover OpenClaw directories after migration")
        print()
-        print("Run 'hermes claw migrate --help' for migration options.")
+        print("Run 'hermes claw <command> --help' for options.")


 def _cmd_migrate(args):
@ -210,6 +296,168 @@ def _cmd_migrate(args):
    # Print results
    _print_migration_report(report, dry_run)

+    # After successful non-dry-run migration, offer to archive the source directory
+    if not dry_run and report.get("summary", {}).get("migrated", 0) > 0:
+        _offer_source_archival(source_dir, getattr(args, "yes", False))
+
+
+def _offer_source_archival(source_dir: Path, auto_yes: bool = False):
+    """After migration, offer to rename the source directory to prevent state fragmentation.
+
+    OpenClaw workspace directories contain state files (todo.json, sessions, etc.)
+    that the agent may discover and write to, causing confusion. Renaming the
+    directory prevents this.
+    """
+    if not source_dir.is_dir():
+        return
+
+    # Scan for state files that could cause problems
+    state_files = _scan_workspace_state(source_dir)
+
+    print()
+    print_header("Post-Migration Cleanup")
+    print_info("The OpenClaw directory still exists and contains workspace state files")
+    print_info("that can confuse the agent (todo lists, sessions, logs).")
+    if state_files:
+        print()
+        print(color("  Found state files:", Colors.YELLOW))
+        # Show up to 10 most relevant findings
+        for path, desc in state_files[:10]:
+            print(f"      {desc}")
+        if len(state_files) > 10:
+            print(f"      ... and {len(state_files) - 10} more")
+    print()
+    print_info(f"Recommend: rename {source_dir.name}/ to {source_dir.name}.pre-migration/")
+    print_info("This prevents the agent from discovering old workspace directories.")
+    print_info("You can always rename it back if needed.")
+    print()
+
+    if auto_yes or prompt_yes_no(f"Archive {source_dir} now?", default=True):
+        try:
+            archive_path = _archive_directory(source_dir)
+            print_success(f"Archived: {source_dir} → {archive_path}")
+            print_info("The original directory has been renamed, not deleted.")
+            print_info(f"To undo: mv {archive_path} {source_dir}")
+        except OSError as e:
+            print_error(f"Could not archive: {e}")
+            print_info(f"You can do it manually: mv {source_dir} {source_dir}.pre-migration")
+    else:
+        print_info("Skipped. You can archive later with: hermes claw cleanup")
+
+
+def _cmd_cleanup(args):
+    """Archive leftover OpenClaw directories after migration.
+
+    Scans for OpenClaw directories that still exist after migration and offers
+    to rename them to .pre-migration to prevent state fragmentation.
+    """
+    dry_run = getattr(args, "dry_run", False)
+    auto_yes = getattr(args, "yes", False)
+    explicit_source = getattr(args, "source", None)
+
+    print()
+    print(
+        color(
+            "┌─────────────────────────────────────────────────────────┐",
+            Colors.MAGENTA,
+        )
+    )
+    print(
+        color(
+            "│          ⚕ Hermes — OpenClaw Cleanup                   │",
+            Colors.MAGENTA,
+        )
+    )
+    print(
+        color(
+            "└─────────────────────────────────────────────────────────┘",
+            Colors.MAGENTA,
+        )
+    )
+
+    # Find OpenClaw directories
+    if explicit_source:
+        dirs_to_check = [Path(explicit_source)]
+    else:
+        dirs_to_check = _find_openclaw_dirs()
+
+    if not dirs_to_check:
+        print()
+        print_success("No OpenClaw directories found. Nothing to clean up.")
+        return
+
+    total_archived = 0
+
+    for source_dir in dirs_to_check:
+        print()
+        print_header(f"Found: {source_dir}")
+
+        # Scan for state files
+        state_files = _scan_workspace_state(source_dir)
+
+        # Show directory stats
+        try:
+            workspace_dirs = [
+                d for d in source_dir.iterdir()
+                if d.is_dir() and not d.name.startswith(".")
+                and any((d / name).exists() for name in ("todo.json", "SOUL.md", "MEMORY.md", "USER.md"))
+            ]
+        except OSError:
+            workspace_dirs = []
+
+        if workspace_dirs:
+            print_info(f"Workspace directories: {len(workspace_dirs)}")
+            for ws in workspace_dirs[:5]:
+                items = []
+                if (ws / "todo.json").exists():
+                    items.append("todo.json")
+                if (ws / "sessions").is_dir():
+                    items.append("sessions/")
+                if (ws / "SOUL.md").exists():
+                    items.append("SOUL.md")
+                if (ws / "MEMORY.md").exists():
+                    items.append("MEMORY.md")
+                detail = ", ".join(items) if items else "empty"
+                print(f"      {ws.name}/  ({detail})")
+            if len(workspace_dirs) > 5:
+                print(f"      ... and {len(workspace_dirs) - 5} more")
+
+        if state_files:
+            print()
+            print(color(f"  {len(state_files)} state file(s) that could cause confusion:", Colors.YELLOW))
+            for path, desc in state_files[:8]:
+                print(f"      {desc}")
+            if len(state_files) > 8:
+                print(f"      ... and {len(state_files) - 8} more")
+
+        print()
+
+        if dry_run:
+            archive_path = _archive_directory(source_dir, dry_run=True)
+            print_info(f"Would archive: {source_dir} → {archive_path}")
+        else:
+            if auto_yes or prompt_yes_no(f"Archive {source_dir}?", default=True):
+                try:
+                    archive_path = _archive_directory(source_dir)
+                    print_success(f"Archived: {source_dir} → {archive_path}")
+                    total_archived += 1
+                except OSError as e:
+                    print_error(f"Could not archive: {e}")
+                    print_info(f"Try manually: mv {source_dir} {source_dir}.pre-migration")
+            else:
+                print_info("Skipped.")
+
+    # Summary
+    print()
+    if dry_run:
+        print_info(f"Dry run complete. {len(dirs_to_check)} directory(ies) would be archived.")
+        print_info("Run without --dry-run to archive them.")
+    elif total_archived:
+        print_success(f"Cleaned up {total_archived} OpenClaw directory(ies).")
+        print_info("Directories were renamed, not deleted. You can undo by renaming them back.")
+    else:
+        print_info("No directories were archived.")
+

 def _print_migration_report(report: dict, dry_run: bool):
    """Print a formatted migration report."""
--- a/hermes_cli/colors.py
+++ b/hermes_cli/colors.py
@ -1,8 +1,24 @@
 """Shared ANSI color utilities for Hermes CLI modules."""

+import os
 import sys


+def should_use_color() -> bool:
+    """Return True when colored output is appropriate.
+
+    Respects the NO_COLOR environment variable (https://no-color.org/)
+    and TERM=dumb, in addition to the existing TTY check.
+    """
+    if os.environ.get("NO_COLOR") is not None:
+        return False
+    if os.environ.get("TERM") == "dumb":
+        return False
+    if not sys.stdout.isatty():
+        return False
+    return True
+
+
 class Colors:
    RESET = "\033[0m"
    BOLD = "\033[1m"
@ -16,7 +32,7 @@ class Colors:


 def color(text: str, *codes) -> str:
-    """Apply color codes to text (only when output is a TTY)."""
-    if not sys.stdout.isatty():
+    """Apply color codes to text (only when color output is appropriate)."""
+    if not should_use_color():
        return text
    return "".join(codes) + text + Colors.RESET
--- a/hermes_cli/commands.py
+++ b/hermes_cli/commands.py
@ -67,6 +67,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
               gateway_only=True),
    CommandDef("background", "Run a prompt in the background", "Session",
               aliases=("bg",), args_hint="<prompt>"),
+    CommandDef("btw", "Ephemeral side question using session context (no tools, not persisted)", "Session",
+               args_hint="<question>"),
    CommandDef("queue", "Queue a prompt for the next turn (doesn't interrupt)", "Session",
               aliases=("q",), args_hint="<prompt>"),
    CommandDef("status", "Show session info", "Session",
@ -366,6 +368,42 @@ def telegram_bot_commands() -> list[tuple[str, str]]:
    return result


+_TG_NAME_LIMIT = 32
+
+
+def _clamp_telegram_names(
+    entries: list[tuple[str, str]],
+    reserved: set[str],
+) -> list[tuple[str, str]]:
+    """Enforce Telegram's 32-char command name limit with collision avoidance.
+
+    Names exceeding 32 chars are truncated.  If truncation creates a duplicate
+    (against *reserved* names or earlier entries in the same batch), the name is
+    shortened to 31 chars and a digit ``0``-``9`` is appended to differentiate.
+    If all 10 digit slots are taken the entry is silently dropped.
+    """
+    used: set[str] = set(reserved)
+    result: list[tuple[str, str]] = []
+    for name, desc in entries:
+        if len(name) > _TG_NAME_LIMIT:
+            candidate = name[:_TG_NAME_LIMIT]
+            if candidate in used:
+                prefix = name[:_TG_NAME_LIMIT - 1]
+                for digit in range(10):
+                    candidate = f"{prefix}{digit}"
+                    if candidate not in used:
+                        break
+                else:
+                    # All 10 digit slots exhausted — skip entry
+                    continue
+            name = candidate
+        if name in used:
+            continue
+        used.add(name)
+        result.append((name, desc))
+    return result
+
+
 def telegram_menu_commands(max_commands: int = 100) -> tuple[list[tuple[str, str]], int]:
    """Return Telegram menu commands capped to the Bot API limit.

@ -381,9 +419,13 @@ def telegram_menu_commands(max_commands: int = 100) -> tuple[list[tuple[str, str
        (menu_commands, hidden_count) where hidden_count is the number of
        skill commands omitted due to the cap.
    """
-    all_commands = list(telegram_bot_commands())
+    core_commands = list(telegram_bot_commands())
+    # Reserve core names so plugin/skill truncation can't collide with them
+    reserved_names = {n for n, _ in core_commands}
+    all_commands = list(core_commands)

    # Plugin slash commands get priority over skills
+    plugin_entries: list[tuple[str, str]] = []
    try:
        from hermes_cli.plugins import get_plugin_manager
        pm = get_plugin_manager()
@ -393,10 +435,15 @@ def telegram_menu_commands(max_commands: int = 100) -> tuple[list[tuple[str, str
            desc = "Plugin command"
            if len(desc) > 40:
                desc = desc[:37] + "..."
-            all_commands.append((tg_name, desc))
+            plugin_entries.append((tg_name, desc))
    except Exception:
        pass

+    # Clamp plugin names to 32 chars with collision avoidance
+    plugin_entries = _clamp_telegram_names(plugin_entries, reserved_names)
+    reserved_names.update(n for n, _ in plugin_entries)
+    all_commands.extend(plugin_entries)
+
    # Remaining slots go to built-in skill commands (not hub-installed).
    skill_entries: list[tuple[str, str]] = []
    try:
@ -422,6 +469,9 @@ def telegram_menu_commands(max_commands: int = 100) -> tuple[list[tuple[str, str
    except Exception:
        pass

+    # Clamp skill names to 32 chars with collision avoidance
+    skill_entries = _clamp_telegram_names(skill_entries, reserved_names)
+
    # Skills fill remaining slots — they're the only tier that gets trimmed
    remaining_slots = max(0, max_commands - len(all_commands))
    hidden_count = max(0, len(skill_entries) - remaining_slots)
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@ -53,26 +53,86 @@ from hermes_cli.default_soul import DEFAULT_SOUL_MD
 # Managed mode (NixOS declarative config)
 # =============================================================================

+_MANAGED_TRUE_VALUES = ("true", "1", "yes")
+_MANAGED_SYSTEM_NAMES = {
+    "brew": "Homebrew",
+    "homebrew": "Homebrew",
+    "nix": "NixOS",
+    "nixos": "NixOS",
+}
+
+
+def get_managed_system() -> Optional[str]:
+    """Return the package manager owning this install, if any."""
+    raw = os.getenv("HERMES_MANAGED", "").strip()
+    if raw:
+        normalized = raw.lower()
+        if normalized in _MANAGED_TRUE_VALUES:
+            return "NixOS"
+        return _MANAGED_SYSTEM_NAMES.get(normalized, raw)
+
+    managed_marker = get_hermes_home() / ".managed"
+    if managed_marker.exists():
+        return "NixOS"
+    return None
+
+
 def is_managed() -> bool:
-    """Check if hermes is running in Nix-managed mode.
+    """Check if Hermes is running in package-manager-managed mode.

    Two signals: the HERMES_MANAGED env var (set by the systemd service),
    or a .managed marker file in HERMES_HOME (set by the NixOS activation
    script, so interactive shells also see it).
    """
-    if os.getenv("HERMES_MANAGED", "").lower() in ("true", "1", "yes"):
-        return True
-    managed_marker = get_hermes_home() / ".managed"
-    return managed_marker.exists()
+    return get_managed_system() is not None
+
+
+def get_managed_update_command() -> Optional[str]:
+    """Return the preferred upgrade command for a managed install."""
+    managed_system = get_managed_system()
+    if managed_system == "Homebrew":
+        return "brew upgrade hermes-agent"
+    if managed_system == "NixOS":
+        return "sudo nixos-rebuild switch"
+    return None
+
+
+def recommended_update_command() -> str:
+    """Return the best update command for the current installation."""
+    return get_managed_update_command() or "hermes update"
+
+
+def format_managed_message(action: str = "modify this Hermes installation") -> str:
+    """Build a user-facing error for managed installs."""
+    managed_system = get_managed_system() or "a package manager"
+    raw = os.getenv("HERMES_MANAGED", "").strip().lower()
+
+    if managed_system == "NixOS":
+        env_hint = "true" if raw in _MANAGED_TRUE_VALUES else raw or "true"
+        return (
+            f"Cannot {action}: this Hermes installation is managed by NixOS "
+            f"(HERMES_MANAGED={env_hint}).\n"
+            "Edit services.hermes-agent.settings in your configuration.nix and run:\n"
+            "  sudo nixos-rebuild switch"
+        )
+
+    if managed_system == "Homebrew":
+        env_hint = raw or "homebrew"
+        return (
+            f"Cannot {action}: this Hermes installation is managed by Homebrew "
+            f"(HERMES_MANAGED={env_hint}).\n"
+            "Use:\n"
+            "  brew upgrade hermes-agent"
+        )
+
+    return (
+        f"Cannot {action}: this Hermes installation is managed by {managed_system}.\n"
+        "Use your package manager to upgrade or reinstall Hermes."
+    )

 def managed_error(action: str = "modify configuration"):
    """Print user-friendly error for managed mode."""
-    print(
-        f"Cannot {action}: configuration is managed by NixOS (HERMES_MANAGED=true).\n"
-        "Edit services.hermes-agent.settings in your configuration.nix and run:\n"
-        "  sudo nixos-rebuild switch",
-        file=sys.stderr,
-    )
+    print(format_managed_message(action), file=sys.stderr)


 # =============================================================================
@ -137,8 +197,9 @@ def ensure_hermes_home():
 # =============================================================================

 DEFAULT_CONFIG = {
-    "model": "anthropic/claude-opus-4.6",
+    "model": "",
    "fallback_providers": [],
+    "credential_pool_strategies": {},
    "toolsets": ["hermes-cli"],
    "agent": {
        "max_turns": 90,
@ -187,6 +248,14 @@ DEFAULT_CONFIG = {
        "inactivity_timeout": 120,
        "command_timeout": 30,  # Timeout for browser commands in seconds (screenshot, navigate, etc.)
        "record_sessions": False,  # Auto-record browser sessions as WebM videos
+        "allow_private_urls": False,  # Allow navigating to private/internal IPs (localhost, 192.168.x.x, etc.)
+        "camofox": {
+            # When true, Hermes sends a stable profile-scoped userId to Camofox
+            # so the server can map it to a persistent browser profile directory.
+            # Requires Camofox server to be configured with CAMOFOX_PROFILE_DIR.
+            # When false (default), each session gets a random userId (ephemeral).
+            "managed_persistence": False,
+        },
    },

    # Filesystem checkpoints — automatic snapshots before destructive file ops.
@ -197,6 +266,11 @@ DEFAULT_CONFIG = {
        "max_snapshots": 50,  # Max checkpoints to keep per directory
    },

+    # Maximum characters returned by a single read_file call.  Reads that
+    # exceed this are rejected with guidance to use offset+limit.
+    # 100K chars ≈ 25–35K tokens across typical tokenisers.
+    "file_read_max_chars": 100_000,
+    
    "compression": {
        "enabled": True,
        "threshold": 0.50,            # compress when context usage exceeds this ratio
@ -287,6 +361,7 @@ DEFAULT_CONFIG = {
        "bell_on_complete": False,
        "show_reasoning": False,
        "streaming": False,
+        "inline_diffs": True,     # Show inline diff previews for write actions (write_file, patch, skill_manage)
        "show_cost": False,       # Show $ cost in the status bar (off by default)
        "skin": "default",
        "tool_progress_command": False,  # Enable /verbose command in messaging gateway
@ -394,6 +469,7 @@ DEFAULT_CONFIG = {
        "require_mention": True,       # Require @mention to respond in server channels
        "free_response_channels": "",  # Comma-separated channel IDs where bot responds without mention
        "auto_thread": True,           # Auto-create threads on @mention in channels (like Slack)
+        "reactions": True,             # Add 👀/✅/❌ reactions to messages during processing
    },

    # WhatsApp platform settings (gateway mode)
@ -1349,6 +1425,36 @@ def _expand_env_vars(obj):
    return obj


+def _normalize_root_model_keys(config: Dict[str, Any]) -> Dict[str, Any]:
+    """Move stale root-level provider/base_url into model section.
+
+    Some users (or older code) placed ``provider:`` and ``base_url:`` at the
+    config root instead of inside ``model:``.  These root-level keys are only
+    used as a fallback when the corresponding ``model.*`` key is empty — they
+    never override an existing ``model.provider`` or ``model.base_url``.
+    After migration the root-level keys are removed so they can't cause
+    confusion on subsequent loads.
+    """
+    # Only act if there are root-level keys to migrate
+    has_root = any(config.get(k) for k in ("provider", "base_url"))
+    if not has_root:
+        return config
+
+    config = dict(config)
+    model = config.get("model")
+    if not isinstance(model, dict):
+        model = {"default": model} if model else {}
+        config["model"] = model
+
+    for key in ("provider", "base_url"):
+        root_val = config.get(key)
+        if root_val and not model.get(key):
+            model[key] = root_val
+        config.pop(key, None)
+
+    return config
+
+
 def _normalize_max_turns_config(config: Dict[str, Any]) -> Dict[str, Any]:
    """Normalize legacy root-level max_turns into agent.max_turns."""
    config = dict(config)
@ -1390,7 +1496,7 @@ def load_config() -> Dict[str, Any]:
        except Exception as e:
            print(f"Warning: Failed to load config: {e}")
    
-    return _expand_env_vars(_normalize_max_turns_config(config))
+    return _expand_env_vars(_normalize_root_model_keys(_normalize_max_turns_config(config)))


 _SECURITY_COMMENT = """
@ -1497,7 +1603,7 @@ def save_config(config: Dict[str, Any]):

    ensure_hermes_home()
    config_path = get_config_path()
-    normalized = _normalize_max_turns_config(config)
+    normalized = _normalize_root_model_keys(_normalize_max_turns_config(config))

    # Build optional commented-out sections for features that are off by
    # default or only relevant when explicitly configured.
@ -2024,7 +2130,7 @@ def config_command(args):
    elif subcmd == "set":
        key = getattr(args, 'key', None)
        value = getattr(args, 'value', None)
-        if not key or not value:
+        if not key or value is None:
            print("Usage: hermes config set <key> <value>")
            print()
            print("Examples:")
--- a/hermes_cli/cron.py
+++ b/hermes_cli/cron.py
@ -56,7 +56,7 @@ def cron_list(show_all: bool = False):
    print()

    for job in jobs:
-        job_id = job.get("id", "?")[:8]
+        job_id = job.get("id", "?")
        name = job.get("name", "(unnamed)")
        schedule = job.get("schedule_display", job.get("schedule", {}).get("value", "?"))
        state = job.get("state", "scheduled" if job.get("enabled", True) else "paused")
--- a/hermes_cli/gateway.py
+++ b/hermes_cli/gateway.py
@ -463,6 +463,32 @@ def _build_user_local_paths(home: Path, path_entries: list[str]) -> list[str]:
    return [p for p in candidates if p not in path_entries and Path(p).exists()]


+def _hermes_home_for_target_user(target_home_dir: str) -> str:
+    """Remap the current HERMES_HOME to the equivalent under a target user's home.
+
+    When installing a system service via sudo, get_hermes_home() resolves to
+    root's home.  This translates it to the target user's equivalent path:
+      /root/.hermes                    → /home/alice/.hermes
+      /root/.hermes/profiles/coder     → /home/alice/.hermes/profiles/coder
+      /opt/custom-hermes               → /opt/custom-hermes  (kept as-is)
+    """
+    current_hermes = get_hermes_home().resolve()
+    current_default = (Path.home() / ".hermes").resolve()
+    target_default = Path(target_home_dir) / ".hermes"
+
+    # Default ~/.hermes → remap to target user's default
+    if current_hermes == current_default:
+        return str(target_default)
+
+    # Profile or subdir of ~/.hermes → preserve the relative structure
+    try:
+        relative = current_hermes.relative_to(current_default)
+        return str(target_default / relative)
+    except ValueError:
+        # Completely custom path (not under ~/.hermes) — keep as-is
+        return str(current_hermes)
+
+
 def generate_systemd_unit(system: bool = False, run_as_user: str | None = None) -> str:
    python_path = get_python_path()
    working_dir = str(PROJECT_ROOT)
@ -478,12 +504,11 @@ def generate_systemd_unit(system: bool = False, run_as_user: str | None = None)
        if resolved_node_dir not in path_entries:
            path_entries.append(resolved_node_dir)

-    hermes_home = str(get_hermes_home().resolve())
-
    common_bin_paths = ["/usr/local/sbin", "/usr/local/bin", "/usr/sbin", "/usr/bin", "/sbin", "/bin"]

    if system:
        username, group_name, home_dir = _system_service_identity(run_as_user)
+        hermes_home = _hermes_home_for_target_user(home_dir)
        path_entries.extend(_build_user_local_paths(Path(home_dir), path_entries))
        path_entries.extend(common_bin_paths)
        sane_path = ":".join(path_entries)
@ -518,6 +543,7 @@ StandardError=journal
 WantedBy=multi-user.target
 """

+    hermes_home = str(get_hermes_home().resolve())
    path_entries.extend(_build_user_local_paths(Path.home(), path_entries))
    path_entries.extend(common_bin_paths)
    sane_path = ":".join(path_entries)
@ -1066,11 +1092,12 @@ def launchd_status(deep: bool = False):
 # Gateway Runner
 # =============================================================================

-def run_gateway(verbose: bool = False, replace: bool = False):
+def run_gateway(verbose: int = 0, quiet: bool = False, replace: bool = False):
    """Run the gateway in foreground.
    
    Args:
-        verbose: Enable verbose logging output.
+        verbose: Stderr log verbosity count added on top of default WARNING (0=WARNING, 1=INFO, 2+=DEBUG).
+        quiet: Suppress all stderr log output.
        replace: If True, kill any existing gateway instance before starting.
                 This prevents systemd restart loops when the old process
                 hasn't fully exited yet.
@ -1089,7 +1116,8 @@ def run_gateway(verbose: bool = False, replace: bool = False):
    
    # Exit with code 1 if gateway fails to connect any platform,
    # so systemd Restart=on-failure will retry on transient errors
-    success = asyncio.run(start_gateway(replace=replace))
+    verbosity = None if quiet else verbose
+    success = asyncio.run(start_gateway(replace=replace, verbosity=verbosity))
    if not success:
        sys.exit(1)

@ -1863,9 +1891,10 @@ def gateway_command(args):
    
    # Default to run if no subcommand
    if subcmd is None or subcmd == "run":
-        verbose = getattr(args, 'verbose', False)
+        verbose = getattr(args, 'verbose', 0)
+        quiet = getattr(args, 'quiet', False)
        replace = getattr(args, 'replace', False)
-        run_gateway(verbose, replace=replace)
+        run_gateway(verbose, quiet=quiet, replace=replace)
        return

    if subcmd == "setup":
@ -1993,7 +2022,7 @@ def gateway_command(args):

            # Start fresh
            print("Starting gateway...")
-            run_gateway(verbose=False)
+            run_gateway(verbose=0)
    
    elif subcmd == "status":
        deep = getattr(args, 'deep', False)
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@ -173,9 +173,25 @@ def _relative_time(ts) -> str:

 def _has_any_provider_configured() -> bool:
    """Check if at least one inference provider is usable."""
-    from hermes_cli.config import get_env_path, get_hermes_home
+    from hermes_cli.config import get_env_path, get_hermes_home, load_config
    from hermes_cli.auth import get_auth_status

+    # Determine whether Hermes itself has been explicitly configured (model
+    # in config that isn't the hardcoded default). Used below to gate external
+    # tool credentials (Claude Code, Codex CLI) that shouldn't silently skip
+    # the setup wizard on a fresh install.
+    from hermes_cli.config import DEFAULT_CONFIG
+    _DEFAULT_MODEL = DEFAULT_CONFIG.get("model", "")
+    cfg = load_config()
+    model_cfg = cfg.get("model")
+    if isinstance(model_cfg, dict):
+        _model_name = (model_cfg.get("default") or "").strip()
+    elif isinstance(model_cfg, str):
+        _model_name = model_cfg.strip()
+    else:
+        _model_name = ""
+    _has_hermes_config = _model_name and _model_name != _DEFAULT_MODEL
+
    # Check env vars (may be set by .env or shell).
    # OPENAI_BASE_URL alone counts — local models (vLLM, llama.cpp, etc.)
    # often don't require an API key.
@ -230,16 +246,28 @@ def _has_any_provider_configured() -> bool:
            pass


-    # Check for Claude Code OAuth credentials (~/.claude/.credentials.json)
-    # These are used by resolve_anthropic_token() at runtime but were missing
-    # from this startup gate check.
-    try:
-        from agent.anthropic_adapter import read_claude_code_credentials, is_claude_code_token_valid
-        creds = read_claude_code_credentials()
-        if creds and (is_claude_code_token_valid(creds) or creds.get("refreshToken")):
+    # Check config.yaml — if model is a dict with an explicit provider set,
+    # the user has gone through setup (fresh installs have model as a plain
+    # string).  Also covers custom endpoints that store api_key/base_url in
+    # config rather than .env.
+    if isinstance(model_cfg, dict):
+        cfg_provider = (model_cfg.get("provider") or "").strip()
+        cfg_base_url = (model_cfg.get("base_url") or "").strip()
+        cfg_api_key = (model_cfg.get("api_key") or "").strip()
+        if cfg_provider or cfg_base_url or cfg_api_key:
            return True
-    except Exception:
-        pass
+
+    # Check for Claude Code OAuth credentials (~/.claude/.credentials.json)
+    # Only count these if Hermes has been explicitly configured — Claude Code
+    # being installed doesn't mean the user wants Hermes to use their tokens.
+    if _has_hermes_config:
+        try:
+            from agent.anthropic_adapter import read_claude_code_credentials, is_claude_code_token_valid
+            creds = read_claude_code_credentials()
+            if creds and (is_claude_code_token_valid(creds) or creds.get("refreshToken")):
+                return True
+        except Exception:
+            pass

    return False

@ -615,6 +643,7 @@ def cmd_chat(args):
        "worktree": getattr(args, "worktree", False),
        "checkpoints": getattr(args, "checkpoints", False),
        "pass_session_id": getattr(args, "pass_session_id", False),
+        "max_turns": getattr(args, "max_turns", None),
    }
    # Filter out None values
    kwargs = {k: v for k, v in kwargs.items() if v is not None}
@ -829,6 +858,17 @@ def cmd_setup(args):
 def cmd_model(args):
    """Select default model — starts with provider selection, then model picker."""
    _require_tty("model")
+    select_provider_and_model()
+
+
+def select_provider_and_model():
+    """Core provider selection + model picking logic.
+
+    Shared by ``cmd_model`` (``hermes model``) and the setup wizard
+    (``setup_model_provider`` in setup.py).  Handles the full flow:
+    provider picker, credential prompting, model selection, and config
+    persistence.
+    """
    from hermes_cli.auth import (
        resolve_provider, AuthError, format_auth_error,
    )
@ -858,7 +898,10 @@ def cmd_model(args):
    except AuthError as exc:
        warning = format_auth_error(exc)
        print(f"Warning: {warning} Falling back to auto provider detection.")
-        active = resolve_provider("auto")
+        try:
+            active = resolve_provider("auto")
+        except AuthError:
+            active = "openrouter"  # no provider yet; show full picker

    # Detect custom endpoint
    if active == "openrouter" and get_env_value("OPENAI_BASE_URL"):
@ -1050,10 +1093,6 @@ def _model_flow_openrouter(config, current_model=""):

    selected = _prompt_model_selection(openrouter_models, current_model=current_model)
    if selected:
-        # Clear any custom endpoint and set provider to openrouter
-        if get_env_value("OPENAI_BASE_URL"):
-            save_env_value("OPENAI_BASE_URL", "")
-            save_env_value("OPENAI_API_KEY", "")
        _save_model_choice(selected)

        # Update config provider and deactivate any OAuth provider
@ -1228,10 +1267,6 @@ def _model_flow_openai_codex(config, current_model=""):
    if selected:
        _save_model_choice(selected)
        _update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL)
-        # Clear custom endpoint env vars that would otherwise override Codex.
-        if get_env_value("OPENAI_BASE_URL"):
-            save_env_value("OPENAI_BASE_URL", "")
-            save_env_value("OPENAI_API_KEY", "")
        print(f"Default model set to: {selected} (via OpenAI Codex)")
    else:
        print("No change.")
@ -1260,22 +1295,10 @@ def _model_flow_custom(config):
    try:
        base_url = input(f"API base URL [{current_url or 'e.g. https://api.example.com/v1'}]: ").strip()
        api_key = input(f"API key [{current_key[:8] + '...' if current_key else 'optional'}]: ").strip()
-        model_name = input("Model name (e.g. gpt-4, llama-3-70b): ").strip()
-        context_length_str = input("Context length in tokens [leave blank for auto-detect]: ").strip()
    except (KeyboardInterrupt, EOFError):
        print("\nCancelled.")
        return

-    context_length = None
-    if context_length_str:
-        try:
-            context_length = int(context_length_str.replace(",", "").replace("k", "000").replace("K", "000"))
-            if context_length <= 0:
-                context_length = None
-        except ValueError:
-            print(f"Invalid context length: {context_length_str} — will auto-detect.")
-            context_length = None
-
    if not base_url and not current_url:
        print("No URL provided. Cancelled.")
        return
@ -1312,10 +1335,43 @@ def _model_flow_custom(config):
        if probe.get("suggested_base_url"):
            print(f"  If this server expects /v1, try base URL: {probe['suggested_base_url']}")

-    if base_url:
-        save_env_value("OPENAI_BASE_URL", effective_url)
-    if api_key:
-        save_env_value("OPENAI_API_KEY", api_key)
+    # Select model — use probe results when available, fall back to manual input
+    model_name = ""
+    detected_models = probe.get("models") or []
+    try:
+        if len(detected_models) == 1:
+            print(f"  Detected model: {detected_models[0]}")
+            confirm = input("  Use this model? [Y/n]: ").strip().lower()
+            if confirm in ("", "y", "yes"):
+                model_name = detected_models[0]
+            else:
+                model_name = input("Model name (e.g. gpt-4, llama-3-70b): ").strip()
+        elif len(detected_models) > 1:
+            print("  Available models:")
+            for i, m in enumerate(detected_models, 1):
+                print(f"    {i}. {m}")
+            pick = input(f"  Select model [1-{len(detected_models)}] or type name: ").strip()
+            if pick.isdigit() and 1 <= int(pick) <= len(detected_models):
+                model_name = detected_models[int(pick) - 1]
+            elif pick:
+                model_name = pick
+        else:
+            model_name = input("Model name (e.g. gpt-4, llama-3-70b): ").strip()
+
+        context_length_str = input("Context length in tokens [leave blank for auto-detect]: ").strip()
+    except (KeyboardInterrupt, EOFError):
+        print("\nCancelled.")
+        return
+
+    context_length = None
+    if context_length_str:
+        try:
+            context_length = int(context_length_str.replace(",", "").replace("k", "000").replace("K", "000"))
+            if context_length <= 0:
+                context_length = None
+        except ValueError:
+            print(f"Invalid context length: {context_length_str} — will auto-detect.")
+            context_length = None

    if model_name:
        _save_model_choice(model_name)
@ -1328,14 +1384,33 @@ def _model_flow_custom(config):
            cfg["model"] = model
        model["provider"] = "custom"
        model["base_url"] = effective_url
+        if effective_key:
+            model["api_key"] = effective_key
        model.pop("api_mode", None)  # let runtime auto-detect from URL
        save_config(cfg)
        deactivate_provider()

+        # Sync the caller's config dict so the setup wizard's final
+        # save_config(config) preserves our model settings.  Without
+        # this, the wizard overwrites model.provider/base_url with
+        # the stale values from its own config dict (#4172).
+        config["model"] = dict(model)
+
        print(f"Default model set to: {model_name} (via {effective_url})")
    else:
        if base_url or api_key:
            deactivate_provider()
+        # Even without a model name, persist the custom endpoint on the
+        # caller's config dict so the setup wizard doesn't lose it.
+        _caller_model = config.get("model")
+        if not isinstance(_caller_model, dict):
+            _caller_model = {"default": _caller_model} if _caller_model else {}
+        _caller_model["provider"] = "custom"
+        _caller_model["base_url"] = effective_url
+        if effective_key:
+            _caller_model["api_key"] = effective_key
+        _caller_model.pop("api_mode", None)
+        config["model"] = _caller_model
        print("Endpoint saved. Use `/model` in chat or `hermes model` to set a model.")

    # Auto-save to custom_providers so it appears in the menu next time
@ -1476,9 +1551,6 @@ def _model_flow_named_custom(config, provider_info):

    # If a model is saved, just activate immediately — no probing needed
    if saved_model:
-        save_env_value("OPENAI_BASE_URL", base_url)
-        if api_key:
-            save_env_value("OPENAI_API_KEY", api_key)
        _save_model_choice(saved_model)

        cfg = load_config()
@ -1488,6 +1560,8 @@ def _model_flow_named_custom(config, provider_info):
            cfg["model"] = model
        model["provider"] = "custom"
        model["base_url"] = base_url
+        if api_key:
+            model["api_key"] = api_key
        save_config(cfg)
        deactivate_provider()

@ -1550,9 +1624,6 @@ def _model_flow_named_custom(config, provider_info):
            return

    # Activate and save the model to the custom_providers entry
-    save_env_value("OPENAI_BASE_URL", base_url)
-    if api_key:
-        save_env_value("OPENAI_API_KEY", api_key)
    _save_model_choice(model_name)

    cfg = load_config()
@ -1562,6 +1633,8 @@ def _model_flow_named_custom(config, provider_info):
        cfg["model"] = model
    model["provider"] = "custom"
    model["base_url"] = base_url
+    if api_key:
+        model["api_key"] = api_key
    save_config(cfg)
    deactivate_provider()

@ -1614,11 +1687,15 @@ _PROVIDER_MODELS = {
        "kimi-k2-0905-preview",
    ],
    "minimax": [
+        "MiniMax-M2.7",
+        "MiniMax-M2.7-highspeed",
        "MiniMax-M2.5",
        "MiniMax-M2.5-highspeed",
        "MiniMax-M2.1",
    ],
    "minimax-cn": [
+        "MiniMax-M2.7",
+        "MiniMax-M2.7-highspeed",
        "MiniMax-M2.5",
        "MiniMax-M2.5-highspeed",
        "MiniMax-M2.1",
@ -1866,11 +1943,6 @@ def _model_flow_copilot(config, current_model=""):
            catalog=catalog,
            api_key=api_key,
        ) or selected
-        # Clear stale custom-endpoint overrides so the Copilot provider wins cleanly.
-        if get_env_value("OPENAI_BASE_URL"):
-            save_env_value("OPENAI_BASE_URL", "")
-            save_env_value("OPENAI_API_KEY", "")
-
        initial_cfg = load_config()
        current_effort = _current_reasoning_effort(initial_cfg)
        reasoning_efforts = github_model_reasoning_efforts(
@ -2095,11 +2167,6 @@ def _model_flow_kimi(config, current_model=""):
            selected = None

    if selected:
-        # Clear custom endpoint if set (avoid confusion)
-        if get_env_value("OPENAI_BASE_URL"):
-            save_env_value("OPENAI_BASE_URL", "")
-            save_env_value("OPENAI_API_KEY", "")
-
        _save_model_choice(selected)

        # Update config with provider and base URL
@ -2202,11 +2269,6 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
            selected = None

    if selected:
-        # Clear custom endpoint if set (avoid confusion)
-        if get_env_value("OPENAI_BASE_URL"):
-            save_env_value("OPENAI_BASE_URL", "")
-            save_env_value("OPENAI_API_KEY", "")
-
        _save_model_choice(selected)

        # Update config with provider and base URL
@ -2418,11 +2480,6 @@ def _model_flow_anthropic(config, current_model=""):
            selected = None

    if selected:
-        # Clear custom endpoint if set
-        if get_env_value("OPENAI_BASE_URL"):
-            save_env_value("OPENAI_BASE_URL", "")
-            save_env_value("OPENAI_API_KEY", "")
-
        _save_model_choice(selected)

        # Update config with provider — clear base_url since
@ -2456,6 +2513,12 @@ def cmd_logout(args):
    logout_command(args)


+def cmd_auth(args):
+    """Manage pooled credentials."""
+    from hermes_cli.auth_commands import auth_command
+    auth_command(args)
+
+
 def cmd_status(args):
    """Show status of all components."""
    from hermes_cli.status import show_status
@ -2504,10 +2567,14 @@ def cmd_version(args):
    # Show update status (synchronous — acceptable since user asked for version info)
    try:
        from hermes_cli.banner import check_for_updates
+        from hermes_cli.config import recommended_update_command
        behind = check_for_updates()
        if behind and behind > 0:
            commits_word = "commit" if behind == 1 else "commits"
-            print(f"Update available: {behind} {commits_word} behind — run 'hermes update'")
+            print(
+                f"Update available: {behind} {commits_word} behind — "
+                f"run '{recommended_update_command()}'"
+            )
        elif behind == 0:
            print("Up to date")
    except Exception:
@ -2858,6 +2925,11 @@ def _invalidate_update_cache():
 def cmd_update(args):
    """Update Hermes Agent to the latest version."""
    import shutil
+    from hermes_cli.config import is_managed, managed_error
+
+    if is_managed():
+        managed_error("update Hermes Agent")
+        return
    
    print("⚕ Updating Hermes Agent...")
    print()
@ -3193,6 +3265,7 @@ def cmd_update(args):
            _gw_service_name = get_service_name()
            existing_pid = get_running_pid()
            has_systemd_service = False
+            has_system_service = False
            has_launchd_service = False

            try:
@ -3205,6 +3278,19 @@ def cmd_update(args):
            except (FileNotFoundError, subprocess.TimeoutExpired):
                pass

+            # Also check for a system-level service (hermes gateway install --system).
+            # This covers gateways running under system systemd where --user
+            # fails due to missing D-Bus session.
+            if not has_systemd_service and is_linux():
+                try:
+                    check = subprocess.run(
+                        ["systemctl", "is-active", _gw_service_name],
+                        capture_output=True, text=True, timeout=5,
+                    )
+                    has_system_service = check.stdout.strip() == "active"
+                except (FileNotFoundError, subprocess.TimeoutExpired):
+                    pass
+
            # Check for macOS launchd service
            if is_macos():
                try:
@ -3219,7 +3305,7 @@ def cmd_update(args):
                except (FileNotFoundError, subprocess.TimeoutExpired):
                    pass

-            if existing_pid or has_systemd_service or has_launchd_service:
+            if existing_pid or has_systemd_service or has_system_service or has_launchd_service:
                print()

                # When a service manager is handling the gateway, let it
@ -3260,6 +3346,21 @@ def cmd_update(args):
                                print("    hermes gateway restart")
                            else:
                                print("  Try manually: hermes gateway restart")
+                elif has_system_service:
+                    # System-level service (hermes gateway install --system).
+                    # No D-Bus session needed — systemctl without --user talks
+                    # directly to the system manager over /run/systemd/private.
+                    print("→ Restarting system gateway service...")
+                    restart = subprocess.run(
+                        ["systemctl", "restart", _gw_service_name],
+                        capture_output=True, text=True, timeout=15,
+                    )
+                    if restart.returncode == 0:
+                        print("✓ Gateway restarted (system service).")
+                    else:
+                        print(f"⚠ Gateway restart failed: {restart.stderr.strip()}")
+                        print("  System services may require root.  Try:")
+                        print(f"    sudo systemctl restart {_gw_service_name}")
                elif has_launchd_service:
                    # Refresh the plist first (picks up --replace and other
                    # changes from the update we just pulled).
@ -3323,7 +3424,7 @@ def _coalesce_session_name_args(argv: list) -> list:
    or a known top-level subcommand.
    """
    _SUBCOMMANDS = {
-        "chat", "model", "gateway", "setup", "whatsapp", "login", "logout",
+        "chat", "model", "gateway", "setup", "whatsapp", "login", "logout", "auth",
        "status", "cron", "doctor", "config", "pairing", "skills", "tools",
        "mcp", "sessions", "insights", "version", "update", "uninstall",
        "profile",
@ -3612,6 +3713,10 @@ Examples:
    hermes --resume <session_id>  Resume a specific session by ID
    hermes setup                  Run setup wizard
    hermes logout                 Clear stored authentication
+    hermes auth add <provider>    Add a pooled credential
+    hermes auth list              List pooled credentials
+    hermes auth remove <p> <n>    Remove pooled credential by index
+    hermes auth reset <provider>  Clear exhaustion status for a provider
    hermes model                  Select default model
    hermes config                 View configuration
    hermes config edit            Edit config in $EDITOR
@ -3745,6 +3850,13 @@ For more help on a command:
        default=False,
        help="Enable filesystem checkpoints before destructive file operations (use /rollback to restore)"
    )
+    chat_parser.add_argument(
+        "--max-turns",
+        type=int,
+        default=None,
+        metavar="N",
+        help="Maximum tool-calling iterations per conversation turn (default: 90, or agent.max_turns in config)"
+    )
    chat_parser.add_argument(
        "--yolo",
        action="store_true",
@ -3824,7 +3936,10 @@ For more help on a command:
    
    # gateway run (default)
    gateway_run = gateway_subparsers.add_parser("run", help="Run gateway in foreground")
-    gateway_run.add_argument("-v", "--verbose", action="store_true")
+    gateway_run.add_argument("-v", "--verbose", action="count", default=0,
+                             help="Increase stderr log verbosity (-v=INFO, -vv=DEBUG)")
+    gateway_run.add_argument("-q", "--quiet", action="store_true",
+                             help="Suppress all stderr log output")
    gateway_run.add_argument("--replace", action="store_true",
                             help="Replace any existing gateway instance (useful for systemd)")
    
@ -3968,6 +4083,33 @@ For more help on a command:
    )
    logout_parser.set_defaults(func=cmd_logout)

+    auth_parser = subparsers.add_parser(
+        "auth",
+        help="Manage pooled provider credentials",
+    )
+    auth_subparsers = auth_parser.add_subparsers(dest="auth_action")
+    auth_add = auth_subparsers.add_parser("add", help="Add a pooled credential")
+    auth_add.add_argument("provider", help="Provider id (for example: anthropic, openai-codex, openrouter)")
+    auth_add.add_argument("--type", dest="auth_type", choices=["oauth", "api-key", "api_key"], help="Credential type to add")
+    auth_add.add_argument("--label", help="Optional display label")
+    auth_add.add_argument("--api-key", help="API key value (otherwise prompted securely)")
+    auth_add.add_argument("--portal-url", help="Nous portal base URL")
+    auth_add.add_argument("--inference-url", help="Nous inference base URL")
+    auth_add.add_argument("--client-id", help="OAuth client id")
+    auth_add.add_argument("--scope", help="OAuth scope override")
+    auth_add.add_argument("--no-browser", action="store_true", help="Do not auto-open a browser for OAuth login")
+    auth_add.add_argument("--timeout", type=float, help="OAuth/network timeout in seconds")
+    auth_add.add_argument("--insecure", action="store_true", help="Disable TLS verification for OAuth login")
+    auth_add.add_argument("--ca-bundle", help="Custom CA bundle for OAuth login")
+    auth_list = auth_subparsers.add_parser("list", help="List pooled credentials")
+    auth_list.add_argument("provider", nargs="?", help="Optional provider filter")
+    auth_remove = auth_subparsers.add_parser("remove", help="Remove a pooled credential by index")
+    auth_remove.add_argument("provider", help="Provider id")
+    auth_remove.add_argument("index", type=int, help="1-based credential index")
+    auth_reset = auth_subparsers.add_parser("reset", help="Clear exhaustion status for all credentials for a provider")
+    auth_reset.add_argument("provider", help="Provider id")
+    auth_parser.set_defaults(func=cmd_auth)
+
    # =========================================================================
    # status command
    # =========================================================================
@ -4778,6 +4920,28 @@ For more help on a command:
        help="Skip confirmation prompts"
    )

+    # claw cleanup
+    claw_cleanup = claw_subparsers.add_parser(
+        "cleanup",
+        aliases=["clean"],
+        help="Archive leftover OpenClaw directories after migration",
+        description="Scan for and archive leftover OpenClaw directories to prevent state fragmentation"
+    )
+    claw_cleanup.add_argument(
+        "--source",
+        help="Path to a specific OpenClaw directory to clean up"
+    )
+    claw_cleanup.add_argument(
+        "--dry-run",
+        action="store_true",
+        help="Preview what would be archived without making changes"
+    )
+    claw_cleanup.add_argument(
+        "--yes", "-y",
+        action="store_true",
+        help="Skip confirmation prompts"
+    )
+
    def cmd_claw(args):
        from hermes_cli.claw import claw_command
        claw_command(args)
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@ -27,6 +27,8 @@ GITHUB_MODELS_CATALOG_URL = COPILOT_MODELS_URL
 # (model_id, display description shown in menus)
 OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("anthropic/claude-opus-4.6",       "recommended"),
+    ("anthropic/claude-sonnet-4.6",     ""),
+    ("qwen/qwen3.6-plus-preview:free", "free"),
    ("anthropic/claude-sonnet-4.5",     ""),
    ("anthropic/claude-haiku-4.5",      ""),
    ("openai/gpt-5.4",                  ""),
@ -56,6 +58,8 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
 _PROVIDER_MODELS: dict[str, list[str]] = {
    "nous": [
        "anthropic/claude-opus-4.6",
+        "anthropic/claude-sonnet-4.6",
+        "qwen/qwen3.6-plus-preview:free",
        "anthropic/claude-sonnet-4.5",
        "anthropic/claude-haiku-4.5",
        "openai/gpt-5.4",
@ -189,7 +193,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
    "opencode-go": [
        "glm-5",
        "kimi-k2.5",
-        "minimax-m2.5",
+        "minimax-m2.7",
    ],
    "ai-gateway": [
        "anthropic/claude-opus-4.6",
@ -347,7 +351,7 @@ def list_available_providers() -> list[dict[str, str]]:
        try:
            from hermes_cli.auth import get_auth_status, has_usable_secret
            if pid == "custom":
-                custom_base_url = _get_custom_base_url() or os.getenv("OPENAI_BASE_URL", "")
+                custom_base_url = _get_custom_base_url() or ""
                has_creds = bool(custom_base_url.strip())
            elif pid == "openrouter":
                has_creds = has_usable_secret(os.getenv("OPENROUTER_API_KEY", ""))
--- a/hermes_cli/plugins_cmd.py
+++ b/hermes_cli/plugins_cmd.py
@ -265,10 +265,11 @@ def cmd_install(identifier: str, force: bool = False) -> None:
                )
                sys.exit(1)
            if mv_int > _SUPPORTED_MANIFEST_VERSION:
+                from hermes_cli.config import recommended_update_command
                console.print(
                    f"[red]Error:[/red] Plugin '{plugin_name}' requires manifest_version "
                    f"{mv}, but this installer only supports up to {_SUPPORTED_MANIFEST_VERSION}.\n"
-                    f"Run [bold]hermes update[/bold] to get a newer installer."
+                    f"Run [bold]{recommended_update_command()}[/bold] to get a newer installer."
                )
                sys.exit(1)

--- a/hermes_cli/profiles.py
+++ b/hermes_cli/profiles.py
@ -27,7 +27,7 @@ import stat
 import subprocess
 import sys
 from dataclasses import dataclass, field
-from pathlib import Path
+from pathlib import Path, PurePosixPath, PureWindowsPath
 from typing import List, Optional

 _PROFILE_ID_RE = re.compile(r"^[a-z0-9][a-z0-9_-]{0,63}$")
@ -58,6 +58,34 @@ _CLONE_ALL_STRIP = [
    "processes.json",
 ]

+# Directories/files to exclude when exporting the default (~/.hermes) profile.
+# The default profile contains infrastructure (repo checkout, worktrees, DBs,
+# caches, binaries) that named profiles don't have.  We exclude those so the
+# export is a portable, reasonable-size archive of actual profile data.
+_DEFAULT_EXPORT_EXCLUDE_ROOT = frozenset({
+    # Infrastructure
+    "hermes-agent",         # repo checkout (multi-GB)
+    ".worktrees",           # git worktrees
+    "profiles",             # other profiles — never recursive-export
+    "bin",                  # installed binaries (tirith, etc.)
+    "node_modules",         # npm packages
+    # Databases & runtime state
+    "state.db", "state.db-shm", "state.db-wal",
+    "hermes_state.db",
+    "response_store.db", "response_store.db-shm", "response_store.db-wal",
+    "gateway.pid", "gateway_state.json", "processes.json",
+    "auth.json",            # API keys, OAuth tokens, credential pools
+    ".env",                 # API keys (dotenv)
+    "auth.lock", "active_profile", ".update_check",
+    "errors.log",
+    ".hermes_history",
+    # Caches (regenerated on use)
+    "image_cache", "audio_cache", "document_cache",
+    "browser_screenshots", "checkpoints",
+    "sandboxes",
+    "logs",                 # gateway logs
+})
+
 # Names that cannot be used as profile aliases
 _RESERVED_NAMES = frozenset({
    "hermes", "default", "test", "tmp", "root", "sudo",
@ -241,7 +269,7 @@ def _read_config_model(profile_dir: Path) -> tuple:
        if isinstance(model_cfg, str):
            return model_cfg, None
        if isinstance(model_cfg, dict):
-            return model_cfg.get("model"), model_cfg.get("provider")
+            return model_cfg.get("default") or model_cfg.get("model"), model_cfg.get("provider")
        return None, None
    except Exception:
        return None, None
@ -685,11 +713,37 @@ def get_active_profile_name() -> str:
 # Export / Import
 # ---------------------------------------------------------------------------

+def _default_export_ignore(root_dir: Path):
+    """Return an *ignore* callable for :func:`shutil.copytree`.
+
+    At the root level it excludes everything in ``_DEFAULT_EXPORT_EXCLUDE_ROOT``.
+    At all levels it excludes ``__pycache__``, sockets, and temp files.
+    """
+
+    def _ignore(directory: str, contents: list) -> set:
+        ignored: set = set()
+        for entry in contents:
+            # Universal exclusions (any depth)
+            if entry == "__pycache__" or entry.endswith((".sock", ".tmp")):
+                ignored.add(entry)
+            # npm lockfiles can appear at root
+            elif entry in ("package.json", "package-lock.json"):
+                ignored.add(entry)
+        # Root-level exclusions
+        if Path(directory) == root_dir:
+            ignored.update(c for c in contents if c in _DEFAULT_EXPORT_EXCLUDE_ROOT)
+        return ignored
+
+    return _ignore
+
+
 def export_profile(name: str, output_path: str) -> Path:
    """Export a profile to a tar.gz archive.

    Returns the output file path.
    """
+    import tempfile
+
    validate_profile_name(name)
    profile_dir = get_profile_dir(name)
    if not profile_dir.is_dir():
@ -698,8 +752,84 @@ def export_profile(name: str, output_path: str) -> Path:
    output = Path(output_path)
    # shutil.make_archive wants the base name without extension
    base = str(output).removesuffix(".tar.gz").removesuffix(".tgz")
-    result = shutil.make_archive(base, "gztar", str(profile_dir.parent), name)
-    return Path(result)
+
+    if name == "default":
+        # The default profile IS ~/.hermes itself — its parent is ~/ and its
+        # directory name is ".hermes", not "default".  We stage a clean copy
+        # under a temp dir so the archive contains ``default/...``.
+        with tempfile.TemporaryDirectory() as tmpdir:
+            staged = Path(tmpdir) / "default"
+            shutil.copytree(
+                profile_dir,
+                staged,
+                ignore=_default_export_ignore(profile_dir),
+            )
+            result = shutil.make_archive(base, "gztar", tmpdir, "default")
+            return Path(result)
+
+    # Named profiles — stage a filtered copy to exclude credentials
+    with tempfile.TemporaryDirectory() as tmpdir:
+        staged = Path(tmpdir) / name
+        _CREDENTIAL_FILES = {"auth.json", ".env"}
+        shutil.copytree(
+            profile_dir,
+            staged,
+            ignore=lambda d, contents: _CREDENTIAL_FILES & set(contents),
+        )
+        result = shutil.make_archive(base, "gztar", tmpdir, name)
+        return Path(result)
+
+
+def _normalize_profile_archive_parts(member_name: str) -> List[str]:
+    """Return safe path parts for a profile archive member."""
+    normalized_name = member_name.replace("\\", "/")
+    posix_path = PurePosixPath(normalized_name)
+    windows_path = PureWindowsPath(member_name)
+
+    if (
+        not normalized_name
+        or posix_path.is_absolute()
+        or windows_path.is_absolute()
+        or windows_path.drive
+    ):
+        raise ValueError(f"Unsafe archive member path: {member_name}")
+
+    parts = [part for part in posix_path.parts if part not in ("", ".")]
+    if not parts or any(part == ".." for part in parts):
+        raise ValueError(f"Unsafe archive member path: {member_name}")
+    return parts
+
+
+def _safe_extract_profile_archive(archive: Path, destination: Path) -> None:
+    """Extract a profile archive without allowing path escapes or links."""
+    import tarfile
+
+    with tarfile.open(archive, "r:gz") as tf:
+        for member in tf.getmembers():
+            parts = _normalize_profile_archive_parts(member.name)
+            target = destination.joinpath(*parts)
+
+            if member.isdir():
+                target.mkdir(parents=True, exist_ok=True)
+                continue
+
+            if not member.isfile():
+                raise ValueError(
+                    f"Unsupported archive member type: {member.name}"
+                )
+
+            target.parent.mkdir(parents=True, exist_ok=True)
+            extracted = tf.extractfile(member)
+            if extracted is None:
+                raise ValueError(f"Cannot read archive member: {member.name}")
+
+            with extracted, open(target, "wb") as dst:
+                shutil.copyfileobj(extracted, dst)
+
+            try:
+                os.chmod(target, member.mode & 0o777)
+            except OSError:
+                pass


 def import_profile(archive_path: str, name: Optional[str] = None) -> Path:
@ -716,9 +846,18 @@ def import_profile(archive_path: str, name: Optional[str] = None) -> Path:

    # Peek at the archive to find the top-level directory name
    with tarfile.open(archive, "r:gz") as tf:
-        top_dirs = {m.name.split("/")[0] for m in tf.getmembers() if "/" in m.name}
+        top_dirs = {
+            parts[0]
+            for member in tf.getmembers()
+            for parts in [_normalize_profile_archive_parts(member.name)]
+            if len(parts) > 1 or member.isdir()
+        }
        if not top_dirs:
-            top_dirs = {m.name for m in tf.getmembers() if m.isdir()}
+            top_dirs = {
+                _normalize_profile_archive_parts(member.name)[0]
+                for member in tf.getmembers()
+                if member.isdir()
+            }

    inferred_name = name or (top_dirs.pop() if len(top_dirs) == 1 else None)
    if not inferred_name:
@ -727,6 +866,15 @@ def import_profile(archive_path: str, name: Optional[str] = None) -> Path:
            "Specify it explicitly: hermes profile import <archive> --name <name>"
        )

+    # Archives exported from the default profile have "default/" as top-level
+    # dir.  Importing as "default" would target ~/.hermes itself — disallow
+    # that and guide the user toward a named profile.
+    if inferred_name == "default":
+        raise ValueError(
+            "Cannot import as 'default' — that is the built-in root profile (~/.hermes). "
+            "Specify a different name: hermes profile import <archive> --name <name>"
+        )
+
    validate_profile_name(inferred_name)
    profile_dir = get_profile_dir(inferred_name)
    if profile_dir.exists():
@ -735,7 +883,7 @@ def import_profile(archive_path: str, name: Optional[str] = None) -> Path:
    profiles_root = _get_profiles_root()
    profiles_root.mkdir(parents=True, exist_ok=True)

-    shutil.unpack_archive(str(archive), str(profiles_root))
+    _safe_extract_profile_archive(archive, profiles_root)

    # If the archive extracted under a different name, rename
    extracted = profiles_root / (top_dirs.pop() if top_dirs else inferred_name)
--- a/hermes_cli/runtime_provider.py
+++ b/hermes_cli/runtime_provider.py
@ -6,8 +6,10 @@ import os
 from typing import Any, Dict, Optional

 from hermes_cli import auth as auth_mod
+from agent.credential_pool import CredentialPool, PooledCredential, get_custom_provider_pool_key, load_pool
 from hermes_cli.auth import (
    AuthError,
+    DEFAULT_CODEX_BASE_URL,
    PROVIDER_REGISTRY,
    format_auth_error,
    resolve_provider,
@ -69,7 +71,7 @@ def _get_model_config() -> Dict[str, Any]:
        default = (cfg.get("default") or "").strip()
        base_url = (cfg.get("base_url") or "").strip()
        is_local = "localhost" in base_url or "127.0.0.1" in base_url
-        is_fallback = not default or default == "anthropic/claude-opus-4.6"
+        is_fallback = not default
        if is_local and is_fallback and base_url:
            detected = _auto_detect_local_model(base_url)
            if detected:
@ -109,6 +111,52 @@ def _parse_api_mode(raw: Any) -> Optional[str]:
    return None


+def _resolve_runtime_from_pool_entry(
+    *,
+    provider: str,
+    entry: PooledCredential,
+    requested_provider: str,
+    model_cfg: Optional[Dict[str, Any]] = None,
+    pool: Optional[CredentialPool] = None,
+) -> Dict[str, Any]:
+    model_cfg = model_cfg or _get_model_config()
+    base_url = (getattr(entry, "runtime_base_url", None) or getattr(entry, "base_url", None) or "").rstrip("/")
+    api_key = getattr(entry, "runtime_api_key", None) or getattr(entry, "access_token", "")
+    api_mode = "chat_completions"
+    if provider == "openai-codex":
+        api_mode = "codex_responses"
+        base_url = base_url or DEFAULT_CODEX_BASE_URL
+    elif provider == "anthropic":
+        api_mode = "anthropic_messages"
+        cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
+        cfg_base_url = ""
+        if cfg_provider == "anthropic":
+            cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
+        base_url = cfg_base_url or base_url or "https://api.anthropic.com"
+    elif provider == "openrouter":
+        base_url = base_url or OPENROUTER_BASE_URL
+    elif provider == "nous":
+        api_mode = "chat_completions"
+    elif provider == "copilot":
+        api_mode = _copilot_runtime_api_mode(model_cfg, getattr(entry, "runtime_api_key", ""))
+    else:
+        configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
+        if configured_mode:
+            api_mode = configured_mode
+        elif base_url.rstrip("/").endswith("/anthropic"):
+            api_mode = "anthropic_messages"
+
+    return {
+        "provider": provider,
+        "api_mode": api_mode,
+        "base_url": base_url,
+        "api_key": api_key,
+        "source": getattr(entry, "source", "pool"),
+        "credential_pool": pool,
+        "requested_provider": requested_provider,
+    }
+
+
 def resolve_requested_provider(requested: Optional[str] = None) -> str:
    """Resolve provider request from explicit arg, config, then env."""
    if requested and requested.strip():
@ -128,6 +176,37 @@ def resolve_requested_provider(requested: Optional[str] = None) -> str:
    return "auto"


+def _try_resolve_from_custom_pool(
+    base_url: str,
+    provider_label: str,
+    api_mode_override: Optional[str] = None,
+) -> Optional[Dict[str, Any]]:
+    """Check if a credential pool exists for a custom endpoint and return a runtime dict if so."""
+    pool_key = get_custom_provider_pool_key(base_url)
+    if not pool_key:
+        return None
+    try:
+        pool = load_pool(pool_key)
+        if not pool.has_credentials():
+            return None
+        entry = pool.select()
+        if entry is None:
+            return None
+        pool_api_key = getattr(entry, "runtime_api_key", None) or getattr(entry, "access_token", "")
+        if not pool_api_key:
+            return None
+        return {
+            "provider": provider_label,
+            "api_mode": api_mode_override or _detect_api_mode_for_url(base_url) or "chat_completions",
+            "base_url": base_url,
+            "api_key": pool_api_key,
+            "source": f"pool:{pool_key}",
+            "credential_pool": pool,
+        }
+    except Exception:
+        return None
+
+
 def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, Any]]:
    requested_norm = _normalize_custom_provider_name(requested_provider or "")
    if not requested_norm or requested_norm == "custom":
@ -192,6 +271,11 @@ def _resolve_named_custom_runtime(
    if not base_url:
        return None

+    # Check if a credential pool exists for this custom endpoint
+    pool_result = _try_resolve_from_custom_pool(base_url, "custom", custom_provider.get("api_mode"))
+    if pool_result:
+        return pool_result
+
    api_key_candidates = [
        (explicit_api_key or "").strip(),
        str(custom_provider.get("api_key", "") or "").strip(),
@ -229,28 +313,22 @@ def _resolve_openrouter_runtime(
    requested_norm = (requested_provider or "").strip().lower()
    cfg_provider = cfg_provider.strip().lower()

-    env_openai_base_url = os.getenv("OPENAI_BASE_URL", "").strip()
    env_openrouter_base_url = os.getenv("OPENROUTER_BASE_URL", "").strip()

+    # Use config base_url when available and the provider context matches.
+    # OPENAI_BASE_URL env var is no longer consulted — config.yaml is
+    # the single source of truth for endpoint URLs.
    use_config_base_url = False
    if cfg_base_url.strip() and not explicit_base_url:
        if requested_norm == "auto":
-            if (not cfg_provider or cfg_provider == "auto") and not env_openai_base_url:
+            if not cfg_provider or cfg_provider == "auto":
                use_config_base_url = True
        elif requested_norm == "custom" and cfg_provider == "custom":
-            # provider: custom — use base_url from config (Fixes #1760).
            use_config_base_url = True

-    # When the user explicitly requested the openrouter provider, skip
-    # OPENAI_BASE_URL — it typically points to a custom / non-OpenRouter
-    # endpoint and would prevent switching back to OpenRouter (#874).
-    skip_openai_base = requested_norm == "openrouter"
-
-    # For custom, prefer config base_url over env so config.yaml is honored (#1760).
    base_url = (
        (explicit_base_url or "").strip()
        or (cfg_base_url.strip() if use_config_base_url else "")
-        or ("" if skip_openai_base else env_openai_base_url)
        or env_openrouter_base_url
        or OPENROUTER_BASE_URL
    ).rstrip("/")
@ -287,6 +365,15 @@ def _resolve_openrouter_runtime(
    # Also provide a placeholder API key for local servers that don't require
    # authentication — the OpenAI SDK requires a non-empty api_key string.
    effective_provider = "custom" if requested_norm == "custom" else "openrouter"
+
+    # For custom endpoints, check if a credential pool exists
+    if effective_provider == "custom" and base_url:
+        pool_result = _try_resolve_from_custom_pool(
+            base_url, effective_provider, _parse_api_mode(model_cfg.get("api_mode")),
+        )
+        if pool_result:
+            return pool_result
+
    if effective_provider == "custom" and not api_key and not _is_openrouter_url:
        api_key = "no-key-required"

@ -301,6 +388,134 @@ def _resolve_openrouter_runtime(
    }


+def _resolve_explicit_runtime(
+    *,
+    provider: str,
+    requested_provider: str,
+    model_cfg: Dict[str, Any],
+    explicit_api_key: Optional[str] = None,
+    explicit_base_url: Optional[str] = None,
+) -> Optional[Dict[str, Any]]:
+    explicit_api_key = str(explicit_api_key or "").strip()
+    explicit_base_url = str(explicit_base_url or "").strip().rstrip("/")
+    if not explicit_api_key and not explicit_base_url:
+        return None
+
+    if provider == "anthropic":
+        cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
+        cfg_base_url = ""
+        if cfg_provider == "anthropic":
+            cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
+        base_url = explicit_base_url or cfg_base_url or "https://api.anthropic.com"
+        api_key = explicit_api_key
+        if not api_key:
+            from agent.anthropic_adapter import resolve_anthropic_token
+
+            api_key = resolve_anthropic_token()
+            if not api_key:
+                raise AuthError(
+                    "No Anthropic credentials found. Set ANTHROPIC_TOKEN or ANTHROPIC_API_KEY, "
+                    "run 'claude setup-token', or authenticate with 'claude /login'."
+                )
+        return {
+            "provider": "anthropic",
+            "api_mode": "anthropic_messages",
+            "base_url": base_url,
+            "api_key": api_key,
+            "source": "explicit",
+            "requested_provider": requested_provider,
+        }
+
+    if provider == "openai-codex":
+        base_url = explicit_base_url or DEFAULT_CODEX_BASE_URL
+        api_key = explicit_api_key
+        last_refresh = None
+        if not api_key:
+            creds = resolve_codex_runtime_credentials()
+            api_key = creds.get("api_key", "")
+            last_refresh = creds.get("last_refresh")
+            if not explicit_base_url:
+                base_url = creds.get("base_url", "").rstrip("/") or base_url
+        return {
+            "provider": "openai-codex",
+            "api_mode": "codex_responses",
+            "base_url": base_url,
+            "api_key": api_key,
+            "source": "explicit",
+            "last_refresh": last_refresh,
+            "requested_provider": requested_provider,
+        }
+
+    if provider == "nous":
+        state = auth_mod.get_provider_auth_state("nous") or {}
+        base_url = (
+            explicit_base_url
+            or str(state.get("inference_base_url") or auth_mod.DEFAULT_NOUS_INFERENCE_URL).strip().rstrip("/")
+        )
+        api_key = explicit_api_key or str(state.get("agent_key") or state.get("access_token") or "").strip()
+        expires_at = state.get("agent_key_expires_at") or state.get("expires_at")
+        if not api_key:
+            creds = resolve_nous_runtime_credentials(
+                min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
+                timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
+            )
+            api_key = creds.get("api_key", "")
+            expires_at = creds.get("expires_at")
+            if not explicit_base_url:
+                base_url = creds.get("base_url", "").rstrip("/") or base_url
+        return {
+            "provider": "nous",
+            "api_mode": "chat_completions",
+            "base_url": base_url,
+            "api_key": api_key,
+            "source": "explicit",
+            "expires_at": expires_at,
+            "requested_provider": requested_provider,
+        }
+
+    pconfig = PROVIDER_REGISTRY.get(provider)
+    if pconfig and pconfig.auth_type == "api_key":
+        env_url = ""
+        if pconfig.base_url_env_var:
+            env_url = os.getenv(pconfig.base_url_env_var, "").strip().rstrip("/")
+
+        base_url = explicit_base_url
+        if not base_url:
+            if provider == "kimi-coding":
+                creds = resolve_api_key_provider_credentials(provider)
+                base_url = creds.get("base_url", "").rstrip("/")
+            else:
+                base_url = env_url or pconfig.inference_base_url
+
+        api_key = explicit_api_key
+        if not api_key:
+            creds = resolve_api_key_provider_credentials(provider)
+            api_key = creds.get("api_key", "")
+            if not base_url:
+                base_url = creds.get("base_url", "").rstrip("/")
+
+        api_mode = "chat_completions"
+        if provider == "copilot":
+            api_mode = _copilot_runtime_api_mode(model_cfg, api_key)
+        else:
+            configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
+            if configured_mode:
+                api_mode = configured_mode
+            elif base_url.rstrip("/").endswith("/anthropic"):
+                api_mode = "anthropic_messages"
+
+        return {
+            "provider": provider,
+            "api_mode": api_mode,
+            "base_url": base_url.rstrip("/"),
+            "api_key": api_key,
+            "source": "explicit",
+            "requested_provider": requested_provider,
+        }
+
+    return None
+
+
 def resolve_runtime_provider(
    *,
    requested: Optional[str] = None,
@ -324,6 +539,57 @@ def resolve_runtime_provider(
        explicit_api_key=explicit_api_key,
        explicit_base_url=explicit_base_url,
    )
+    model_cfg = _get_model_config()
+    explicit_runtime = _resolve_explicit_runtime(
+        provider=provider,
+        requested_provider=requested_provider,
+        model_cfg=model_cfg,
+        explicit_api_key=explicit_api_key,
+        explicit_base_url=explicit_base_url,
+    )
+    if explicit_runtime:
+        return explicit_runtime
+
+    should_use_pool = provider != "openrouter"
+    if provider == "openrouter":
+        cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
+        cfg_base_url = str(model_cfg.get("base_url") or "").strip()
+        env_openai_base_url = os.getenv("OPENAI_BASE_URL", "").strip()
+        env_openrouter_base_url = os.getenv("OPENROUTER_BASE_URL", "").strip()
+        has_custom_endpoint = bool(
+            explicit_base_url
+            or env_openai_base_url
+            or env_openrouter_base_url
+        )
+        if cfg_base_url and cfg_provider in {"auto", "custom"}:
+            has_custom_endpoint = True
+        has_runtime_override = bool(explicit_api_key or explicit_base_url)
+        should_use_pool = (
+            requested_provider in {"openrouter", "auto"}
+            and not has_custom_endpoint
+            and not has_runtime_override
+        )
+
+    try:
+        pool = load_pool(provider) if should_use_pool else None
+    except Exception:
+        pool = None
+    if pool and pool.has_credentials():
+        entry = pool.select()
+        pool_api_key = ""
+        if entry is not None:
+            pool_api_key = (
+                getattr(entry, "runtime_api_key", None)
+                or getattr(entry, "access_token", "")
+            )
+        if entry is not None and pool_api_key:
+            return _resolve_runtime_from_pool_entry(
+                provider=provider,
+                entry=entry,
+                requested_provider=requested_provider,
+                model_cfg=model_cfg,
+                pool=pool,
+            )

    if provider == "nous":
        creds = resolve_nous_runtime_credentials(
@ -377,7 +643,6 @@ def resolve_runtime_provider(
        # Allow base URL override from config.yaml model.base_url, but only
        # when the configured provider is anthropic — otherwise a non-Anthropic
        # base_url (e.g. Codex endpoint) would leak into Anthropic requests.
-        model_cfg = _get_model_config()
        cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
        cfg_base_url = ""
        if cfg_provider == "anthropic":
@ -396,7 +661,6 @@ def resolve_runtime_provider(
    pconfig = PROVIDER_REGISTRY.get(provider)
    if pconfig and pconfig.auth_type == "api_key":
        creds = resolve_api_key_provider_credentials(provider)
-        model_cfg = _get_model_config()
        base_url = creds.get("base_url", "").rstrip("/")
        api_mode = "chat_completions"
        if provider == "copilot":
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
--- a/hermes_cli/tools_config.py
+++ b/hermes_cli/tools_config.py
@ -405,10 +405,10 @@ def _run_post_setup(post_setup_key: str):
            _print_info("    Start the Camofox server:")
            _print_info("      npx @askjo/camoufox-browser")
            _print_info("    First run downloads the Camoufox engine (~300MB)")
-            _print_info("    Or use Docker: docker run -p 9377:9377 jo-inc/camofox-browser")
+            _print_info("    Or use Docker: docker run -p 9377:9377 -e CAMOFOX_PORT=9377 jo-inc/camofox-browser")
        elif not shutil.which("npm"):
            _print_warning("    Node.js not found. Install Camofox via Docker:")
-            _print_info("      docker run -p 9377:9377 jo-inc/camofox-browser")
+            _print_info("      docker run -p 9377:9377 -e CAMOFOX_PORT=9377 jo-inc/camofox-browser")

    elif post_setup_key == "rl_training":
        try:
@ -1105,8 +1105,13 @@ def _configure_simple_requirements(ts_key: str):
            key_label = "    OPENAI_API_KEY" if "api.openai.com" in base_url.lower() else "    API key"
            api_key = _prompt(key_label, password=True)
            if api_key and api_key.strip():
-                save_env_value("OPENAI_BASE_URL", base_url)
                save_env_value("OPENAI_API_KEY", api_key.strip())
+                # Save vision base URL to config (not .env — only secrets go there)
+                from hermes_cli.config import load_config, save_config
+                _cfg = load_config()
+                _aux = _cfg.setdefault("auxiliary", {}).setdefault("vision", {})
+                _aux["base_url"] = base_url
+                save_config(_cfg)
                if "api.openai.com" in base_url.lower():
                    save_env_value("AUXILIARY_VISION_MODEL", "gpt-4o-mini")
                _print_success("    Saved")
--- a/hermes_constants.py
+++ b/hermes_constants.py
@ -17,6 +17,20 @@ def get_hermes_home() -> Path:
    return Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))


+def get_optional_skills_dir(default: Path | None = None) -> Path:
+    """Return the optional-skills directory, honoring package-manager wrappers.
+
+    Packaged installs may ship ``optional-skills`` outside the Python package
+    tree and expose it via ``HERMES_OPTIONAL_SKILLS``.
+    """
+    override = os.getenv("HERMES_OPTIONAL_SKILLS", "").strip()
+    if override:
+        return Path(override)
+    if default is not None:
+        return default
+    return get_hermes_home() / "optional-skills"
+
+
 def get_hermes_dir(new_subpath: str, old_name: str) -> Path:
    """Resolve a Hermes subdirectory with backward compatibility.

--- a/model_tools.py
+++ b/model_tools.py
@ -252,7 +252,7 @@ def get_tool_definitions(
    # Determine which tool names the caller wants
    tools_to_include: set = set()

-    if enabled_toolsets:
+    if enabled_toolsets is not None:
        for toolset_name in enabled_toolsets:
            if validate_toolset(toolset_name):
                resolved = resolve_toolset(toolset_name)
--- a/optional-skills/migration/openclaw-migration/scripts/openclaw_to_hermes.py
+++ b/optional-skills/migration/openclaw-migration/scripts/openclaw_to_hermes.py
@ -2455,9 +2455,24 @@ class Migrator:
            notes.append("")

        notes.extend([
+            "## IMPORTANT: Archive the OpenClaw Directory",
+            "",
+            "After migration, your OpenClaw directory still exists on disk with workspace",
+            "state files (todo.json, sessions, logs). If the Hermes agent discovers these",
+            "directories, it may read/write to them instead of the Hermes state, causing",
+            "confusion (e.g., cron jobs reading a different todo list than interactive sessions).",
+            "",
+            "**Strongly recommended:** Run `hermes claw cleanup` to rename the OpenClaw",
+            "directory to `.openclaw.pre-migration`. This prevents the agent from finding it.",
+            "The directory is renamed, not deleted — you can undo this at any time.",
+            "",
+            "If you skip this step and notice the agent getting confused about workspaces",
+            "or todo lists, run `hermes claw cleanup` to fix it.",
+            "",
            "## Hermes-Specific Setup",
            "",
            "After migration, you may want to:",
+            "- Run `hermes claw cleanup` to archive the OpenClaw directory (prevents state confusion)",
            "- Run `hermes setup` to configure any remaining settings",
            "- Run `hermes mcp list` to verify MCP servers were imported correctly",
            "- Run `hermes cron` to recreate scheduled tasks (see archive/cron-config.json)",
--- a/packaging/homebrew/README.md
+++ b/packaging/homebrew/README.md
@ -0,0 +1,14 @@
+Homebrew packaging notes for Hermes Agent.
+
+Use `packaging/homebrew/hermes-agent.rb` as a tap or `homebrew-core` starting point.
+
+Key choices:
+- Stable builds should target the semver-named sdist asset attached to each GitHub release, not the CalVer tag tarball.
+- `faster-whisper` now lives in the `voice` extra, which keeps wheel-only transitive dependencies out of the base Homebrew formula.
+- The wrapper exports `HERMES_BUNDLED_SKILLS`, `HERMES_OPTIONAL_SKILLS`, and `HERMES_MANAGED=homebrew` so packaged installs keep runtime assets and defer upgrades to Homebrew.
+
+Typical update flow:
+1. Bump the formula `url`, `version`, and `sha256`.
+2. Refresh Python resources with `brew update-python-resources --print-only hermes-agent`.
+3. Keep `ignore_packages: %w[certifi cryptography pydantic]`.
+4. Verify `brew audit --new --strict hermes-agent` and `brew test hermes-agent`.
--- a/packaging/homebrew/hermes-agent.rb
+++ b/packaging/homebrew/hermes-agent.rb
@ -0,0 +1,48 @@
+class HermesAgent < Formula
+  include Language::Python::Virtualenv
+
+  desc "Self-improving AI agent that creates skills from experience"
+  homepage "https://hermes-agent.nousresearch.com"
+  # Stable source should point at the semver-named sdist asset attached by
+  # scripts/release.py, not the CalVer tag tarball.
+  url "https://github.com/NousResearch/hermes-agent/releases/download/v2026.3.30/hermes_agent-0.6.0.tar.gz"
+  sha256 "<replace-with-release-asset-sha256>"
+  license "MIT"
+
+  depends_on "certifi" => :no_linkage
+  depends_on "cryptography" => :no_linkage
+  depends_on "libyaml"
+  depends_on "python@3.14"
+
+  pypi_packages ignore_packages: %w[certifi cryptography pydantic]
+
+  # Refresh resource stanzas after bumping the source url/version:
+  #   brew update-python-resources --print-only hermes-agent
+
+  def install
+    venv = virtualenv_create(libexec, "python3.14")
+    venv.pip_install resources
+    venv.pip_install buildpath
+
+    pkgshare.install "skills", "optional-skills"
+
+    %w[hermes hermes-agent hermes-acp].each do |exe|
+      next unless (libexec/"bin"/exe).exist?
+
+      (bin/exe).write_env_script(
+        libexec/"bin"/exe,
+        HERMES_BUNDLED_SKILLS: pkgshare/"skills",
+        HERMES_OPTIONAL_SKILLS: pkgshare/"optional-skills",
+        HERMES_MANAGED: "homebrew"
+      )
+    end
+  end
+
+  test do
+    assert_match "Hermes Agent v#{version}", shell_output("#{bin}/hermes version")
+
+    managed = shell_output("#{bin}/hermes update 2>&1")
+    assert_match "managed by Homebrew", managed
+    assert_match "brew upgrade hermes-agent", managed
+  end
+end
--- a/pyproject.toml
+++ b/pyproject.toml
@ -32,7 +32,6 @@ dependencies = [
  "fal-client>=0.13.1,<1",
  # Text-to-speech (Edge TTS is free, no API key needed)
  "edge-tts>=7.2.7,<8",
-  "faster-whisper>=1.0.0,<2",
  # Skills Hub (GitHub App JWT auth — optional, only needed for bot identity)
  "PyJWT[crypto]>=2.12.0,<3",  # CVE-2026-32597
 ]
@ -47,7 +46,13 @@ slack = ["slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4"]
 matrix = ["matrix-nio[e2e]>=0.24.0,<1"]
 cli = ["simple-term-menu>=1.0,<2"]
 tts-premium = ["elevenlabs>=1.0,<2"]
-voice = ["sounddevice>=0.4.6,<1", "numpy>=1.24.0,<3"]
+voice = [
+  # Local STT pulls in wheel-only transitive deps (ctranslate2, onnxruntime),
+  # so keep it out of the base install for source-build packagers like Homebrew.
+  "faster-whisper>=1.0.0,<2",
+  "sounddevice>=0.4.6,<1",
+  "numpy>=1.24.0,<3",
+]
 pty = [
  "ptyprocess>=0.7.0,<1; sys_platform != 'win32'",
  "pywinpty>=2.0.0,<3; sys_platform == 'win32'",
--- a/run_agent.py
+++ b/run_agent.py
@ -89,7 +89,7 @@ from agent.model_metadata import (
 )
 from agent.context_compressor import ContextCompressor
 from agent.prompt_caching import apply_anthropic_cache_control
-from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, load_soul_md, TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS
+from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, load_soul_md, TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS, DEVELOPER_ROLE_MODELS
 from agent.usage_pricing import estimate_usage_cost, normalize_usage
 from agent.display import (
    KawaiiSpinner, build_tool_preview as _build_tool_preview,
@ -321,8 +321,12 @@ def _extract_parallel_scope_path(tool_name: str, function_args: dict) -> Path |
    if not isinstance(raw_path, str) or not raw_path.strip():
        return None

+    expanded = Path(raw_path).expanduser()
+    if expanded.is_absolute():
+        return Path(os.path.abspath(str(expanded)))
+
    # Avoid resolve(); the file may not exist yet.
-    return Path(raw_path).expanduser()
+    return Path(os.path.abspath(str(Path.cwd() / expanded)))


 def _paths_overlap(left: Path, right: Path) -> bool:
@ -468,7 +472,7 @@ class AIAgent:
        acp_args: list[str] | None = None,
        command: str = None,
        args: list[str] | None = None,
-        model: str = "anthropic/claude-opus-4.6",  # OpenRouter format
+        model: str = "",
        max_iterations: int = 90,  # Default tool-calling iterations (shared with subagents)
        tool_delay: float = 1.0,
        enabled_toolsets: List[str] = None,
@ -487,6 +491,8 @@ class AIAgent:
        provider_data_collection: str = None,
        session_id: str = None,
        tool_progress_callback: callable = None,
+        tool_start_callback: callable = None,
+        tool_complete_callback: callable = None,
        thinking_callback: callable = None,
        reasoning_callback: callable = None,
        clarify_callback: callable = None,
@ -506,9 +512,11 @@ class AIAgent:
        honcho_config=None,
        iteration_budget: "IterationBudget" = None,
        fallback_model: Dict[str, Any] = None,
+        credential_pool=None,
        checkpoints_enabled: bool = False,
        checkpoint_max_snapshots: int = 50,
        pass_session_id: bool = False,
+        persist_session: bool = True,
    ):
        """
        Initialize the AI Agent.
@ -574,13 +582,14 @@ class AIAgent:
        self.background_review_callback = None  # Optional sync callback for gateway delivery
        self.skip_context_files = skip_context_files
        self.pass_session_id = pass_session_id
+        self.persist_session = persist_session
+        self._credential_pool = credential_pool
        self.log_prefix_chars = log_prefix_chars
        self.log_prefix = f"{log_prefix} " if log_prefix else ""
        # Store effective base URL for feature detection (prompt caching, reasoning, etc.)
-        # When no base_url is provided, the client defaults to OpenRouter, so reflect that here.
-        self.base_url = base_url or OPENROUTER_BASE_URL
+        self.base_url = base_url or ""
        provider_name = provider.strip().lower() if isinstance(provider, str) and provider.strip() else None
-        self.provider = provider_name or "openrouter"
+        self.provider = provider_name or ""
        self.acp_command = acp_command or command
        self.acp_args = list(acp_args or args or [])
        if api_mode in {"chat_completions", "codex_responses", "anthropic_messages"}:
@ -617,6 +626,8 @@ class AIAgent:
            ).start()

        self.tool_progress_callback = tool_progress_callback
+        self.tool_start_callback = tool_start_callback
+        self.tool_complete_callback = tool_complete_callback
        self.thinking_callback = thinking_callback
        self.reasoning_callback = reasoning_callback
        self._reasoning_deltas_fired = False  # Set by _fire_reasoning_delta, reset per API call
@ -1386,6 +1397,7 @@ class AIAgent:
        content = re.sub(r'<thinking>.*?</thinking>', '', content, flags=re.DOTALL | re.IGNORECASE)
        content = re.sub(r'<reasoning>.*?</reasoning>', '', content, flags=re.DOTALL)
        content = re.sub(r'<REASONING_SCRATCHPAD>.*?</REASONING_SCRATCHPAD>', '', content, flags=re.DOTALL)
+        content = re.sub(r'</?(?:think|thinking|reasoning|REASONING_SCRATCHPAD)>\s*', '', content, flags=re.IGNORECASE)
        return content

    def _looks_like_codex_intermediate_ack(
@ -1701,7 +1713,10 @@ class AIAgent:
        """Save session state to both JSON log and SQLite on any exit path.

        Ensures conversations are never lost, even on errors or early returns.
+        Skipped when ``persist_session=False`` (ephemeral helper flows).
        """
+        if not self.persist_session:
+            return
        self._apply_persist_user_message_override(messages)
        self._session_messages = messages
        self._save_session_log(messages)
@ -3234,9 +3249,10 @@ class AIAgent:
            "model": model,
            "instructions": instructions,
            "input": normalized_input,
-            "tools": normalized_tools,
            "store": False,
        }
+        if normalized_tools is not None:
+            normalized["tools"] = normalized_tools

        # Pass through reasoning config
        reasoning = api_kwargs.get("reasoning")
@ -3481,14 +3497,33 @@ class AIAgent:

    @staticmethod
    def _is_openai_client_closed(client: Any) -> bool:
+        """Check if an OpenAI client is closed.
+
+        Handles both property and method forms of is_closed:
+        - httpx.Client.is_closed is a bool property
+        - openai.OpenAI.is_closed is a method returning bool
+
+        Prior bug: getattr(client, "is_closed", False) returned the bound method,
+        which is always truthy, causing unnecessary client recreation on every call.
+        """
        from unittest.mock import Mock

        if isinstance(client, Mock):
            return False
-        if bool(getattr(client, "is_closed", False)):
-            return True
+
+        is_closed_attr = getattr(client, "is_closed", None)
+        if is_closed_attr is not None:
+            # Handle method (openai SDK) vs property (httpx)
+            if callable(is_closed_attr):
+                if is_closed_attr():
+                    return True
+            elif bool(is_closed_attr):
+                return True
+
        http_client = getattr(client, "_client", None)
-        return bool(getattr(http_client, "is_closed", False))
+        if http_client is not None:
+            return bool(getattr(http_client, "is_closed", False))
+        return False

    def _create_openai_client(self, client_kwargs: dict, *, reason: str, shared: bool) -> Any:
        if self.provider == "copilot-acp" or str(client_kwargs.get("base_url", "")).startswith("acp://copilot"):
@ -3511,15 +3546,78 @@ class AIAgent:
        )
        return client

+    @staticmethod
+    def _force_close_tcp_sockets(client: Any) -> int:
+        """Force-close underlying TCP sockets to prevent CLOSE-WAIT accumulation.
+
+        When a provider drops a connection mid-stream, httpx's ``client.close()``
+        performs a graceful shutdown which leaves sockets in CLOSE-WAIT until the
+        OS times them out (often minutes).  This method walks the httpx transport
+        pool and issues ``socket.shutdown(SHUT_RDWR)`` + ``socket.close()`` to
+        force an immediate TCP RST, freeing the file descriptors.
+
+        Returns the number of sockets force-closed.
+        """
+        import socket as _socket
+
+        closed = 0
+        try:
+            http_client = getattr(client, "_client", None)
+            if http_client is None:
+                return 0
+            transport = getattr(http_client, "_transport", None)
+            if transport is None:
+                return 0
+            pool = getattr(transport, "_pool", None)
+            if pool is None:
+                return 0
+            # httpx uses httpcore connection pools; connections live in
+            # _connections (list) or _pool (list) depending on version.
+            connections = (
+                getattr(pool, "_connections", None)
+                or getattr(pool, "_pool", None)
+                or []
+            )
+            for conn in list(connections):
+                stream = (
+                    getattr(conn, "_network_stream", None)
+                    or getattr(conn, "_stream", None)
+                )
+                if stream is None:
+                    continue
+                sock = getattr(stream, "_sock", None)
+                if sock is None:
+                    sock = getattr(stream, "stream", None)
+                    if sock is not None:
+                        sock = getattr(sock, "_sock", None)
+                if sock is None:
+                    continue
+                try:
+                    sock.shutdown(_socket.SHUT_RDWR)
+                except OSError:
+                    pass
+                try:
+                    sock.close()
+                except OSError:
+                    pass
+                closed += 1
+        except Exception as exc:
+            logger.debug("Force-close TCP sockets sweep error: %s", exc)
+        return closed
+
    def _close_openai_client(self, client: Any, *, reason: str, shared: bool) -> None:
        if client is None:
            return
+        # Force-close TCP sockets first to prevent CLOSE-WAIT accumulation,
+        # then do the graceful SDK-level close.
+        force_closed = self._force_close_tcp_sockets(client)
        try:
            client.close()
            logger.info(
-                "OpenAI client closed (%s, shared=%s) %s",
+                "OpenAI client closed (%s, shared=%s, tcp_force_closed=%d) %s",
                reason,
                shared,
+                force_closed,
                self._client_log_context(),
            )
        except Exception as exc:
@ -3564,6 +3662,76 @@ class AIAgent:
        with self._openai_client_lock():
            return self.client

+    def _cleanup_dead_connections(self) -> bool:
+        """Detect and clean up dead TCP connections on the primary client.
+
+        Inspects the httpx connection pool for sockets in unhealthy states
+        (CLOSE-WAIT, errors).  If any are found, force-closes all sockets
+        and rebuilds the primary client from scratch.
+
+        Returns True if dead connections were found and cleaned up.
+        """
+        client = getattr(self, "client", None)
+        if client is None:
+            return False
+        try:
+            http_client = getattr(client, "_client", None)
+            if http_client is None:
+                return False
+            transport = getattr(http_client, "_transport", None)
+            if transport is None:
+                return False
+            pool = getattr(transport, "_pool", None)
+            if pool is None:
+                return False
+            connections = (
+                getattr(pool, "_connections", None)
+                or getattr(pool, "_pool", None)
+                or []
+            )
+            dead_count = 0
+            for conn in list(connections):
+                # Check for connections that are idle but have closed sockets
+                stream = (
+                    getattr(conn, "_network_stream", None)
+                    or getattr(conn, "_stream", None)
+                )
+                if stream is None:
+                    continue
+                sock = getattr(stream, "_sock", None)
+                if sock is None:
+                    sock = getattr(stream, "stream", None)
+                    if sock is not None:
+                        sock = getattr(sock, "_sock", None)
+                if sock is None:
+                    continue
+                # Probe socket health with a non-blocking recv peek
+                import socket as _socket
+                try:
+                    sock.setblocking(False)
+                    data = sock.recv(1, _socket.MSG_PEEK | _socket.MSG_DONTWAIT)
+                    if data == b"":
+                        dead_count += 1
+                except BlockingIOError:
+                    pass  # No data available — socket is healthy
+                except OSError:
+                    dead_count += 1
+                finally:
+                    try:
+                        sock.setblocking(True)
+                    except OSError:
+                        pass
+            if dead_count > 0:
+                logger.warning(
+                    "Found %d dead connection(s) in client pool — rebuilding client",
+                    dead_count,
+                )
+                self._replace_primary_openai_client(reason="dead_connection_cleanup")
+                return True
+        except Exception as exc:
+            logger.debug("Dead connection check error: %s", exc)
+        return False
+
    def _create_request_openai_client(self, *, reason: str) -> Any:
        from unittest.mock import Mock

@ -3579,6 +3747,8 @@ class AIAgent:

    def _run_codex_stream(self, api_kwargs: dict, client: Any = None, on_first_delta: callable = None):
        """Execute one streaming Responses API request and return the final response."""
+        import httpx as _httpx
+
        active_client = client or self._ensure_primary_openai_client(reason="codex_stream_direct")
        max_stream_retries = 1
        has_tool_calls = False
@ -3612,6 +3782,22 @@ class AIAgent:
                            if reasoning_text:
                                self._fire_reasoning_delta(reasoning_text)
                    return stream.get_final_response()
+            except (_httpx.RemoteProtocolError, _httpx.ReadTimeout, _httpx.ConnectError, ConnectionError) as exc:
+                if attempt < max_stream_retries:
+                    logger.debug(
+                        "Codex Responses stream transport failed (attempt %s/%s); retrying. %s error=%s",
+                        attempt + 1,
+                        max_stream_retries + 1,
+                        self._client_log_context(),
+                        exc,
+                    )
+                    continue
+                logger.debug(
+                    "Codex Responses stream transport failed; falling back to create(stream=True). %s error=%s",
+                    self._client_log_context(),
+                    exc,
+                )
+                return self._run_codex_create_stream_fallback(api_kwargs, client=active_client)
            except RuntimeError as exc:
                err_text = str(exc)
                missing_completed = "response.completed" in err_text
@ -3774,6 +3960,100 @@ class AIAgent:
        self._is_anthropic_oauth = _is_oauth_token(new_token)
        return True

+    def _apply_client_headers_for_base_url(self, base_url: str) -> None:
+        from agent.auxiliary_client import _OR_HEADERS
+
+        normalized = (base_url or "").lower()
+        if "openrouter" in normalized:
+            self._client_kwargs["default_headers"] = dict(_OR_HEADERS)
+        elif "api.githubcopilot.com" in normalized:
+            from hermes_cli.models import copilot_default_headers
+
+            self._client_kwargs["default_headers"] = copilot_default_headers()
+        elif "api.kimi.com" in normalized:
+            self._client_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.3"}
+        else:
+            self._client_kwargs.pop("default_headers", None)
+
+    def _swap_credential(self, entry) -> None:
+        runtime_key = getattr(entry, "runtime_api_key", None) or getattr(entry, "access_token", "")
+        runtime_base = getattr(entry, "runtime_base_url", None) or getattr(entry, "base_url", None) or self.base_url
+
+        if self.api_mode == "anthropic_messages":
+            from agent.anthropic_adapter import build_anthropic_client, _is_oauth_token
+
+            try:
+                self._anthropic_client.close()
+            except Exception:
+                pass
+
+            self._anthropic_api_key = runtime_key
+            self._anthropic_base_url = runtime_base
+            self._anthropic_client = build_anthropic_client(runtime_key, runtime_base)
+            self._is_anthropic_oauth = _is_oauth_token(runtime_key) if self.provider == "anthropic" else False
+            self.api_key = runtime_key
+            self.base_url = runtime_base
+            return
+
+        self.api_key = runtime_key
+        self.base_url = runtime_base.rstrip("/") if isinstance(runtime_base, str) else runtime_base
+        self._client_kwargs["api_key"] = self.api_key
+        self._client_kwargs["base_url"] = self.base_url
+        self._apply_client_headers_for_base_url(self.base_url)
+        self._replace_primary_openai_client(reason="credential_rotation")
+
+    def _recover_with_credential_pool(
+        self,
+        *,
+        status_code: Optional[int],
+        has_retried_429: bool,
+    ) -> tuple[bool, bool]:
+        """Attempt credential recovery via pool rotation.
+
+        Returns (recovered, has_retried_429).
+        On 429: first occurrence retries same credential (sets flag True).
+                second consecutive 429 rotates to next credential (resets flag).
+        On 402: immediately rotates (billing exhaustion won't resolve with retry).
+        On 401: attempts token refresh before rotating.
+        """
+        pool = self._credential_pool
+        if pool is None or status_code is None:
+            return False, has_retried_429
+
+        if status_code == 402:
+            next_entry = pool.mark_exhausted_and_rotate(status_code=402)
+            if next_entry is not None:
+                logger.info(f"Credential 402 (billing) — rotated to pool entry {getattr(next_entry, 'id', '?')}")
+                self._swap_credential(next_entry)
+                return True, False
+            return False, has_retried_429
+
+        if status_code == 429:
+            if not has_retried_429:
+                return False, True
+            next_entry = pool.mark_exhausted_and_rotate(status_code=429)
+            if next_entry is not None:
+                logger.info(f"Credential 429 (rate limit) — rotated to pool entry {getattr(next_entry, 'id', '?')}")
+                self._swap_credential(next_entry)
+                return True, False
+            return False, True
+
+        if status_code == 401:
+            refreshed = pool.try_refresh_current()
+            if refreshed is not None:
+                logger.info(f"Credential 401 — refreshed pool entry {getattr(refreshed, 'id', '?')}")
+                self._swap_credential(refreshed)
+                return True, has_retried_429
+            # Refresh failed — rotate to next credential instead of giving up.
+            # The failed entry is already marked exhausted by try_refresh_current().
+            next_entry = pool.mark_exhausted_and_rotate(status_code=401)
+            if next_entry is not None:
+                logger.info(f"Credential 401 (refresh failed) — rotated to pool entry {getattr(next_entry, 'id', '?')}")
+                self._swap_credential(next_entry)
+                return True, False
+
+        return False, has_retried_429
+
    def _anthropic_messages_create(self, api_kwargs: dict):
        if self.api_mode == "anthropic_messages":
            self._try_refresh_anthropic_client_credentials()
@ -4243,6 +4523,11 @@ class AIAgent:
                                    type(e).__name__,
                                    e,
                                )
+                                self._emit_status(
+                                    f"⚠️ Connection to provider dropped "
+                                    f"({type(e).__name__}). Reconnecting… "
+                                    f"(attempt {_stream_attempt + 2}/{_max_stream_retries + 1})"
+                                )
                                # Close the stale request client before retry
                                stale = request_client_holder.get("client")
                                if stale is not None:
@ -4250,7 +4535,21 @@ class AIAgent:
                                        stale, reason="stream_retry_cleanup"
                                    )
                                    request_client_holder["client"] = None
+                                # Also rebuild the primary client to purge
+                                # any dead connections from the pool.
+                                try:
+                                    self._replace_primary_openai_client(
+                                        reason="stream_retry_pool_cleanup"
+                                    )
+                                except Exception:
+                                    pass
                                continue
+                            self._emit_status(
+                                "❌ Connection to provider failed after "
+                                f"{_max_stream_retries + 1} attempts. "
+                                "The provider may be experiencing issues — "
+                                "try again in a moment."
+                            )
                            logger.warning(
                                "Streaming exhausted %s retries on transient error, "
                                "falling back to non-streaming: %s",
@ -4322,6 +4621,12 @@ class AIAgent:
                        self._close_request_openai_client(rc, reason="stale_stream_kill")
                except Exception:
                    pass
+                # Rebuild the primary client too — its connection pool
+                # may hold dead sockets from the same provider outage.
+                try:
+                    self._replace_primary_openai_client(reason="stale_stream_pool_cleanup")
+                except Exception:
+                    pass
                # Reset the timer so we don't kill repeatedly while
                # the inner thread processes the closure.
                last_chunk_time["t"] = time.time()
@ -4722,6 +5027,19 @@ class AIAgent:
                            tool_call.pop("call_id", None)
                            tool_call.pop("response_item_id", None)

+        # GPT-5 and Codex models respond better to 'developer' than 'system'
+        # for instruction-following.  Swap the role at the API boundary so
+        # internal message representation stays uniform ("system").
+        _model_lower = (self.model or "").lower()
+        if (
+            sanitized_messages
+            and sanitized_messages[0].get("role") == "system"
+            and any(p in _model_lower for p in DEVELOPER_ROLE_MODELS)
+        ):
+            # Shallow-copy the list + first message only — rest stays shared.
+            sanitized_messages = list(sanitized_messages)
+            sanitized_messages[0] = {**sanitized_messages[0], "role": "developer"}
+
        provider_preferences = {}
        if self.providers_allowed:
            provider_preferences["only"] = self.providers_allowed
@ -5244,6 +5562,15 @@ class AIAgent:
            if _post_progress < 0.85:
                self._context_pressure_warned = False

+        # Clear the file-read dedup cache.  After compression the original
+        # read content is summarised away — if the model re-reads the same
+        # file it needs the full content, not a "file unchanged" stub.
+        try:
+            from tools.file_tools import reset_file_dedup
+            reset_file_dedup(task_id)
+        except Exception:
+            pass
+
        return compressed, new_system_prompt

    def _execute_tool_calls(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
@ -5408,7 +5735,7 @@ class AIAgent:
                    args_preview = args_str[:self.log_prefix_chars] + "..." if len(args_str) > self.log_prefix_chars else args_str
                    print(f"  📞 Tool {i}: {name}({list(args.keys())}) - {args_preview}")

-        for _, name, args in parsed_calls:
+        for tc, name, args in parsed_calls:
            if self.tool_progress_callback:
                try:
                    preview = _build_tool_preview(name, args)
@ -5416,6 +5743,13 @@ class AIAgent:
                except Exception as cb_err:
                    logging.debug(f"Tool progress callback error: {cb_err}")

+        for tc, name, args in parsed_calls:
+            if self.tool_start_callback:
+                try:
+                    self.tool_start_callback(tc.id, name, args)
+                except Exception as cb_err:
+                    logging.debug(f"Tool start callback error: {cb_err}")
+
        # ── Concurrent execution ─────────────────────────────────────────
        # Each slot holds (function_name, function_args, function_result, duration, error_flag)
        results = [None] * num_tools
@ -5486,6 +5820,12 @@ class AIAgent:
                    response_preview = function_result[:self.log_prefix_chars] + "..." if len(function_result) > self.log_prefix_chars else function_result
                    print(f"  ✅ Tool {i+1} completed in {tool_duration:.2f}s - {response_preview}")

+            if self.tool_complete_callback:
+                try:
+                    self.tool_complete_callback(tc.id, name, args, function_result)
+                except Exception as cb_err:
+                    logging.debug(f"Tool complete callback error: {cb_err}")
+
            # Truncate oversized results
            MAX_TOOL_RESULT_CHARS = 100_000
            if len(function_result) > MAX_TOOL_RESULT_CHARS:
@ -5574,6 +5914,12 @@ class AIAgent:
                except Exception as cb_err:
                    logging.debug(f"Tool progress callback error: {cb_err}")

+            if self.tool_start_callback:
+                try:
+                    self.tool_start_callback(tool_call.id, function_name, function_args)
+                except Exception as cb_err:
+                    logging.debug(f"Tool start callback error: {cb_err}")
+
            # Checkpoint: snapshot working dir before file-mutating tools
            if function_name in ("write_file", "patch") and self._checkpoint_mgr.enabled:
                try:
@ -5738,6 +6084,12 @@ class AIAgent:
                logging.debug(f"Tool {function_name} completed in {tool_duration:.2f}s")
                logging.debug(f"Tool result ({len(function_result)} chars): {function_result}")

+            if self.tool_complete_callback:
+                try:
+                    self.tool_complete_callback(tool_call.id, function_name, function_args, function_result)
+                except Exception as cb_err:
+                    logging.debug(f"Tool complete callback error: {cb_err}")
+
            # Guard against tools returning absurdly large content that would
            # blow up the context window. 100K chars ≈ 25K tokens — generous
            # enough for any reasonable tool output but prevents catastrophic
@ -6076,6 +6428,20 @@ class AIAgent:
        self._last_content_with_tools = None
        self._mute_post_response = False
        self._surrogate_sanitized = False
+
+        # Pre-turn connection health check: detect and clean up dead TCP
+        # connections left over from provider outages or dropped streams.
+        # This prevents the next API call from hanging on a zombie socket.
+        if self.api_mode != "anthropic_messages":
+            try:
+                if self._cleanup_dead_connections():
+                    self._emit_status(
+                        "🔌 Detected stale connections from a previous provider "
+                        "issue — cleaned up automatically. Proceeding with fresh "
+                        "connection."
+                    )
+            except Exception:
+                pass
        # NOTE: _turns_since_memory and _iters_since_skill are NOT reset here.
        # They are initialized in __init__ and must persist across run_conversation
        # calls so that nudge logic accumulates correctly in CLI mode.
@ -6254,6 +6620,12 @@ class AIAgent:
                    )
                    if len(messages) >= _orig_len:
                        break  # Cannot compress further
+                    # Compression created a new session — clear the history
+                    # reference so _flush_messages_to_session_db writes ALL
+                    # compressed messages to the new session's SQLite, not
+                    # skipping them because conversation_history is still the
+                    # pre-compression length.
+                    conversation_history = None
                    # Re-estimate after compression
                    _preflight_tokens = estimate_request_tokens_rough(
                        messages,
@ -6453,6 +6825,7 @@ class AIAgent:
            codex_auth_retry_attempted = False
            anthropic_auth_retry_attempted = False
            nous_auth_retry_attempted = False
+            has_retried_429 = False
            restart_with_compressed_messages = False
            restart_with_length_continuation = False

@ -6888,6 +7261,7 @@ class AIAgent:
                            if not self.quiet_mode:
                                self._vprint(f"{self.log_prefix}   💾 Cache: {cached:,}/{prompt:,} tokens ({hit_pct:.0f}% hit, {written:,} written)")
                    
+                    has_retried_429 = False  # Reset on success
                    break  # Success, exit retry loop

                except InterruptedError:
@ -6930,6 +7304,12 @@ class AIAgent:
                        # prompt or prefill.  Fall through to normal error path.

                    status_code = getattr(api_error, "status_code", None)
+                    recovered_with_pool, has_retried_429 = self._recover_with_credential_pool(
+                        status_code=status_code,
+                        has_retried_429=has_retried_429,
+                    )
+                    if recovered_with_pool:
+                        continue
                    if (
                        self.api_mode == "codex_responses"
                        and self.provider == "openai-codex"
@ -7038,10 +7418,17 @@ class AIAgent:
                        or "quota" in error_msg
                    )
                    if is_rate_limited and self._fallback_index < len(self._fallback_chain):
-                        self._emit_status("⚠️ Rate limited — switching to fallback provider...")
-                        if self._try_activate_fallback():
-                            retry_count = 0
-                            continue
+                        # Don't eagerly fallback if credential pool rotation may
+                        # still recover.  The pool's retry-then-rotate cycle needs
+                        # at least one more attempt to fire — jumping to a fallback
+                        # provider here short-circuits it.
+                        pool = self._credential_pool
+                        pool_may_recover = pool is not None and pool.has_available()
+                        if not pool_may_recover:
+                            self._emit_status("⚠️ Rate limited — switching to fallback provider...")
+                            if self._try_activate_fallback():
+                                retry_count = 0
+                                continue

                    is_payload_too_large = (
                        status_code == 413
@ -7054,6 +7441,7 @@ class AIAgent:
                        compression_attempts += 1
                        if compression_attempts > max_compression_attempts:
                            self._vprint(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached for payload-too-large error.", force=True)
+                            self._vprint(f"{self.log_prefix}   💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True)
                            logging.error(f"{self.log_prefix}413 compression failed after {max_compression_attempts} attempts.")
                            self._persist_session(messages, conversation_history)
                            return {
@ -7078,6 +7466,7 @@ class AIAgent:
                            break
                        else:
                            self._vprint(f"{self.log_prefix}❌ Payload too large and cannot compress further.", force=True)
+                            self._vprint(f"{self.log_prefix}   💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True)
                            logging.error(f"{self.log_prefix}413 payload too large. Cannot compress further.")
                            self._persist_session(messages, conversation_history)
                            return {
@ -7154,6 +7543,7 @@ class AIAgent:
                        compression_attempts += 1
                        if compression_attempts > max_compression_attempts:
                            self._vprint(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached.", force=True)
+                            self._vprint(f"{self.log_prefix}   💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True)
                            logging.error(f"{self.log_prefix}Context compression failed after {max_compression_attempts} attempts.")
                            self._persist_session(messages, conversation_history)
                            return {
@ -7180,7 +7570,7 @@ class AIAgent:
                        else:
                            # Can't compress further and already at minimum tier
                            self._vprint(f"{self.log_prefix}❌ Context length exceeded and cannot compress further.", force=True)
-                            self._vprint(f"{self.log_prefix}   💡 The conversation has accumulated too much content.", force=True)
+                            self._vprint(f"{self.log_prefix}   💡 The conversation has accumulated too much content. Try /new to start fresh, or /compress to manually trigger compression.", force=True)
                            logging.error(f"{self.log_prefix}Context length exceeded: {approx_tokens:,} tokens. Cannot compress further.")
                            self._persist_session(messages, conversation_history)
                            return {
@ -7769,6 +8159,10 @@ class AIAgent:
                            approx_tokens=self.context_compressor.last_prompt_tokens,
                            task_id=effective_task_id,
                        )
+                        # Compression created a new session — clear history so
+                        # _flush_messages_to_session_db writes compressed messages
+                        # to the new session (see preflight compression comment).
+                        conversation_history = None
                    
                    # Save session log incrementally (so progress is visible even if interrupted)
                    self._session_messages = messages
@ -8129,9 +8523,9 @@ class AIAgent:

 def main(
    query: str = None,
-    model: str = "anthropic/claude-opus-4.6",
+    model: str = "",
    api_key: str = None,
-    base_url: str = "https://openrouter.ai/api/v1",
+    base_url: str = "",
    max_turns: int = 10,
    enabled_toolsets: str = None,
    disabled_toolsets: str = None,
--- a/scripts/release.py
+++ b/scripts/release.py
@ -24,6 +24,7 @@ import argparse
 import json
 import os
 import re
+import shutil
 import subprocess
 import sys
 from collections import defaultdict
@ -128,6 +129,16 @@ def git(*args, cwd=None):
    return result.stdout.strip()


+def git_result(*args, cwd=None):
+    """Run a git command and return the full CompletedProcess."""
+    return subprocess.run(
+        ["git"] + list(args),
+        capture_output=True,
+        text=True,
+        cwd=cwd or str(REPO_ROOT),
+    )
+
+
 def get_last_tag():
    """Get the most recent CalVer tag."""
    tags = git("tag", "--list", "v20*", "--sort=-v:refname")
@ -136,6 +147,18 @@ def get_last_tag():
    return None


+def next_available_tag(base_tag: str) -> tuple[str, str]:
+    """Return a tag/calver pair, suffixing same-day releases when needed."""
+    if not git("tag", "--list", base_tag):
+        return base_tag, base_tag.removeprefix("v")
+
+    suffix = 2
+    while git("tag", "--list", f"{base_tag}.{suffix}"):
+        suffix += 1
+    tag_name = f"{base_tag}.{suffix}"
+    return tag_name, tag_name.removeprefix("v")
+
+
 def get_current_version():
    """Read current semver from __init__.py."""
    content = VERSION_FILE.read_text()
@ -192,6 +215,41 @@ def update_version_files(semver: str, calver_date: str):
    PYPROJECT_FILE.write_text(pyproject)


+def build_release_artifacts(semver: str) -> list[Path]:
+    """Build sdist/wheel artifacts for the current release.
+
+    Returns the artifact paths when the local environment has ``python -m build``
+    available. If build tooling is missing or the build fails, returns an empty
+    list and lets the release proceed without attached Python artifacts.
+    """
+    dist_dir = REPO_ROOT / "dist"
+    shutil.rmtree(dist_dir, ignore_errors=True)
+
+    result = subprocess.run(
+        [sys.executable, "-m", "build", "--sdist", "--wheel"],
+        cwd=str(REPO_ROOT),
+        capture_output=True,
+        text=True,
+    )
+    if result.returncode != 0:
+        print("  ⚠ Could not build Python release artifacts.")
+        stderr = result.stderr.strip()
+        stdout = result.stdout.strip()
+        if stderr:
+            print(f"    {stderr.splitlines()[-1]}")
+        elif stdout:
+            print(f"    {stdout.splitlines()[-1]}")
+        print("    Install the 'build' package to attach semver-named sdist/wheel assets.")
+        return []
+
+    artifacts = sorted(p for p in dist_dir.iterdir() if p.is_file())
+    matching = [p for p in artifacts if semver in p.name]
+    if not matching:
+        print("  ⚠ Built artifacts did not match the expected release version.")
+        return []
+    return matching
+
+
 def resolve_author(name: str, email: str) -> str:
    """Resolve a git author to a GitHub @mention."""
    # Try email lookup first
@ -424,18 +482,10 @@ def main():
        now = datetime.now()
        calver_date = f"{now.year}.{now.month}.{now.day}"

-    tag_name = f"v{calver_date}"
-
-    # Check for existing tag with same date
-    existing = git("tag", "--list", tag_name)
-    if existing and not args.publish:
-        # Append a suffix for same-day releases
-        suffix = 2
-        while git("tag", "--list", f"{tag_name}.{suffix}"):
-            suffix += 1
-        tag_name = f"{tag_name}.{suffix}"
-        calver_date = f"{calver_date}.{suffix}"
-        print(f"Note: Tag {tag_name[:-2]} already exists, using {tag_name}")
+    base_tag = f"v{calver_date}"
+    tag_name, calver_date = next_available_tag(base_tag)
+    if tag_name != base_tag:
+        print(f"Note: Tag {base_tag} already exists, using {tag_name}")

    # Determine semver
    current_version = get_current_version()
@ -494,41 +544,83 @@ def main():
            print(f"  ✓ Updated version files to v{new_version} ({calver_date})")

            # Commit version bump
-            git("add", str(VERSION_FILE), str(PYPROJECT_FILE))
-            git("commit", "-m", f"chore: bump version to v{new_version} ({calver_date})")
+            add_result = git_result("add", str(VERSION_FILE), str(PYPROJECT_FILE))
+            if add_result.returncode != 0:
+                print(f"  ✗ Failed to stage version files: {add_result.stderr.strip()}")
+                return
+
+            commit_result = git_result(
+                "commit", "-m", f"chore: bump version to v{new_version} ({calver_date})"
+            )
+            if commit_result.returncode != 0:
+                print(f"  ✗ Failed to commit version bump: {commit_result.stderr.strip()}")
+                return
            print(f"  ✓ Committed version bump")

        # Create annotated tag
-        git("tag", "-a", tag_name, "-m",
-            f"Hermes Agent v{new_version} ({calver_date})\n\nWeekly release")
+        tag_result = git_result(
+            "tag", "-a", tag_name, "-m",
+            f"Hermes Agent v{new_version} ({calver_date})\n\nWeekly release"
+        )
+        if tag_result.returncode != 0:
+            print(f"  ✗ Failed to create tag {tag_name}: {tag_result.stderr.strip()}")
+            return
        print(f"  ✓ Created tag {tag_name}")

        # Push
-        push_result = git("push", "origin", "HEAD", "--tags")
-        print(f"  ✓ Pushed to origin")
+        push_result = git_result("push", "origin", "HEAD", "--tags")
+        if push_result.returncode == 0:
+            print(f"  ✓ Pushed to origin")
+        else:
+            print(f"  ✗ Failed to push to origin: {push_result.stderr.strip()}")
+            print("    Continue manually after fixing access:")
+            print("    git push origin HEAD --tags")
+
+        # Build semver-named Python artifacts so downstream packagers
+        # (e.g. Homebrew) can target them without relying on CalVer tag names.
+        artifacts = build_release_artifacts(new_version)
+        if artifacts:
+            print("  ✓ Built release artifacts:")
+            for artifact in artifacts:
+                print(f"    - {artifact.relative_to(REPO_ROOT)}")

        # Create GitHub release
        changelog_file = REPO_ROOT / ".release_notes.md"
        changelog_file.write_text(changelog)

-        result = subprocess.run(
-            ["gh", "release", "create", tag_name,
-             "--title", f"Hermes Agent v{new_version} ({calver_date})",
-             "--notes-file", str(changelog_file)],
-            capture_output=True, text=True,
-            cwd=str(REPO_ROOT),
-        )
+        gh_cmd = [
+            "gh", "release", "create", tag_name,
+            "--title", f"Hermes Agent v{new_version} ({calver_date})",
+            "--notes-file", str(changelog_file),
+        ]
+        gh_cmd.extend(str(path) for path in artifacts)

-        changelog_file.unlink(missing_ok=True)
-
-        if result.returncode == 0:
-            print(f"  ✓ GitHub release created: {result.stdout.strip()}")
+        gh_bin = shutil.which("gh")
+        if gh_bin:
+            result = subprocess.run(
+                gh_cmd,
+                capture_output=True, text=True,
+                cwd=str(REPO_ROOT),
+            )
        else:
-            print(f"  ✗ GitHub release failed: {result.stderr}")
-            print(f"    Tag was created. Create the release manually:")
-            print(f"    gh release create {tag_name} --title 'Hermes Agent v{new_version} ({calver_date})'")
+            result = None

-        print(f"\n  🎉 Release v{new_version} ({tag_name}) published!")
+        if result and result.returncode == 0:
+            changelog_file.unlink(missing_ok=True)
+            print(f"  ✓ GitHub release created: {result.stdout.strip()}")
+            print(f"\n  🎉 Release v{new_version} ({tag_name}) published!")
+        else:
+            if result is None:
+                print("  ✗ GitHub release skipped: `gh` CLI not found.")
+            else:
+                print(f"  ✗ GitHub release failed: {result.stderr.strip()}")
+            print(f"    Release notes kept at: {changelog_file}")
+            print(f"    Tag was created locally. Create the release manually:")
+            print(
+                f"    gh release create {tag_name} --title 'Hermes Agent v{new_version} ({calver_date})' "
+                f"--notes-file .release_notes.md {' '.join(str(path) for path in artifacts)}"
+            )
+            print(f"\n  ✓ Release artifacts prepared for manual publish: v{new_version} ({tag_name})")
    else:
        print(f"\n{'='*60}")
        print(f"  Dry run complete. To publish, add --publish")
--- a/scripts/whatsapp-bridge/allowlist.js
+++ b/scripts/whatsapp-bridge/allowlist.js
@ -68,6 +68,11 @@ export function matchesAllowedUser(senderId, allowedUsers, sessionDir) {
    return true;
  }

+  // "*" means allow everyone (consistent with SIGNAL_GROUP_ALLOWED_USERS)
+  if (allowedUsers.has('*')) {
+    return true;
+  }
+
  const aliases = expandWhatsAppIdentifiers(senderId, sessionDir);
  for (const alias of aliases) {
    if (allowedUsers.has(alias)) {
--- a/scripts/whatsapp-bridge/allowlist.test.mjs
+++ b/scripts/whatsapp-bridge/allowlist.test.mjs
@ -45,3 +45,15 @@ test('matchesAllowedUser accepts mapped lid sender when allowlist only contains
    rmSync(sessionDir, { recursive: true, force: true });
  }
 });
+
+test('matchesAllowedUser treats * as allow-all wildcard', () => {
+  const sessionDir = mkdtempSync(path.join(os.tmpdir(), 'hermes-wa-allowlist-'));
+
+  try {
+    const allowedUsers = parseAllowedUsers('*');
+    assert.equal(matchesAllowedUser('19175395595@s.whatsapp.net', allowedUsers, sessionDir), true);
+    assert.equal(matchesAllowedUser('267383306489914@lid', allowedUsers, sessionDir), true);
+  } finally {
+    rmSync(sessionDir, { recursive: true, force: true });
+  }
+});
--- a/skills/autonomous-ai-agents/hermes-agent/SKILL.md
+++ b/skills/autonomous-ai-agents/hermes-agent/SKILL.md
@ -1,203 +1,655 @@
 ---
-name: hermes-agent-spawning
-description: Spawn additional Hermes Agent instances as autonomous subprocesses for independent long-running tasks. Supports non-interactive one-shot mode (-q) and interactive PTY mode for multi-turn collaboration. Different from delegate_task — this runs a full separate hermes process.
-version: 1.1.0
-author: Hermes Agent
+name: hermes-agent
+description: Complete guide to using and extending Hermes Agent — CLI usage, setup, configuration, spawning additional agents, gateway platforms, skills, voice, tools, profiles, and a concise contributor reference. Load this skill when helping users configure Hermes, troubleshoot issues, spawn agent instances, or make code contributions.
+version: 2.0.0
+author: Hermes Agent + Teknium
 license: MIT
 metadata:
  hermes:
-    tags: [Agent, Hermes, Multi-Agent, Orchestration, Subprocess, Interactive]
+    tags: [hermes, setup, configuration, multi-agent, spawning, cli, gateway, development]
    homepage: https://github.com/NousResearch/hermes-agent
-    related_skills: [claude-code, codex]
+    related_skills: [claude-code, codex, opencode]
 ---

-# Spawning Hermes Agent Instances
+# Hermes Agent

-Run additional Hermes Agent processes as autonomous subprocesses. Unlike `delegate_task` (which spawns lightweight subagents sharing the same process), this launches fully independent `hermes` CLI processes with their own sessions, tools, and terminal environments.
+Hermes Agent is an open-source AI agent framework by Nous Research that runs in your terminal, messaging platforms, and IDEs. It belongs to the same category as Claude Code (Anthropic), Codex (OpenAI), and OpenClaw — autonomous coding and task-execution agents that use tool calling to interact with your system. Hermes works with any LLM provider (OpenRouter, Anthropic, OpenAI, DeepSeek, local models, and 15+ others) and runs on Linux, macOS, and WSL.

-## When to Use This vs delegate_task
+What makes Hermes different:

-| Feature | `delegate_task` | Spawning `hermes` process |
-|---------|-----------------|--------------------------|
-| Context isolation | Separate conversation, shared process | Fully independent process |
-| Tool access | Subset of parent's tools | Full tool access (all toolsets) |
-| Session persistence | Ephemeral (no DB entry) | Full session logging + DB |
-| Duration | Minutes (bounded by parent's loop) | Hours/days (runs independently) |
-| Monitoring | Parent waits for result | Background process, monitor via `process` tool |
-| Interactive | No | Yes (PTY mode supports back-and-forth) |
-| Use case | Quick parallel subtasks | Long autonomous missions, interactive collaboration |
+- **Self-improving through skills** — Hermes learns from experience by saving reusable procedures as skills. When it solves a complex problem, discovers a workflow, or gets corrected, it can persist that knowledge as a skill document that loads into future sessions. Skills accumulate over time, making the agent better at your specific tasks and environment.
+- **Persistent memory across sessions** — remembers who you are, your preferences, environment details, and lessons learned. Pluggable memory backends (built-in, Honcho, Mem0, and more) let you choose how memory works.
+- **Multi-platform gateway** — the same agent runs on Telegram, Discord, Slack, WhatsApp, Signal, Matrix, Email, and 8+ other platforms with full tool access, not just chat.
+- **Provider-agnostic** — swap models and providers mid-workflow without changing anything else. Credential pools rotate across multiple API keys automatically.
+- **Profiles** — run multiple independent Hermes instances with isolated configs, sessions, skills, and memory.
+- **Extensible** — plugins, MCP servers, custom tools, webhook triggers, cron scheduling, and the full Python ecosystem.

-## Prerequisites
+People use Hermes for software development, research, system administration, data analysis, content creation, home automation, and anything else that benefits from an AI agent with persistent context and full system access.

- `hermes` CLI installed and on PATH
- API key configured in `~/.hermes/.env`
+**This skill helps you work with Hermes Agent effectively** — setting it up, configuring features, spawning additional agent instances, troubleshooting issues, finding the right commands and settings, and understanding how the system works when you need to extend or contribute to it.

-### Installation
+**Docs:** https://hermes-agent.nousresearch.com/docs/

-Requires an interactive shell (the installer runs a setup wizard):
+## Quick Start

-```
+```bash
+# Install
 curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash
+
+# Interactive chat (default)
+hermes
+
+# Single query
+hermes chat -q "What is the capital of France?"
+
+# Setup wizard
+hermes setup
+
+# Change model/provider
+hermes model
+
+# Check health
+hermes doctor
 ```

-This installs uv, Python 3.11, clones the repo, sets up the venv, and launches an interactive setup wizard to configure your API provider and model. See the [GitHub repo](https://github.com/NousResearch/hermes-agent) for details.
+---

-## Resuming Previous Sessions
+## CLI Reference

-Resume a prior CLI session instead of starting fresh. Useful for continuing long tasks across process restarts:
+### Global Flags

 ```
-# Resume the most recent CLI session
-terminal(command="hermes --continue", background=true, pty=true)
+hermes [flags] [command]

-# Resume a specific session by ID (shown on exit)
-terminal(command="hermes --resume 20260225_143052_a1b2c3", background=true, pty=true)
+  --version, -V             Show version
+  --resume, -r SESSION      Resume session by ID or title
+  --continue, -c [NAME]     Resume by name, or most recent session
+  --worktree, -w            Isolated git worktree mode (parallel agents)
+  --skills, -s SKILL        Preload skills (comma-separate or repeat)
+  --profile, -p NAME        Use a named profile
+  --yolo                    Skip dangerous command approval
+  --pass-session-id         Include session ID in system prompt
 ```

-The full conversation history (messages, tool calls, responses) is restored from SQLite. The agent sees everything from the previous session.
+No subcommand defaults to `chat`.

-## Mode 1: One-Shot Query (-q flag)
-
-Run a single query non-interactively. The agent executes, does its work, and exits:
+### Chat

 ```
-terminal(command="hermes chat -q 'Research the latest GRPO training papers and write a summary to ~/research/grpo.md'", timeout=300)
+hermes chat [flags]
+  -q, --query TEXT          Single query, non-interactive
+  -m, --model MODEL         Model (e.g. anthropic/claude-sonnet-4)
+  -t, --toolsets LIST       Comma-separated toolsets
+  --provider PROVIDER       Force provider (openrouter, anthropic, nous, etc.)
+  -v, --verbose             Verbose output
+  -Q, --quiet               Suppress banner, spinner, tool previews
+  --checkpoints             Enable filesystem checkpoints (/rollback)
+  --source TAG              Session source tag (default: cli)
 ```

-Background for long tasks:
+### Configuration
+
 ```
+hermes setup [section]      Interactive wizard (model|terminal|gateway|tools|agent)
+hermes model                Interactive model/provider picker
+hermes config               View current config
+hermes config edit          Open config.yaml in $EDITOR
+hermes config set KEY VAL   Set a config value
+hermes config path          Print config.yaml path
+hermes config env-path      Print .env path
+hermes config check         Check for missing/outdated config
+hermes config migrate       Update config with new options
+hermes login [--provider P] OAuth login (nous, openai-codex)
+hermes logout               Clear stored auth
+hermes doctor [--fix]       Check dependencies and config
+hermes status [--all]       Show component status
+```
+
+### Tools & Skills
+
+```
+hermes tools                Interactive tool enable/disable (curses UI)
+hermes tools list           Show all tools and status
+hermes tools enable NAME    Enable a toolset
+hermes tools disable NAME   Disable a toolset
+
+hermes skills list          List installed skills
+hermes skills search QUERY  Search the skills hub
+hermes skills install ID    Install a skill
+hermes skills inspect ID    Preview without installing
+hermes skills config        Enable/disable skills per platform
+hermes skills check         Check for updates
+hermes skills update        Update outdated skills
+hermes skills uninstall N   Remove a hub skill
+hermes skills publish PATH  Publish to registry
+hermes skills browse        Browse all available skills
+hermes skills tap add REPO  Add a GitHub repo as skill source
+```
+
+### MCP Servers
+
+```
+hermes mcp serve            Run Hermes as an MCP server
+hermes mcp add NAME         Add an MCP server (--url or --command)
+hermes mcp remove NAME      Remove an MCP server
+hermes mcp list             List configured servers
+hermes mcp test NAME        Test connection
+hermes mcp configure NAME   Toggle tool selection
+```
+
+### Gateway (Messaging Platforms)
+
+```
+hermes gateway run          Start gateway foreground
+hermes gateway install      Install as background service
+hermes gateway start/stop   Control the service
+hermes gateway restart      Restart the service
+hermes gateway status       Check status
+hermes gateway setup        Configure platforms
+```
+
+Supported platforms: Telegram, Discord, Slack, WhatsApp, Signal, Email, SMS, Matrix, Mattermost, Home Assistant, DingTalk, Feishu, WeCom, API Server, Webhooks, Open WebUI.
+
+Platform docs: https://hermes-agent.nousresearch.com/docs/user-guide/messaging/
+
+### Sessions
+
+```
+hermes sessions list        List recent sessions
+hermes sessions browse      Interactive picker
+hermes sessions export OUT  Export to JSONL
+hermes sessions rename ID T Rename a session
+hermes sessions delete ID   Delete a session
+hermes sessions prune       Clean up old sessions (--older-than N days)
+hermes sessions stats       Session store statistics
+```
+
+### Cron Jobs
+
+```
+hermes cron list            List jobs (--all for disabled)
+hermes cron create SCHED    Create: '30m', 'every 2h', '0 9 * * *'
+hermes cron edit ID         Edit schedule, prompt, delivery
+hermes cron pause/resume ID Control job state
+hermes cron run ID          Trigger on next tick
+hermes cron remove ID       Delete a job
+hermes cron status          Scheduler status
+```
+
+### Webhooks
+
+```
+hermes webhook subscribe N  Create route at /webhooks/<name>
+hermes webhook list         List subscriptions
+hermes webhook remove NAME  Remove a subscription
+hermes webhook test NAME    Send a test POST
+```
+
+### Profiles
+
+```
+hermes profile list         List all profiles
+hermes profile create NAME  Create (--clone, --clone-all, --clone-from)
+hermes profile use NAME     Set sticky default
+hermes profile delete NAME  Delete a profile
+hermes profile show NAME    Show details
+hermes profile alias NAME   Manage wrapper scripts
+hermes profile rename A B   Rename a profile
+hermes profile export NAME  Export to tar.gz
+hermes profile import FILE  Import from archive
+```
+
+### Credential Pools
+
+```
+hermes auth add             Interactive credential wizard
+hermes auth list [PROVIDER] List pooled credentials
+hermes auth remove P INDEX  Remove by provider + index
+hermes auth reset PROVIDER  Clear exhaustion status
+```
+
+### Other
+
+```
+hermes insights [--days N]  Usage analytics
+hermes update               Update to latest version
+hermes pairing list/approve/revoke  DM authorization
+hermes plugins list/install/remove  Plugin management
+hermes honcho setup/status  Honcho memory integration
+hermes memory setup/status/off  Memory provider config
+hermes completion bash|zsh  Shell completions
+hermes acp                  ACP server (IDE integration)
+hermes claw migrate         Migrate from OpenClaw
+hermes uninstall            Uninstall Hermes
+```
+
+---
+
+## Slash Commands (In-Session)
+
+Type these during an interactive chat session.
+
+### Session Control
+```
+/new (/reset)        Fresh session
+/clear               Clear screen + new session (CLI)
+/retry               Resend last message
+/undo                Remove last exchange
+/title [name]        Name the session
+/compress            Manually compress context
+/stop                Kill background processes
+/rollback [N]        Restore filesystem checkpoint
+/background <prompt> Run prompt in background
+/queue <prompt>      Queue for next turn
+/resume [name]       Resume a named session
+```
+
+### Configuration
+```
+/config              Show config (CLI)
+/model [name]        Show or change model
+/provider            Show provider info
+/prompt [text]       View/set system prompt (CLI)
+/personality [name]  Set personality
+/reasoning [level]   Set reasoning (none|low|medium|high|xhigh|show|hide)
+/verbose             Cycle: off → new → all → verbose
+/voice [on|off|tts]  Voice mode
+/yolo                Toggle approval bypass
+/skin [name]         Change theme (CLI)
+/statusbar           Toggle status bar (CLI)
+```
+
+### Tools & Skills
+```
+/tools               Manage tools (CLI)
+/toolsets            List toolsets (CLI)
+/skills              Search/install skills (CLI)
+/skill <name>        Load a skill into session
+/cron                Manage cron jobs (CLI)
+/reload-mcp          Reload MCP servers
+/plugins             List plugins (CLI)
+```
+
+### Info
+```
+/help                Show commands
+/commands [page]     Browse all commands (gateway)
+/usage               Token usage
+/insights [days]     Usage analytics
+/status              Session info (gateway)
+/profile             Active profile info
+```
+
+### Exit
+```
+/quit (/exit, /q)    Exit CLI
+```
+
+---
+
+## Key Paths & Config
+
+```
+~/.hermes/config.yaml       Main configuration
+~/.hermes/.env              API keys and secrets
+~/.hermes/skills/           Installed skills
+~/.hermes/sessions/         Session transcripts
+~/.hermes/logs/             Gateway and error logs
+~/.hermes/auth.json         OAuth tokens and credential pools
+~/.hermes/hermes-agent/     Source code (if git-installed)
+```
+
+Profiles use `~/.hermes/profiles/<name>/` with the same layout.
+
+### Config Sections
+
+Edit with `hermes config edit` or `hermes config set section.key value`.
+
+| Section | Key options |
+|---------|-------------|
+| `model` | `default`, `provider`, `base_url`, `api_key`, `context_length` |
+| `agent` | `max_turns` (90), `tool_use_enforcement` |
+| `terminal` | `backend` (local/docker/ssh/modal), `cwd`, `timeout` (180) |
+| `compression` | `enabled`, `threshold` (0.50), `target_ratio` (0.20) |
+| `display` | `skin`, `tool_progress`, `show_reasoning`, `show_cost` |
+| `stt` | `enabled`, `provider` (local/groq/openai) |
+| `tts` | `provider` (edge/elevenlabs/openai/kokoro/fish) |
+| `memory` | `memory_enabled`, `user_profile_enabled`, `provider` |
+| `security` | `tirith_enabled`, `website_blocklist` |
+| `delegation` | `model`, `provider`, `max_iterations` (50) |
+| `smart_model_routing` | `enabled`, `cheap_model` |
+| `checkpoints` | `enabled`, `max_snapshots` (50) |
+
+Full config reference: https://hermes-agent.nousresearch.com/docs/user-guide/configuration
+
+### Providers
+
+18 providers supported. Set via `hermes model` or `hermes setup`.
+
+| Provider | Auth | Key env var |
+|----------|------|-------------|
+| OpenRouter | API key | `OPENROUTER_API_KEY` |
+| Anthropic | API key | `ANTHROPIC_API_KEY` |
+| Nous Portal | OAuth | `hermes login --provider nous` |
+| OpenAI Codex | OAuth | `hermes login --provider openai-codex` |
+| GitHub Copilot | Token | `COPILOT_GITHUB_TOKEN` |
+| DeepSeek | API key | `DEEPSEEK_API_KEY` |
+| Hugging Face | Token | `HF_TOKEN` |
+| Z.AI / GLM | API key | `GLM_API_KEY` |
+| MiniMax | API key | `MINIMAX_API_KEY` |
+| Kimi / Moonshot | API key | `KIMI_API_KEY` |
+| Alibaba / DashScope | API key | `DASHSCOPE_API_KEY` |
+| Kilo Code | API key | `KILOCODE_API_KEY` |
+| Custom endpoint | Config | `model.base_url` + `model.api_key` in config.yaml |
+
+Plus: AI Gateway, OpenCode Zen, OpenCode Go, MiniMax CN, GitHub Copilot ACP.
+
+Full provider docs: https://hermes-agent.nousresearch.com/docs/integrations/providers
+
+### Toolsets
+
+Enable/disable via `hermes tools` (interactive) or `hermes tools enable/disable NAME`.
+
+| Toolset | What it provides |
+|---------|-----------------|
+| `web` | Web search and content extraction |
+| `browser` | Browser automation (Browserbase, Camofox, or local Chromium) |
+| `terminal` | Shell commands and process management |
+| `file` | File read/write/search/patch |
+| `code_execution` | Sandboxed Python execution |
+| `vision` | Image analysis |
+| `image_gen` | AI image generation |
+| `tts` | Text-to-speech |
+| `skills` | Skill browsing and management |
+| `memory` | Persistent cross-session memory |
+| `session_search` | Search past conversations |
+| `delegation` | Subagent task delegation |
+| `cronjob` | Scheduled task management |
+| `clarify` | Ask user clarifying questions |
+| `moa` | Mixture of Agents (off by default) |
+| `homeassistant` | Smart home control (off by default) |
+
+Tool changes take effect on `/reset` (new session). They do NOT apply mid-conversation to preserve prompt caching.
+
+---
+
+## Voice & Transcription
+
+### STT (Voice → Text)
+
+Voice messages from messaging platforms are auto-transcribed.
+
+Provider priority (auto-detected):
+1. **Local faster-whisper** — free, no API key: `pip install faster-whisper`
+2. **Groq Whisper** — free tier: set `GROQ_API_KEY`
+3. **OpenAI Whisper** — paid: set `VOICE_TOOLS_OPENAI_KEY`
+
+Config:
+```yaml
+stt:
+  enabled: true
+  provider: local        # local, groq, openai
+  local:
+    model: base          # tiny, base, small, medium, large-v3
+```
+
+### TTS (Text → Voice)
+
+| Provider | Env var | Free? |
+|----------|---------|-------|
+| Edge TTS | None | Yes (default) |
+| ElevenLabs | `ELEVENLABS_API_KEY` | Free tier |
+| OpenAI | `VOICE_TOOLS_OPENAI_KEY` | Paid |
+| Kokoro (local) | None | Free |
+| Fish Audio | `FISH_AUDIO_API_KEY` | Free tier |
+
+Voice commands: `/voice on` (voice-to-voice), `/voice tts` (always voice), `/voice off`.
+
+---
+
+## Spawning Additional Hermes Instances
+
+Run additional Hermes processes as fully independent subprocesses — separate sessions, tools, and environments.
+
+### When to Use This vs delegate_task
+
+| | `delegate_task` | Spawning `hermes` process |
+|-|-----------------|--------------------------|
+| Isolation | Separate conversation, shared process | Fully independent process |
+| Duration | Minutes (bounded by parent loop) | Hours/days |
+| Tool access | Subset of parent's tools | Full tool access |
+| Interactive | No | Yes (PTY mode) |
+| Use case | Quick parallel subtasks | Long autonomous missions |
+
+### One-Shot Mode
+
+```
+terminal(command="hermes chat -q 'Research GRPO papers and write summary to ~/research/grpo.md'", timeout=300)
+
+# Background for long tasks:
 terminal(command="hermes chat -q 'Set up CI/CD for ~/myapp'", background=true)
-# Returns session_id, monitor with process tool
 ```

-## Mode 2: Interactive PTY Session
+### Interactive PTY Mode (via tmux)

-Launch a full interactive Hermes session with PTY for back-and-forth collaboration. You can send messages, review its work, give feedback, and steer it.
-
-Note: Hermes uses prompt_toolkit for its CLI UI. Through a PTY, this works because ptyprocess provides a real terminal — input sent via `submit` arrives as keystrokes. The output log will contain ANSI escape sequences from the UI rendering — focus on the text content, not the formatting.
+Hermes uses prompt_toolkit, which requires a real terminal. Use tmux for interactive spawning:

 ```
-# Start interactive hermes in background with PTY
-terminal(command="hermes", workdir="~/project", background=true, pty=true)
-# Returns session_id
+# Start
+terminal(command="tmux new-session -d -s agent1 -x 120 -y 40 'hermes'", timeout=10)

-# Send it a task
-process(action="submit", session_id="<id>", data="Set up a Python project with FastAPI, add auth endpoints, and write tests")
-
-# Wait for it to work, then check progress
-process(action="log", session_id="<id>")
-
-# Give feedback on what it produced
-process(action="submit", session_id="<id>", data="The tests look good but add edge cases for invalid tokens")
-
-# Check its response
-process(action="log", session_id="<id>")
-
-# Ask it to iterate
-process(action="submit", session_id="<id>", data="Now add rate limiting middleware")
-
-# When done, exit the session
-process(action="submit", session_id="<id>", data="/exit")
-```
-
-### Interactive Collaboration Patterns
-
-**Code review loop** — spawn hermes, send code for review, iterate on feedback:
-```
-terminal(command="hermes", workdir="~/project", background=true, pty=true)
-process(action="submit", session_id="<id>", data="Review the changes in src/auth.py and suggest improvements")
-# ... read its review ...
-process(action="submit", session_id="<id>", data="Good points. Go ahead and implement suggestions 1 and 3")
-# ... it makes changes ...
-process(action="submit", session_id="<id>", data="Run the tests to make sure nothing broke")
-```
-
-**Research with steering** — start broad, narrow down based on findings:
-```
-terminal(command="hermes", background=true, pty=true)
-process(action="submit", session_id="<id>", data="Search for the latest papers on KV cache compression techniques")
-# ... read its findings ...
-process(action="submit", session_id="<id>", data="The MQA approach looks promising. Dig deeper into that one and compare with GQA")
-# ... more detailed research ...
-process(action="submit", session_id="<id>", data="Write up everything you found to ~/research/kv-cache-compression.md")
-```
-
-**Multi-agent coordination** — spawn two agents working on related tasks, pass context between them:
-```
-# Agent A: backend
-terminal(command="hermes", workdir="~/project/backend", background=true, pty=true)
-process(action="submit", session_id="<agent-a>", data="Build a REST API for user management with CRUD endpoints")
-
-# Agent B: frontend
-terminal(command="hermes", workdir="~/project/frontend", background=true, pty=true)
-process(action="submit", session_id="<agent-b>", data="Build a React dashboard that will connect to a REST API at localhost:8000/api/users")
-
-# Check Agent A's progress, relay API schema to Agent B
-process(action="log", session_id="<agent-a>")
-process(action="submit", session_id="<agent-b>", data="Here's the API schema Agent A built: GET /api/users, POST /api/users, etc. Update your fetch calls to match.")
-```
-
-## Parallel Non-Interactive Instances
-
-Spawn multiple independent agents for unrelated tasks:
-
-```
-terminal(command="hermes chat -q 'Research competitor landing pages and write a report to ~/research/competitors.md'", background=true)
-terminal(command="hermes chat -q 'Audit security of ~/myapp and write findings to ~/myapp/SECURITY_AUDIT.md'", background=true)
-process(action="list")
-```
-
-## With Custom Model
-
-```
-terminal(command="hermes chat -q 'Summarize this codebase' --model google/gemini-2.5-pro", workdir="~/project", background=true)
-```
-
-## Gateway Cron Integration
-
-For scheduled autonomous tasks, use the unified `cronjob` tool instead of spawning processes — cron jobs handle delivery, retry, and persistence automatically.
-
-## Key Differences Between Modes
-
-| | `-q` (one-shot) | Interactive (PTY) | `--continue` / `--resume` |
-|---|---|---|---|
-| User interaction | None | Full back-and-forth | Full back-and-forth |
-| PTY required | No | Yes (`pty=true`) | Yes (`pty=true`) |
-| Multi-turn | Single query | Unlimited turns | Continues previous turns |
-| Best for | Fire-and-forget tasks | Iterative work, steering | Picking up where you left off |
-| Exit | Automatic after completion | Send `/exit` or kill | Send `/exit` or kill |
-
-## Known Issues
-
- **Interactive PTY + prompt_toolkit**: The `submit` action sends `\n` (line feed) but prompt_toolkit in raw mode expects `\r` (carriage return) for Enter. Text appears in the prompt but never submits. **Workaround**: Use **tmux** instead of raw PTY mode. tmux's `send-keys Enter` sends the correct `\r`:
-
-```
-# Start hermes inside tmux
-tmux new-session -d -s hermes-session -x 120 -y 40 "hermes"
-sleep 10  # Wait for banner/startup
-
-# Send messages
-tmux send-keys -t hermes-session "your message here" Enter
+# Wait for startup, then send a message
+terminal(command="sleep 8 && tmux send-keys -t agent1 'Build a FastAPI auth service' Enter", timeout=15)

 # Read output
-sleep 15  # Wait for LLM response
-tmux capture-pane -t hermes-session -p
+terminal(command="sleep 20 && tmux capture-pane -t agent1 -p", timeout=5)

-# Multi-turn: just send more messages and capture again
-tmux send-keys -t hermes-session "follow-up message" Enter
+# Send follow-up
+terminal(command="tmux send-keys -t agent1 'Add rate limiting middleware' Enter", timeout=5)

-# Exit when done
-tmux send-keys -t hermes-session "/exit" Enter
-tmux kill-session -t hermes-session
+# Exit
+terminal(command="tmux send-keys -t agent1 '/exit' Enter && sleep 2 && tmux kill-session -t agent1", timeout=10)
 ```

-## Rules
+### Multi-Agent Coordination

-1. **Use `-q` for autonomous tasks** — agent works independently and exits
-2. **Use `pty=true` for interactive sessions** — required for the full CLI UI
-3. **Use `submit` not `write`** — `submit` adds a newline (Enter), `write` doesn't
-4. **Read logs before sending more** — check what the agent produced before giving next instruction
-5. **Set timeouts for `-q` mode** — complex tasks may take 5-10 minutes
-6. **Prefer `delegate_task` for quick subtasks** — spawning a full process has more overhead
-7. **Each instance is independent** — they don't share conversation context with the parent
-8. **Check results** — after completion, read the output files or logs the agent produced
+```
+# Agent A: backend
+terminal(command="tmux new-session -d -s backend -x 120 -y 40 'hermes -w'", timeout=10)
+terminal(command="sleep 8 && tmux send-keys -t backend 'Build REST API for user management' Enter", timeout=15)
+
+# Agent B: frontend
+terminal(command="tmux new-session -d -s frontend -x 120 -y 40 'hermes -w'", timeout=10)
+terminal(command="sleep 8 && tmux send-keys -t frontend 'Build React dashboard for user management' Enter", timeout=15)
+
+# Check progress, relay context between them
+terminal(command="tmux capture-pane -t backend -p | tail -30", timeout=5)
+terminal(command="tmux send-keys -t frontend 'Here is the API schema from the backend agent: ...' Enter", timeout=5)
+```
+
+### Session Resume
+
+```
+# Resume most recent session
+terminal(command="tmux new-session -d -s resumed 'hermes --continue'", timeout=10)
+
+# Resume specific session
+terminal(command="tmux new-session -d -s resumed 'hermes --resume 20260225_143052_a1b2c3'", timeout=10)
+```
+
+### Tips
+
+- **Prefer `delegate_task` for quick subtasks** — less overhead than spawning a full process
+- **Use `-w` (worktree mode)** when spawning agents that edit code — prevents git conflicts
+- **Set timeouts** for one-shot mode — complex tasks can take 5-10 minutes
+- **Use `hermes chat -q` for fire-and-forget** — no PTY needed
+- **Use tmux for interactive sessions** — raw PTY mode has `\r` vs `\n` issues with prompt_toolkit
+- **For scheduled tasks**, use the `cronjob` tool instead of spawning — handles delivery and retry
+
+---
+
+## Troubleshooting
+
+### Voice not working
+1. Check `stt.enabled: true` in config.yaml
+2. Verify provider: `pip install faster-whisper` or set API key
+3. Restart gateway: `/restart`
+
+### Tool not available
+1. `hermes tools` — check if toolset is enabled for your platform
+2. Some tools need env vars (check `.env`)
+3. `/reset` after enabling tools
+
+### Model/provider issues
+1. `hermes doctor` — check config and dependencies
+2. `hermes login` — re-authenticate OAuth providers
+3. Check `.env` has the right API key
+
+### Changes not taking effect
+- **Tools/skills:** `/reset` starts a new session with updated toolset
+- **Config changes:** `/restart` reloads gateway config
+- **Code changes:** Restart the CLI or gateway process
+
+### Skills not showing
+1. `hermes skills list` — verify installed
+2. `hermes skills config` — check platform enablement
+3. Load explicitly: `/skill name` or `hermes -s name`
+
+### Gateway issues
+Check logs first:
+```bash
+grep -i "failed to send\|error" ~/.hermes/logs/gateway.log | tail -20
+```
+
+---
+
+## Where to Find Things
+
+| Looking for... | Location |
+|----------------|----------|
+| Config options | `hermes config edit` or [Configuration docs](https://hermes-agent.nousresearch.com/docs/user-guide/configuration) |
+| Available tools | `hermes tools list` or [Tools reference](https://hermes-agent.nousresearch.com/docs/reference/tools-reference) |
+| Slash commands | `/help` in session or [Slash commands reference](https://hermes-agent.nousresearch.com/docs/reference/slash-commands) |
+| Skills catalog | `hermes skills browse` or [Skills catalog](https://hermes-agent.nousresearch.com/docs/reference/skills-catalog) |
+| Provider setup | `hermes model` or [Providers guide](https://hermes-agent.nousresearch.com/docs/integrations/providers) |
+| Platform setup | `hermes gateway setup` or [Messaging docs](https://hermes-agent.nousresearch.com/docs/user-guide/messaging/) |
+| MCP servers | `hermes mcp list` or [MCP guide](https://hermes-agent.nousresearch.com/docs/user-guide/features/mcp) |
+| Profiles | `hermes profile list` or [Profiles docs](https://hermes-agent.nousresearch.com/docs/user-guide/profiles) |
+| Cron jobs | `hermes cron list` or [Cron docs](https://hermes-agent.nousresearch.com/docs/user-guide/features/cron) |
+| Memory | `hermes memory status` or [Memory docs](https://hermes-agent.nousresearch.com/docs/user-guide/features/memory) |
+| Env variables | `hermes config env-path` or [Env vars reference](https://hermes-agent.nousresearch.com/docs/reference/environment-variables) |
+| CLI commands | `hermes --help` or [CLI reference](https://hermes-agent.nousresearch.com/docs/reference/cli-commands) |
+| Gateway logs | `~/.hermes/logs/gateway.log` |
+| Session files | `~/.hermes/sessions/` or `hermes sessions browse` |
+| Source code | `~/.hermes/hermes-agent/` |
+
+---
+
+## Contributor Quick Reference
+
+For occasional contributors and PR authors. Full developer docs: https://hermes-agent.nousresearch.com/docs/developer-guide/
+
+### Project Layout
+
+```
+hermes-agent/
+├── run_agent.py          # AIAgent — core conversation loop
+├── model_tools.py        # Tool discovery and dispatch
+├── toolsets.py           # Toolset definitions
+├── cli.py                # Interactive CLI (HermesCLI)
+├── hermes_state.py       # SQLite session store
+├── agent/                # Prompt builder, compression, display, adapters
+├── hermes_cli/           # CLI subcommands, config, setup, commands
+│   ├── commands.py       # Slash command registry (CommandDef)
+│   ├── config.py         # DEFAULT_CONFIG, env var definitions
+│   └── main.py           # CLI entry point and argparse
+├── tools/                # One file per tool
+│   └── registry.py       # Central tool registry
+├── gateway/              # Messaging gateway
+│   └── platforms/        # Platform adapters (telegram, discord, etc.)
+├── cron/                 # Job scheduler
+├── tests/                # ~3000 pytest tests
+└── website/              # Docusaurus docs site
+```
+
+Config: `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys).
+
+### Adding a Tool (3 files)
+
+**1. Create `tools/your_tool.py`:**
+```python
+import json, os
+from tools.registry import registry
+
+def check_requirements() -> bool:
+    return bool(os.getenv("EXAMPLE_API_KEY"))
+
+def example_tool(param: str, task_id: str = None) -> str:
+    return json.dumps({"success": True, "data": "..."})
+
+registry.register(
+    name="example_tool",
+    toolset="example",
+    schema={"name": "example_tool", "description": "...", "parameters": {...}},
+    handler=lambda args, **kw: example_tool(
+        param=args.get("param", ""), task_id=kw.get("task_id")),
+    check_fn=check_requirements,
+    requires_env=["EXAMPLE_API_KEY"],
+)
+```
+
+**2. Add import** in `model_tools.py` → `_discover_tools()` list.
+
+**3. Add to `toolsets.py`** → `_HERMES_CORE_TOOLS` list.
+
+All handlers must return JSON strings. Use `get_hermes_home()` for paths, never hardcode `~/.hermes`.
+
+### Adding a Slash Command
+
+1. Add `CommandDef` to `COMMAND_REGISTRY` in `hermes_cli/commands.py`
+2. Add handler in `cli.py` → `process_command()`
+3. (Optional) Add gateway handler in `gateway/run.py`
+
+All consumers (help text, autocomplete, Telegram menu, Slack mapping) derive from the central registry automatically.
+
+### Agent Loop (High Level)
+
+```
+run_conversation():
+  1. Build system prompt
+  2. Loop while iterations < max:
+     a. Call LLM (OpenAI-format messages + tool schemas)
+     b. If tool_calls → dispatch each via handle_function_call() → append results → continue
+     c. If text response → return
+  3. Context compression triggers automatically near token limit
+```
+
+### Testing
+
+```bash
+source venv/bin/activate  # or .venv/bin/activate
+python -m pytest tests/ -o 'addopts=' -q   # Full suite
+python -m pytest tests/tools/ -q            # Specific area
+```
+
+- Tests auto-redirect `HERMES_HOME` to temp dirs — never touch real `~/.hermes/`
+- Run full suite before pushing any change
+- Use `-o 'addopts='` to clear any baked-in pytest flags
+
+### Commit Conventions
+
+```
+type: concise subject line
+
+Optional body.
+```
+
+Types: `fix:`, `feat:`, `refactor:`, `docs:`, `chore:`
+
+### Key Rules
+
+- **Never break prompt caching** — don't change context, tools, or system prompt mid-conversation
+- **Message role alternation** — never two assistant or two user messages in a row
+- Use `get_hermes_home()` from `hermes_constants` for all paths (profile-safe)
+- Config values go in `config.yaml`, secrets go in `.env`
+- New tools need a `check_fn` so they only appear when requirements are met
--- a/skills/dogfood/hermes-agent-setup/SKILL.md
+++ b/skills/dogfood/hermes-agent-setup/SKILL.md
@ -1,300 +0,0 @@
---
-name: hermes-agent-setup
-description: Help users configure Hermes Agent — CLI usage, setup wizard, model/provider selection, tools, skills, voice/STT/TTS, gateway, and troubleshooting. Use when someone asks to enable features, configure settings, or needs help with Hermes itself.
-version: 1.1.0
-author: Hermes Agent
-tags: [setup, configuration, tools, stt, tts, voice, hermes, cli, skills]
---
-
-# Hermes Agent Setup & Configuration
-
-Use this skill when a user asks about configuring Hermes, enabling features, setting up voice, managing tools/skills, or troubleshooting.
-
-## Key Paths
-
- Config: `~/.hermes/config.yaml`
- API keys: `~/.hermes/.env`
- Skills: `~/.hermes/skills/`
- Hermes install: `~/.hermes/hermes-agent/`
- Venv: `~/.hermes/hermes-agent/venv/`
-
-## CLI Overview
-
-Hermes is used via the `hermes` command (or `python -m hermes_cli.main` from the repo).
-
-### Core commands:
-
-```
-hermes                          Interactive chat (default)
-hermes chat -q "question"       Single query, then exit
-hermes chat -m MODEL            Chat with a specific model
-hermes -c                       Resume most recent session
-hermes -c "project name"        Resume session by name
-hermes --resume SESSION_ID      Resume by exact ID
-hermes -w                       Isolated git worktree mode
-hermes -s skill1,skill2         Preload skills for the session
-hermes --yolo                   Skip dangerous command approval
-```
-
-### Configuration & setup:
-
-```
-hermes setup                    Interactive setup wizard (provider, API keys, model)
-hermes model                    Interactive model/provider selection
-hermes config                   View current configuration
-hermes config edit              Open config.yaml in $EDITOR
-hermes config set KEY VALUE     Set a config value directly
-hermes login                    Authenticate with a provider
-hermes logout                   Clear stored auth
-hermes doctor                   Check configuration and dependencies
-```
-
-### Tools & skills:
-
-```
-hermes tools                    Interactive tool enable/disable per platform
-hermes skills list              List installed skills
-hermes skills search QUERY      Search the skills hub
-hermes skills install NAME      Install a skill from the hub
-hermes skills config            Enable/disable skills per platform
-```
-
-### Gateway (messaging platforms):
-
-```
-hermes gateway run              Start the messaging gateway
-hermes gateway install          Install gateway as background service
-hermes gateway status           Check gateway status
-```
-
-### Session management:
-
-```
-hermes sessions list            List past sessions
-hermes sessions browse          Interactive session picker
-hermes sessions rename ID TITLE Rename a session
-hermes sessions export ID       Export session as markdown
-hermes sessions prune           Clean up old sessions
-```
-
-### Other:
-
-```
-hermes status                   Show status of all components
-hermes cron list                List cron jobs
-hermes insights                 Usage analytics
-hermes update                   Update to latest version
-hermes pairing                  Manage DM authorization codes
-```
-
-## Setup Wizard (`hermes setup`)
-
-The interactive setup wizard walks through:
-1. **Provider selection** — OpenRouter, Anthropic, OpenAI, Google, DeepSeek, and many more
-2. **API key entry** — stores securely in the env file
-3. **Model selection** — picks from available models for the chosen provider
-4. **Basic settings** — reasoning effort, tool preferences
-
-Run it from terminal:
-```bash
-cd ~/.hermes/hermes-agent
-source venv/bin/activate
-python -m hermes_cli.main setup
-```
-
-To change just the model/provider later: `hermes model`
-
-## Skills Configuration (`hermes skills`)
-
-Skills are reusable instruction sets that extend what Hermes can do.
-
-### Managing skills:
-
-```bash
-hermes skills list              # Show installed skills
-hermes skills search "docker"   # Search the hub
-hermes skills install NAME      # Install from hub
-hermes skills config            # Enable/disable per platform
-```
-
-### Per-platform skill control:
-
-`hermes skills config` opens an interactive UI where you can enable or disable specific skills for each platform (cli, telegram, discord, etc.). Disabled skills won't appear in the agent's available skills list for that platform.
-
-### Loading skills in a session:
-
- CLI: `hermes -s skill-name` or `hermes -s skill1,skill2`
- Chat: `/skill skill-name`
- Gateway: type `/skill skill-name` in any chat
-
-## Voice Messages (STT)
-
-Voice messages from Telegram/Discord/WhatsApp/Slack/Signal are auto-transcribed when an STT provider is available.
-
-### Provider priority (auto-detected):
-1. **Local faster-whisper** — free, no API key, runs on CPU/GPU
-2. **Groq Whisper** — free tier, needs GROQ_API_KEY
-3. **OpenAI Whisper** — paid, needs VOICE_TOOLS_OPENAI_KEY
-
-### Setup local STT (recommended):
-
-```bash
-cd ~/.hermes/hermes-agent
-source venv/bin/activate
-pip install faster-whisper
-```
-
-Add to config.yaml under the `stt:` section:
-```yaml
-stt:
-  enabled: true
-  provider: local
-  local:
-    model: base  # Options: tiny, base, small, medium, large-v3
-```
-
-Model downloads automatically on first use (~150 MB for base).
-
-### Setup Groq STT (free cloud):
-
-1. Get free key from https://console.groq.com
-2. Add GROQ_API_KEY to the env file
-3. Set provider to groq in config.yaml stt section
-
-### Verify STT:
-
-After config changes, restart the gateway (send /restart in chat, or restart `hermes gateway run`). Then send a voice message.
-
-## Voice Replies (TTS)
-
-Hermes can reply with voice when users send voice messages.
-
-### TTS providers (set API key in env file):
-
-| Provider | Env var | Free? |
-|----------|---------|-------|
-| ElevenLabs | ELEVENLABS_API_KEY | Free tier |
-| OpenAI | VOICE_TOOLS_OPENAI_KEY | Paid |
-| Kokoro (local) | None needed | Free |
-| Fish Audio | FISH_AUDIO_API_KEY | Free tier |
-
-### Voice commands (in any chat):
- `/voice on` — voice reply to voice messages only
- `/voice tts` — voice reply to all messages
- `/voice off` — text only (default)
-
-## Enabling/Disabling Tools (`hermes tools`)
-
-### Interactive tool config:
-
-```bash
-cd ~/.hermes/hermes-agent
-source venv/bin/activate
-python -m hermes_cli.main tools
-```
-
-This opens a curses UI to enable/disable toolsets per platform (cli, telegram, discord, slack, etc.).
-
-### After changing tools:
-
-Use `/reset` in the chat to start a fresh session with the new toolset. Tool changes do NOT take effect mid-conversation (this preserves prompt caching and avoids cost spikes).
-
-### Common toolsets:
-
-| Toolset | What it provides |
-|---------|-----------------|
-| terminal | Shell command execution |
-| file | File read/write/search/patch |
-| web | Web search and extraction |
-| browser | Browser automation (needs Browserbase) |
-| image_gen | AI image generation |
-| mcp | MCP server connections |
-| voice | Text-to-speech output |
-| cronjob | Scheduled tasks |
-
-## Installing Dependencies
-
-Some tools need extra packages:
-
-```bash
-cd ~/.hermes/hermes-agent && source venv/bin/activate
-
-pip install faster-whisper    # Local STT (voice transcription)
-pip install browserbase       # Browser automation
-pip install mcp               # MCP server connections
-```
-
-## Config File Reference
-
-The main config file is `~/.hermes/config.yaml`. Key sections:
-
-```yaml
-# Model and provider
-model:
-  default: anthropic/claude-opus-4.6
-  provider: openrouter
-
-# Agent behavior
-agent:
-  max_turns: 90
-  reasoning_effort: high    # xhigh, high, medium, low, minimal, none
-
-# Voice
-stt:
-  enabled: true
-  provider: local           # local, groq, openai
-tts:
-  provider: elevenlabs      # elevenlabs, openai, kokoro, fish
-
-# Display
-display:
-  skin: default             # default, ares, mono, slate
-  tool_progress: full       # full, compact, off
-  background_process_notifications: all  # all, result, error, off
-```
-
-Edit with `hermes config edit` or `hermes config set KEY VALUE`.
-
-## Gateway Commands (Messaging Platforms)
-
-| Command | What it does |
-|---------|-------------|
-| /reset or /new | Fresh session (picks up new tool config) |
-| /help | Show all commands |
-| /model [name] | Show or change model |
-| /compact | Compress conversation to save context |
-| /voice [mode] | Configure voice replies |
-| /reasoning [effort] | Set reasoning level |
-| /sethome | Set home channel for cron/notifications |
-| /restart | Restart the gateway (picks up config changes) |
-| /status | Show session info |
-| /retry | Retry last message |
-| /undo | Remove last exchange |
-| /personality [name] | Set agent personality |
-| /skill [name] | Load a skill |
-
-## Troubleshooting
-
-### Voice messages not working
-1. Check stt.enabled is true in config.yaml
-2. Check a provider is available (faster-whisper installed, or API key set)
-3. Restart gateway after config changes (/restart)
-
-### Tool not available
-1. Run `hermes tools` to check if the toolset is enabled for your platform
-2. Some tools need env vars — check the env file
-3. Use /reset after enabling tools
-
-### Model/provider issues
-1. Run `hermes doctor` to check configuration
-2. Run `hermes login` to re-authenticate
-3. Check the env file has the right API key
-
-### Changes not taking effect
- Gateway: /reset for tool changes, /restart for config changes
- CLI: start a new session
-
-### Skills not showing up
-1. Check `hermes skills list` shows the skill
-2. Check `hermes skills config` has it enabled for your platform
-3. Load explicitly with `/skill name` or `hermes -s name`
--- a/skills/media/youtube-content/scripts/fetch_transcript.py
+++ b/skills/media/youtube-content/scripts/fetch_transcript.py
@ -48,7 +48,11 @@ def format_timestamp(seconds: float) -> str:


 def fetch_transcript(video_id: str, languages: list = None):
-    """Fetch transcript segments from YouTube."""
+    """Fetch transcript segments from YouTube.
+
+    Returns a list of dicts with 'text', 'start', and 'duration' keys.
+    Compatible with youtube-transcript-api v1.x.
+    """
    try:
        from youtube_transcript_api import YouTubeTranscriptApi
    except ImportError:
@ -56,9 +60,17 @@ def fetch_transcript(video_id: str, languages: list = None):
              file=sys.stderr)
        sys.exit(1)

+    api = YouTubeTranscriptApi()
    if languages:
-        return YouTubeTranscriptApi.get_transcript(video_id, languages=languages)
-    return YouTubeTranscriptApi.get_transcript(video_id)
+        result = api.fetch(video_id, languages=languages)
+    else:
+        result = api.fetch(video_id)
+
+    # v1.x returns FetchedTranscriptSnippet objects; normalize to dicts
+    return [
+        {"text": seg.text, "start": seg.start, "duration": seg.duration}
+        for seg in result
+    ]


 def main():
--- a/tests/agent/test_auxiliary_client.py
+++ b/tests/agent/test_auxiliary_client.py
@ -198,7 +198,8 @@ class TestAnthropicOAuthFlag:
    def test_api_key_no_oauth_flag(self, monkeypatch):
        """Regular API keys (sk-ant-api-*) should create client with is_oauth=False."""
        with patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-api03-testkey1234"), \
-             patch("agent.anthropic_adapter.build_anthropic_client") as mock_build:
+             patch("agent.anthropic_adapter.build_anthropic_client") as mock_build, \
+             patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)):
            mock_build.return_value = MagicMock()
            from agent.auxiliary_client import _try_anthropic, AnthropicAuxiliaryClient
            client, model = _try_anthropic()
@ -207,6 +208,31 @@ class TestAnthropicOAuthFlag:
            adapter = client.chat.completions
            assert adapter._is_oauth is False

+    def test_pool_entry_takes_priority_over_legacy_resolution(self):
+        class _Entry:
+            access_token = "sk-ant-oat01-pooled"
+            base_url = "https://api.anthropic.com"
+
+        class _Pool:
+            def has_credentials(self):
+                return True
+
+            def select(self):
+                return _Entry()
+
+        with (
+            patch("agent.auxiliary_client.load_pool", return_value=_Pool()),
+            patch("agent.anthropic_adapter.resolve_anthropic_token", side_effect=AssertionError("legacy path should not run")),
+            patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()) as mock_build,
+        ):
+            from agent.auxiliary_client import _try_anthropic
+
+            client, model = _try_anthropic()
+
+        assert client is not None
+        assert model == "claude-haiku-4-5-20251001"
+        assert mock_build.call_args.args[0] == "sk-ant-oat01-pooled"
+

 class TestExpiredCodexFallback:
    """Test that expired Codex tokens don't block the auto chain."""
@ -392,7 +418,8 @@ class TestExplicitProviderRouting:
    def test_explicit_anthropic_api_key(self, monkeypatch):
        """provider='anthropic' + regular API key should work with is_oauth=False."""
        with patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-api-regular-key"), \
-             patch("agent.anthropic_adapter.build_anthropic_client") as mock_build:
+             patch("agent.anthropic_adapter.build_anthropic_client") as mock_build, \
+             patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)):
            mock_build.return_value = MagicMock()
            client, model = resolve_provider_client("anthropic")
            assert client is not None
@ -465,9 +492,16 @@ class TestGetTextAuxiliaryClient:
        assert model == "google/gemini-3-flash-preview"

    def test_custom_endpoint_over_codex(self, monkeypatch, codex_auth_dir):
-        monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1")
+        config = {
+            "model": {
+                "provider": "custom",
+                "base_url": "http://localhost:1234/v1",
+                "default": "my-local-model",
+            }
+        }
        monkeypatch.setenv("OPENAI_API_KEY", "lm-studio-key")
-        monkeypatch.setenv("OPENAI_MODEL", "my-local-model")
+        monkeypatch.setattr("hermes_cli.config.load_config", lambda: config)
+        monkeypatch.setattr("hermes_cli.runtime_provider.load_config", lambda: config)
        # Override the autouse monkeypatch for codex
        monkeypatch.setattr(
            "agent.auxiliary_client._read_codex_access_token",
@ -535,6 +569,32 @@ class TestGetTextAuxiliaryClient:
        from agent.auxiliary_client import CodexAuxiliaryClient
        assert isinstance(client, CodexAuxiliaryClient)

+    def test_codex_pool_entry_takes_priority_over_auth_store(self):
+        class _Entry:
+            access_token = "pooled-codex-token"
+            base_url = "https://chatgpt.com/backend-api/codex"
+
+        class _Pool:
+            def has_credentials(self):
+                return True
+
+            def select(self):
+                return _Entry()
+
+        with (
+            patch("agent.auxiliary_client.load_pool", return_value=_Pool()),
+            patch("agent.auxiliary_client.OpenAI"),
+            patch("hermes_cli.auth._read_codex_tokens", side_effect=AssertionError("legacy codex store should not run")),
+        ):
+            from agent.auxiliary_client import _try_codex
+
+            client, model = _try_codex()
+
+        from agent.auxiliary_client import CodexAuxiliaryClient
+
+        assert isinstance(client, CodexAuxiliaryClient)
+        assert model == "gpt-5.2-codex"
+
    def test_returns_none_when_nothing_available(self, monkeypatch):
        monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
        monkeypatch.delenv("OPENAI_API_KEY", raising=False)
@ -583,6 +643,35 @@ class TestVisionClientFallback:
        assert client.__class__.__name__ == "AnthropicAuxiliaryClient"
        assert model == "claude-haiku-4-5-20251001"

+
+class TestAuxiliaryPoolAwareness:
+    def test_try_nous_uses_pool_entry(self):
+        class _Entry:
+            access_token = "pooled-access-token"
+            agent_key = "pooled-agent-key"
+            inference_base_url = "https://inference.pool.example/v1"
+
+        class _Pool:
+            def has_credentials(self):
+                return True
+
+            def select(self):
+                return _Entry()
+
+        with (
+            patch("agent.auxiliary_client.load_pool", return_value=_Pool()),
+            patch("agent.auxiliary_client.OpenAI") as mock_openai,
+        ):
+            from agent.auxiliary_client import _try_nous
+
+            client, model = _try_nous()
+
+        assert client is not None
+        assert model == "gemini-3-flash"
+        call_kwargs = mock_openai.call_args.kwargs
+        assert call_kwargs["api_key"] == "pooled-agent-key"
+        assert call_kwargs["base_url"] == "https://inference.pool.example/v1"
+
    def test_resolve_provider_client_copilot_uses_runtime_credentials(self, monkeypatch):
        monkeypatch.delenv("GITHUB_TOKEN", raising=False)
        monkeypatch.delenv("GH_TOKEN", raising=False)
@ -726,10 +815,17 @@ class TestVisionClientFallback:

    def test_vision_forced_main_uses_custom_endpoint(self, monkeypatch):
        """When explicitly forced to 'main', vision CAN use custom endpoint."""
+        config = {
+            "model": {
+                "provider": "custom",
+                "base_url": "http://localhost:1234/v1",
+                "default": "my-local-model",
+            }
+        }
        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "main")
-        monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1")
        monkeypatch.setenv("OPENAI_API_KEY", "local-key")
-        monkeypatch.setenv("OPENAI_MODEL", "my-local-model")
+        monkeypatch.setattr("hermes_cli.config.load_config", lambda: config)
+        monkeypatch.setattr("hermes_cli.runtime_provider.load_config", lambda: config)
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
             patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = get_vision_auxiliary_client()
@ -827,9 +923,16 @@ class TestResolveForcedProvider:
        assert model is None

    def test_forced_main_uses_custom(self, monkeypatch):
-        monkeypatch.setenv("OPENAI_BASE_URL", "http://local:8080/v1")
+        config = {
+            "model": {
+                "provider": "custom",
+                "base_url": "http://local:8080/v1",
+                "default": "my-local-model",
+            }
+        }
        monkeypatch.setenv("OPENAI_API_KEY", "local-key")
-        monkeypatch.setenv("OPENAI_MODEL", "my-local-model")
+        monkeypatch.setattr("hermes_cli.config.load_config", lambda: config)
+        monkeypatch.setattr("hermes_cli.runtime_provider.load_config", lambda: config)
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
             patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = _resolve_forced_provider("main")
@ -858,10 +961,17 @@ class TestResolveForcedProvider:

    def test_forced_main_skips_openrouter_nous(self, monkeypatch):
        """Even if OpenRouter key is set, 'main' skips it."""
+        config = {
+            "model": {
+                "provider": "custom",
+                "base_url": "http://local:8080/v1",
+                "default": "my-local-model",
+            }
+        }
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
-        monkeypatch.setenv("OPENAI_BASE_URL", "http://local:8080/v1")
        monkeypatch.setenv("OPENAI_API_KEY", "local-key")
-        monkeypatch.setenv("OPENAI_MODEL", "my-local-model")
+        monkeypatch.setattr("hermes_cli.config.load_config", lambda: config)
+        monkeypatch.setattr("hermes_cli.runtime_provider.load_config", lambda: config)
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
             patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = _resolve_forced_provider("main")
--- a/tests/agent/test_redact.py
+++ b/tests/agent/test_redact.py
@ -12,6 +12,8 @@ from agent.redact import redact_sensitive_text, RedactingFormatter
 def _ensure_redaction_enabled(monkeypatch):
    """Ensure HERMES_REDACT_SECRETS is not disabled by prior test imports."""
    monkeypatch.delenv("HERMES_REDACT_SECRETS", raising=False)
+    # Also patch the module-level snapshot so it reflects the cleared env var
+    monkeypatch.setattr("agent.redact._REDACT_ENABLED", True)


 class TestKnownPrefixes:
--- a/tests/e2e/init.py
+++ b/tests/e2e/init.py
--- a/tests/e2e/conftest.py
+++ b/tests/e2e/conftest.py
@ -0,0 +1,173 @@
+"""Shared fixtures for Telegram gateway e2e tests.
+
+These tests exercise the full async message flow:
+    adapter.handle_message(event)
+        → background task
+        → GatewayRunner._handle_message (command dispatch)
+        → adapter.send() (captured by mock)
+
+No LLM, no real platform connections.
+"""
+
+import asyncio
+import sys
+import uuid
+from datetime import datetime
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock
+
+from gateway.config import GatewayConfig, Platform, PlatformConfig
+from gateway.platforms.base import MessageEvent, SendResult
+from gateway.session import SessionEntry, SessionSource, build_session_key
+
+
+#Ensure telegram module is available (mock it if not installed)
+
+def _ensure_telegram_mock():
+    """Install mock telegram modules so TelegramAdapter can be imported."""
+    if "telegram" in sys.modules and hasattr(sys.modules["telegram"], "__file__"):
+        return  # Real library installed
+
+    telegram_mod = MagicMock()
+    telegram_mod.Update = MagicMock()
+    telegram_mod.Update.ALL_TYPES = []
+    telegram_mod.Bot = MagicMock
+    telegram_mod.constants.ParseMode.MARKDOWN_V2 = "MarkdownV2"
+    telegram_mod.ext.Application = MagicMock()
+    telegram_mod.ext.Application.builder = MagicMock
+    telegram_mod.ext.ContextTypes.DEFAULT_TYPE = type(None)
+    telegram_mod.ext.MessageHandler = MagicMock
+    telegram_mod.ext.CommandHandler = MagicMock
+    telegram_mod.ext.filters = MagicMock()
+    telegram_mod.request.HTTPXRequest = MagicMock
+
+    for name in (
+        "telegram",
+        "telegram.constants",
+        "telegram.ext",
+        "telegram.ext.filters",
+        "telegram.request",
+    ):
+        sys.modules.setdefault(name, telegram_mod)
+
+
+_ensure_telegram_mock()
+
+from gateway.platforms.telegram import TelegramAdapter  # noqa: E402
+
+
+#GatewayRunner factory (based on tests/gateway/test_status_command.py)
+
+def make_runner(session_entry: SessionEntry) -> "GatewayRunner":
+    """Create a GatewayRunner with mocked internals for e2e testing.
+
+    Skips __init__ to avoid filesystem/network side effects.
+    All command-dispatch dependencies are wired manually.
+    """
+    from gateway.run import GatewayRunner
+
+    runner = object.__new__(GatewayRunner)
+    runner.config = GatewayConfig(
+        platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="e2e-test-token")}
+    )
+    runner.adapters = {}
+    runner._voice_mode = {}
+    runner.hooks = SimpleNamespace(emit=AsyncMock(), loaded_hooks=False)
+
+    runner.session_store = MagicMock()
+    runner.session_store.get_or_create_session.return_value = session_entry
+    runner.session_store.load_transcript.return_value = []
+    runner.session_store.has_any_sessions.return_value = True
+    runner.session_store.append_to_transcript = MagicMock()
+    runner.session_store.rewrite_transcript = MagicMock()
+    runner.session_store.update_session = MagicMock()
+    runner.session_store.reset_session = MagicMock()
+
+    runner._running_agents = {}
+    runner._pending_messages = {}
+    runner._pending_approvals = {}
+    runner._session_db = None
+    runner._reasoning_config = None
+    runner._provider_routing = {}
+    runner._fallback_model = None
+    runner._show_reasoning = False
+
+    runner._is_user_authorized = lambda _source: True
+    runner._set_session_env = lambda _context: None
+    runner._should_send_voice_reply = lambda *_a, **_kw: False
+    runner._send_voice_reply = AsyncMock()
+    runner._capture_gateway_honcho_if_configured = lambda *a, **kw: None
+    runner._emit_gateway_run_progress = AsyncMock()
+
+    # Pairing store (used by authorization rejection path)
+    runner.pairing_store = MagicMock()
+    runner.pairing_store._is_rate_limited = MagicMock(return_value=False)
+    runner.pairing_store.generate_code = MagicMock(return_value="ABC123")
+
+    return runner
+
+
+#TelegramAdapter factory
+
+def make_adapter(runner) -> TelegramAdapter:
+    """Create a TelegramAdapter wired to *runner*, with send methods mocked.
+
+    connect() is NOT called — no polling, no token lock, no real HTTP.
+    """
+    config = PlatformConfig(enabled=True, token="e2e-test-token")
+    adapter = TelegramAdapter(config)
+
+    # Mock outbound methods so tests can capture what was sent
+    adapter.send = AsyncMock(return_value=SendResult(success=True, message_id="e2e-resp-1"))
+    adapter.send_typing = AsyncMock()
+
+    # Wire adapter ↔ runner
+    adapter.set_message_handler(runner._handle_message)
+    runner.adapters[Platform.TELEGRAM] = adapter
+
+    return adapter
+
+
+#Helpers
+
+def make_source(chat_id: str = "e2e-chat-1", user_id: str = "e2e-user-1") -> SessionSource:
+    return SessionSource(
+        platform=Platform.TELEGRAM,
+        chat_id=chat_id,
+        user_id=user_id,
+        user_name="e2e_tester",
+        chat_type="dm",
+    )
+
+
+def make_event(text: str, chat_id: str = "e2e-chat-1", user_id: str = "e2e-user-1") -> MessageEvent:
+    return MessageEvent(
+        text=text,
+        source=make_source(chat_id, user_id),
+        message_id=f"msg-{uuid.uuid4().hex[:8]}",
+    )
+
+
+def make_session_entry(source: SessionSource = None) -> SessionEntry:
+    source = source or make_source()
+    return SessionEntry(
+        session_key=build_session_key(source),
+        session_id=f"sess-{uuid.uuid4().hex[:8]}",
+        created_at=datetime.now(),
+        updated_at=datetime.now(),
+        platform=Platform.TELEGRAM,
+        chat_type="dm",
+    )
+
+
+async def send_and_capture(adapter: TelegramAdapter, text: str, **event_kwargs) -> AsyncMock:
+    """Send a message through the full e2e flow and return the send mock.
+
+    Drives: adapter.handle_message → background task → runner dispatch → adapter.send.
+    """
+    event = make_event(text, **event_kwargs)
+    adapter.send.reset_mock()
+    await adapter.handle_message(event)
+    # Let the background task complete
+    await asyncio.sleep(0.3)
+    return adapter.send
--- a/tests/e2e/test_telegram_commands.py
+++ b/tests/e2e/test_telegram_commands.py
@ -0,0 +1,217 @@
+"""E2E tests for Telegram gateway slash commands.
+
+Each test drives a message through the full async pipeline:
+    adapter.handle_message(event)
+        → BasePlatformAdapter._process_message_background()
+        → GatewayRunner._handle_message() (command dispatch)
+        → adapter.send() (captured for assertions)
+
+No LLM involved — only gateway-level commands are tested.
+"""
+
+import asyncio
+from unittest.mock import AsyncMock
+
+import pytest
+
+from gateway.platforms.base import SendResult
+from tests.e2e.conftest import (
+    make_adapter,
+    make_event,
+    make_runner,
+    make_session_entry,
+    make_source,
+    send_and_capture,
+)
+
+
+#Fixtures
+
+@pytest.fixture()
+def source():
+    return make_source()
+
+
+@pytest.fixture()
+def session_entry(source):
+    return make_session_entry(source)
+
+
+@pytest.fixture()
+def runner(session_entry):
+    return make_runner(session_entry)
+
+
+@pytest.fixture()
+def adapter(runner):
+    return make_adapter(runner)
+
+
+#Tests
+
+class TestTelegramSlashCommands:
+    """Gateway slash commands dispatched through the full adapter pipeline."""
+
+    @pytest.mark.asyncio
+    async def test_help_returns_command_list(self, adapter):
+        send = await send_and_capture(adapter, "/help")
+
+        send.assert_called_once()
+        response_text = send.call_args[1].get("content") or send.call_args[0][1]
+        assert "/new" in response_text
+        assert "/status" in response_text
+
+    @pytest.mark.asyncio
+    async def test_status_shows_session_info(self, adapter):
+        send = await send_and_capture(adapter, "/status")
+
+        send.assert_called_once()
+        response_text = send.call_args[1].get("content") or send.call_args[0][1]
+        # Status output includes session metadata
+        assert "session" in response_text.lower() or "Session" in response_text
+
+    @pytest.mark.asyncio
+    async def test_new_resets_session(self, adapter, runner):
+        send = await send_and_capture(adapter, "/new")
+
+        send.assert_called_once()
+        runner.session_store.reset_session.assert_called_once()
+
+    @pytest.mark.asyncio
+    async def test_stop_when_no_agent_running(self, adapter):
+        send = await send_and_capture(adapter, "/stop")
+
+        send.assert_called_once()
+        response_text = send.call_args[1].get("content") or send.call_args[0][1]
+        response_lower = response_text.lower()
+        assert "no" in response_lower or "stop" in response_lower or "not running" in response_lower
+
+    @pytest.mark.asyncio
+    async def test_commands_shows_listing(self, adapter):
+        send = await send_and_capture(adapter, "/commands")
+
+        send.assert_called_once()
+        response_text = send.call_args[1].get("content") or send.call_args[0][1]
+        # Should list at least some commands
+        assert "/" in response_text
+
+    @pytest.mark.asyncio
+    async def test_sequential_commands_share_session(self, adapter):
+        """Two commands from the same chat_id should both succeed."""
+        send_help = await send_and_capture(adapter, "/help")
+        send_help.assert_called_once()
+
+        send_status = await send_and_capture(adapter, "/status")
+        send_status.assert_called_once()
+
+    @pytest.mark.asyncio
+    @pytest.mark.xfail(
+        reason="Bug: _handle_provider_command references unbound model_cfg when config.yaml is absent",
+        strict=False,
+    )
+    async def test_provider_shows_current_provider(self, adapter):
+        send = await send_and_capture(adapter, "/provider")
+
+        send.assert_called_once()
+        response_text = send.call_args[1].get("content") or send.call_args[0][1]
+        assert "provider" in response_text.lower()
+
+    @pytest.mark.asyncio
+    async def test_verbose_responds(self, adapter):
+        send = await send_and_capture(adapter, "/verbose")
+
+        send.assert_called_once()
+        response_text = send.call_args[1].get("content") or send.call_args[0][1]
+        # Either shows the mode cycle or tells user to enable it in config
+        assert "verbose" in response_text.lower() or "tool_progress" in response_text
+
+    @pytest.mark.asyncio
+    async def test_personality_lists_options(self, adapter):
+        send = await send_and_capture(adapter, "/personality")
+
+        send.assert_called_once()
+        response_text = send.call_args[1].get("content") or send.call_args[0][1]
+        assert "personalit" in response_text.lower()  # matches "personality" or "personalities"
+
+    @pytest.mark.asyncio
+    async def test_yolo_toggles_mode(self, adapter):
+        send = await send_and_capture(adapter, "/yolo")
+
+        send.assert_called_once()
+        response_text = send.call_args[1].get("content") or send.call_args[0][1]
+        assert "yolo" in response_text.lower()
+
+
+class TestSessionLifecycle:
+    """Verify session state changes across command sequences."""
+
+    @pytest.mark.asyncio
+    async def test_new_then_status_reflects_reset(self, adapter, runner, session_entry):
+        """After /new, /status should report the fresh session."""
+        await send_and_capture(adapter, "/new")
+        runner.session_store.reset_session.assert_called_once()
+
+        send = await send_and_capture(adapter, "/status")
+        send.assert_called_once()
+        response_text = send.call_args[1].get("content") or send.call_args[0][1]
+        # Session ID from the entry should appear in the status output
+        assert session_entry.session_id[:8] in response_text
+
+    @pytest.mark.asyncio
+    async def test_new_is_idempotent(self, adapter, runner):
+        """/new called twice should not crash."""
+        await send_and_capture(adapter, "/new")
+        await send_and_capture(adapter, "/new")
+        assert runner.session_store.reset_session.call_count == 2
+
+
+class TestAuthorization:
+    """Verify the pipeline handles unauthorized users."""
+
+    @pytest.mark.asyncio
+    async def test_unauthorized_user_gets_pairing_response(self, adapter, runner):
+        """Unauthorized DM should trigger pairing code, not a command response."""
+        runner._is_user_authorized = lambda _source: False
+
+        event = make_event("/help")
+        adapter.send.reset_mock()
+        await adapter.handle_message(event)
+        await asyncio.sleep(0.3)
+
+        # The adapter.send is called directly by the authorization path
+        # (not via _send_with_retry), so check it was called with a pairing message
+        adapter.send.assert_called()
+        response_text = adapter.send.call_args[0][1] if len(adapter.send.call_args[0]) > 1 else ""
+        assert "recognize" in response_text.lower() or "pair" in response_text.lower() or "ABC123" in response_text
+
+    @pytest.mark.asyncio
+    async def test_unauthorized_user_does_not_get_help(self, adapter, runner):
+        """Unauthorized user should NOT see the help command output."""
+        runner._is_user_authorized = lambda _source: False
+
+        event = make_event("/help")
+        adapter.send.reset_mock()
+        await adapter.handle_message(event)
+        await asyncio.sleep(0.3)
+
+        # If send was called, it should NOT contain the help text
+        if adapter.send.called:
+            response_text = adapter.send.call_args[0][1] if len(adapter.send.call_args[0]) > 1 else ""
+            assert "/new" not in response_text
+
+
+class TestSendFailureResilience:
+    """Verify the pipeline handles send failures gracefully."""
+
+    @pytest.mark.asyncio
+    async def test_send_failure_does_not_crash_pipeline(self, adapter):
+        """If send() returns failure, the pipeline should not raise."""
+        adapter.send = AsyncMock(return_value=SendResult(success=False, error="network timeout"))
+        adapter.set_message_handler(adapter._message_handler)  # re-wire with same handler
+
+        event = make_event("/help")
+        # Should not raise — pipeline handles send failures internally
+        await adapter.handle_message(event)
+        await asyncio.sleep(0.3)
+
+        adapter.send.assert_called()
--- a/tests/gateway/test_api_server.py
+++ b/tests/gateway/test_api_server.py
@ -427,6 +427,81 @@ class TestChatCompletionsEndpoint:
                assert "Thinking" in body
                assert " about it..." in body

+    @pytest.mark.asyncio
+    async def test_stream_includes_tool_progress(self, adapter):
+        """tool_progress_callback fires → progress appears in the SSE stream."""
+        import asyncio
+
+        app = _create_app(adapter)
+        async with TestClient(TestServer(app)) as cli:
+            async def _mock_run_agent(**kwargs):
+                cb = kwargs.get("stream_delta_callback")
+                tp_cb = kwargs.get("tool_progress_callback")
+                # Simulate tool progress before streaming content
+                if tp_cb:
+                    tp_cb("terminal", "ls -la", {"command": "ls -la"})
+                if cb:
+                    await asyncio.sleep(0.05)
+                    cb("Here are the files.")
+                return (
+                    {"final_response": "Here are the files.", "messages": [], "api_calls": 1},
+                    {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15},
+                )
+
+            with patch.object(adapter, "_run_agent", side_effect=_mock_run_agent):
+                resp = await cli.post(
+                    "/v1/chat/completions",
+                    json={
+                        "model": "test",
+                        "messages": [{"role": "user", "content": "list files"}],
+                        "stream": True,
+                    },
+                )
+                assert resp.status == 200
+                body = await resp.text()
+                assert "[DONE]" in body
+                # Tool progress message must appear in the stream
+                assert "ls -la" in body
+                # Final content must also be present
+                assert "Here are the files." in body
+
+    @pytest.mark.asyncio
+    async def test_stream_tool_progress_skips_internal_events(self, adapter):
+        """Internal events (name starting with _) are not streamed."""
+        import asyncio
+
+        app = _create_app(adapter)
+        async with TestClient(TestServer(app)) as cli:
+            async def _mock_run_agent(**kwargs):
+                cb = kwargs.get("stream_delta_callback")
+                tp_cb = kwargs.get("tool_progress_callback")
+                if tp_cb:
+                    tp_cb("_thinking", "some internal state", {})
+                    tp_cb("web_search", "Python docs", {"query": "Python docs"})
+                if cb:
+                    await asyncio.sleep(0.05)
+                    cb("Found it.")
+                return (
+                    {"final_response": "Found it.", "messages": [], "api_calls": 1},
+                    {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15},
+                )
+
+            with patch.object(adapter, "_run_agent", side_effect=_mock_run_agent):
+                resp = await cli.post(
+                    "/v1/chat/completions",
+                    json={
+                        "model": "test",
+                        "messages": [{"role": "user", "content": "search"}],
+                        "stream": True,
+                    },
+                )
+                assert resp.status == 200
+                body = await resp.text()
+                # Internal _thinking event should NOT appear
+                assert "some internal state" not in body
+                # Real tool progress should appear
+                assert "Python docs" in body
+
    @pytest.mark.asyncio
    async def test_no_user_message_returns_400(self, adapter):
        app = _create_app(adapter)
@ -1501,3 +1576,110 @@ class TestConversationParameter:
                assert resp.status == 200
                # Conversation mapping should NOT be set since store=false
                assert adapter._response_store.get_conversation("ephemeral-chat") is None
+
+
+# ---------------------------------------------------------------------------
+# X-Hermes-Session-Id header (session continuity)
+# ---------------------------------------------------------------------------
+
+
+class TestSessionIdHeader:
+    @pytest.mark.asyncio
+    async def test_new_session_response_includes_session_id_header(self, adapter):
+        """Without X-Hermes-Session-Id, a new session is created and returned in the header."""
+        mock_result = {"final_response": "Hello!", "messages": [], "api_calls": 1}
+        app = _create_app(adapter)
+        async with TestClient(TestServer(app)) as cli:
+            with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
+                mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0})
+                resp = await cli.post(
+                    "/v1/chat/completions",
+                    json={"model": "hermes-agent", "messages": [{"role": "user", "content": "Hi"}]},
+                )
+            assert resp.status == 200
+            assert resp.headers.get("X-Hermes-Session-Id") is not None
+
+    @pytest.mark.asyncio
+    async def test_provided_session_id_is_used_and_echoed(self, adapter):
+        """When X-Hermes-Session-Id is provided, it's passed to the agent and echoed in the response."""
+        mock_result = {"final_response": "Continuing!", "messages": [], "api_calls": 1}
+        mock_db = MagicMock()
+        mock_db.get_messages_as_conversation.return_value = [
+            {"role": "user", "content": "previous message"},
+            {"role": "assistant", "content": "previous reply"},
+        ]
+        adapter._session_db = mock_db
+        app = _create_app(adapter)
+        async with TestClient(TestServer(app)) as cli:
+            with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
+                mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0})
+
+                resp = await cli.post(
+                    "/v1/chat/completions",
+                    headers={"X-Hermes-Session-Id": "my-session-123"},
+                    json={"model": "hermes-agent", "messages": [{"role": "user", "content": "Continue"}]},
+                )
+
+            assert resp.status == 200
+            assert resp.headers.get("X-Hermes-Session-Id") == "my-session-123"
+            call_kwargs = mock_run.call_args.kwargs
+            assert call_kwargs["session_id"] == "my-session-123"
+
+    @pytest.mark.asyncio
+    async def test_provided_session_id_loads_history_from_db(self, adapter):
+        """When X-Hermes-Session-Id is provided, history comes from SessionDB not request body."""
+        mock_result = {"final_response": "OK", "messages": [], "api_calls": 1}
+        db_history = [
+            {"role": "user", "content": "stored message 1"},
+            {"role": "assistant", "content": "stored reply 1"},
+        ]
+        mock_db = MagicMock()
+        mock_db.get_messages_as_conversation.return_value = db_history
+        adapter._session_db = mock_db
+        app = _create_app(adapter)
+        async with TestClient(TestServer(app)) as cli:
+            with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
+                mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0})
+
+                resp = await cli.post(
+                    "/v1/chat/completions",
+                    headers={"X-Hermes-Session-Id": "existing-session"},
+                    # Request body has different history — should be ignored
+                    json={
+                        "model": "hermes-agent",
+                        "messages": [
+                            {"role": "user", "content": "old msg from client"},
+                            {"role": "assistant", "content": "old reply from client"},
+                            {"role": "user", "content": "new question"},
+                        ],
+                    },
+                )
+
+            assert resp.status == 200
+            call_kwargs = mock_run.call_args.kwargs
+            # History must come from DB, not from the request body
+            assert call_kwargs["conversation_history"] == db_history
+            assert call_kwargs["user_message"] == "new question"
+
+    @pytest.mark.asyncio
+    async def test_db_failure_falls_back_to_empty_history(self, adapter):
+        """If SessionDB raises, history falls back to empty and request still succeeds."""
+        mock_result = {"final_response": "OK", "messages": [], "api_calls": 1}
+        # Simulate DB failure: _session_db is None and SessionDB() constructor raises
+        adapter._session_db = None
+        app = _create_app(adapter)
+        async with TestClient(TestServer(app)) as cli:
+            with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run, \
+                 patch("hermes_state.SessionDB", side_effect=Exception("DB unavailable")):
+                mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0})
+
+                resp = await cli.post(
+                    "/v1/chat/completions",
+                    headers={"X-Hermes-Session-Id": "some-session"},
+                    json={"model": "hermes-agent", "messages": [{"role": "user", "content": "Hi"}]},
+                )
+
+            assert resp.status == 200
+            call_kwargs = mock_run.call_args.kwargs
+            assert call_kwargs["conversation_history"] == []
+            assert call_kwargs["session_id"] == "some-session"
--- a/tests/gateway/test_approve_deny_commands.py
+++ b/tests/gateway/test_approve_deny_commands.py
@ -4,6 +4,7 @@ Verifies that dangerous command approvals require explicit /approve or /deny
 slash commands, not bare "yes"/"no" text matching.
 """

+import asyncio
 import time
 from types import SimpleNamespace
 from unittest.mock import AsyncMock, MagicMock, patch
@ -49,6 +50,7 @@ def _make_runner():
    runner._running_agents = {}
    runner._pending_messages = {}
    runner._pending_approvals = {}
+    runner._background_tasks = set()
    runner._session_db = None
    runner._reasoning_config = None
    runner._provider_routing = {}
@ -78,20 +80,32 @@ class TestApproveCommand:

    @pytest.mark.asyncio
    async def test_approve_executes_pending_command(self):
-        """Basic /approve executes the pending command."""
+        """Basic /approve executes the pending command and sends feedback."""
        runner = _make_runner()
        source = _make_source()
        session_key = runner._session_key_for_source(source)
        runner._pending_approvals[session_key] = _make_pending_approval()

        event = _make_event("/approve")
-        with patch("tools.terminal_tool.terminal_tool", return_value="done") as mock_term:
+        with (
+            patch("tools.terminal_tool.terminal_tool", return_value="done") as mock_term,
+            patch.object(runner, "_handle_message", new_callable=AsyncMock, return_value="agent continued"),
+        ):
            result = await runner._handle_approve_command(event)
+            # Yield to let the background continuation task run.
+            # This works because mocks return immediately (no real await points).
+            await asyncio.sleep(0)

-        assert "✅ Command approved and executed" in result
+        # Returns None because feedback is sent directly via adapter
+        assert result is None
        mock_term.assert_called_once_with(command="sudo rm -rf /tmp/test", force=True)
        assert session_key not in runner._pending_approvals

+        # Immediate feedback sent via adapter
+        adapter = runner.adapters[Platform.TELEGRAM]
+        sent_text = adapter.send.call_args_list[0][0][1]
+        assert "Command approved and executed" in sent_text
+
    @pytest.mark.asyncio
    async def test_approve_session_remembers_pattern(self):
        """/approve session approves the pattern for the session."""
@ -104,12 +118,21 @@ class TestApproveCommand:
        with (
            patch("tools.terminal_tool.terminal_tool", return_value="done"),
            patch("tools.approval.approve_session") as mock_session,
+            patch.object(runner, "_handle_message", new_callable=AsyncMock, return_value=None),
        ):
            result = await runner._handle_approve_command(event)
+            # Yield to let the background continuation task run.
+            # This works because mocks return immediately (no real await points).
+            await asyncio.sleep(0)

-        assert "pattern approved for this session" in result
+        assert result is None
        mock_session.assert_called_once_with(session_key, "sudo")

+        # Verify scope message in adapter feedback
+        adapter = runner.adapters[Platform.TELEGRAM]
+        sent_text = adapter.send.call_args_list[0][0][1]
+        assert "pattern approved for this session" in sent_text
+
    @pytest.mark.asyncio
    async def test_approve_always_approves_permanently(self):
        """/approve always approves the pattern permanently."""
@ -122,12 +145,21 @@ class TestApproveCommand:
        with (
            patch("tools.terminal_tool.terminal_tool", return_value="done"),
            patch("tools.approval.approve_permanent") as mock_perm,
+            patch.object(runner, "_handle_message", new_callable=AsyncMock, return_value=None),
        ):
            result = await runner._handle_approve_command(event)
+            # Yield to let the background continuation task run.
+            # This works because mocks return immediately (no real await points).
+            await asyncio.sleep(0)

-        assert "pattern approved permanently" in result
+        assert result is None
        mock_perm.assert_called_once_with("sudo")

+        # Verify scope message in adapter feedback
+        adapter = runner.adapters[Platform.TELEGRAM]
+        sent_text = adapter.send.call_args_list[0][0][1]
+        assert "pattern approved permanently" in sent_text
+
    @pytest.mark.asyncio
    async def test_approve_no_pending(self):
        """/approve with no pending approval returns helpful message."""
@ -152,6 +184,40 @@ class TestApproveCommand:
        assert "expired" in result
        assert session_key not in runner._pending_approvals

+    @pytest.mark.asyncio
+    async def test_approve_reinvokes_agent_with_result(self):
+        """After executing, /approve re-invokes the agent with command output."""
+        runner = _make_runner()
+        source = _make_source()
+        session_key = runner._session_key_for_source(source)
+        runner._pending_approvals[session_key] = _make_pending_approval()
+
+        event = _make_event("/approve")
+        mock_handle = AsyncMock(return_value="I continued the task.")
+
+        with (
+            patch("tools.terminal_tool.terminal_tool", return_value="file deleted"),
+            patch.object(runner, "_handle_message", mock_handle),
+        ):
+            await runner._handle_approve_command(event)
+            # Yield to let the background continuation task run.
+            # This works because mocks return immediately (no real await points).
+            await asyncio.sleep(0)
+
+        # Agent was re-invoked via _handle_message with a synthetic event
+        mock_handle.assert_called_once()
+        synthetic_event = mock_handle.call_args[0][0]
+        assert "approved" in synthetic_event.text.lower()
+        assert "file deleted" in synthetic_event.text
+        assert "sudo rm -rf /tmp/test" in synthetic_event.text
+
+        # The continuation response was sent to the user
+        adapter = runner.adapters[Platform.TELEGRAM]
+        # First call: immediate feedback, second call: agent continuation
+        assert adapter.send.call_count == 2
+        continuation_response = adapter.send.call_args_list[1][0][1]
+        assert continuation_response == "I continued the task."
+

 # ------------------------------------------------------------------
 # /deny command
--- a/tests/gateway/test_async_memory_flush.py
+++ b/tests/gateway/test_async_memory_flush.py
@ -3,7 +3,7 @@
 Verifies that:
 1. _is_session_expired() works from a SessionEntry alone (no source needed)
 2. The sync callback is no longer called in get_or_create_session
-3. _pre_flushed_sessions tracking works correctly
+3. memory_flushed flag persists across save/load cycles (prevents restart re-flush)
 4. The background watcher can detect expired sessions
 """

@ -115,8 +115,8 @@ class TestIsSessionExpired:
 class TestGetOrCreateSessionNoCallback:
    """get_or_create_session should NOT call a sync flush callback."""

-    def test_auto_reset_cleans_pre_flushed_marker(self, idle_store):
-        """When a session auto-resets, the pre_flushed marker should be discarded."""
+    def test_auto_reset_creates_new_session_after_flush(self, idle_store):
+        """When a flushed session auto-resets, a new session_id is created."""
        source = SessionSource(
            platform=Platform.TELEGRAM,
            chat_id="123",
@ -127,7 +127,7 @@ class TestGetOrCreateSessionNoCallback:
        old_sid = entry1.session_id

        # Simulate the watcher having flushed it
-        idle_store._pre_flushed_sessions.add(old_sid)
+        entry1.memory_flushed = True

        # Simulate the session going idle
        entry1.updated_at = datetime.now() - timedelta(minutes=120)
@ -137,9 +137,8 @@ class TestGetOrCreateSessionNoCallback:
        entry2 = idle_store.get_or_create_session(source)
        assert entry2.session_id != old_sid
        assert entry2.was_auto_reset is True
-
-        # The old session_id should be removed from pre_flushed
-        assert old_sid not in idle_store._pre_flushed_sessions
+        # New session starts with memory_flushed=False
+        assert entry2.memory_flushed is False

    def test_no_sync_callback_invoked(self, idle_store):
        """No synchronous callback should block during auto-reset."""
@ -160,21 +159,91 @@ class TestGetOrCreateSessionNoCallback:
        assert entry2.was_auto_reset is True


-class TestPreFlushedSessionsTracking:
-    """The _pre_flushed_sessions set should prevent double-flushing."""
+class TestMemoryFlushedFlag:
+    """The memory_flushed flag on SessionEntry prevents double-flushing."""

-    def test_starts_empty(self, idle_store):
-        assert len(idle_store._pre_flushed_sessions) == 0
+    def test_defaults_to_false(self):
+        entry = SessionEntry(
+            session_key="agent:main:telegram:dm:123",
+            session_id="sid_new",
+            created_at=datetime.now(),
+            updated_at=datetime.now(),
+            platform=Platform.TELEGRAM,
+            chat_type="dm",
+        )
+        assert entry.memory_flushed is False

-    def test_add_and_check(self, idle_store):
-        idle_store._pre_flushed_sessions.add("sid_old")
-        assert "sid_old" in idle_store._pre_flushed_sessions
-        assert "sid_other" not in idle_store._pre_flushed_sessions
+    def test_persists_through_save_load(self, idle_store):
+        """memory_flushed=True must survive a save/load cycle (simulates restart)."""
+        key = "agent:main:discord:thread:789"
+        entry = SessionEntry(
+            session_key=key,
+            session_id="sid_flushed",
+            created_at=datetime.now() - timedelta(hours=5),
+            updated_at=datetime.now() - timedelta(hours=5),
+            platform=Platform.DISCORD,
+            chat_type="thread",
+            memory_flushed=True,
+        )
+        idle_store._entries[key] = entry
+        idle_store._save()

-    def test_discard_on_reset(self, idle_store):
-        """discard should remove without raising if not present."""
-        idle_store._pre_flushed_sessions.add("sid_a")
-        idle_store._pre_flushed_sessions.discard("sid_a")
-        assert "sid_a" not in idle_store._pre_flushed_sessions
-        # discard on non-existent should not raise
-        idle_store._pre_flushed_sessions.discard("sid_nonexistent")
+        # Simulate restart: clear in-memory state, reload from disk
+        idle_store._entries.clear()
+        idle_store._loaded = False
+        idle_store._ensure_loaded()
+
+        reloaded = idle_store._entries[key]
+        assert reloaded.memory_flushed is True
+
+    def test_unflushed_entry_survives_restart_as_unflushed(self, idle_store):
+        """An entry without memory_flushed stays False after reload."""
+        key = "agent:main:telegram:dm:456"
+        entry = SessionEntry(
+            session_key=key,
+            session_id="sid_not_flushed",
+            created_at=datetime.now() - timedelta(hours=2),
+            updated_at=datetime.now() - timedelta(hours=2),
+            platform=Platform.TELEGRAM,
+            chat_type="dm",
+        )
+        idle_store._entries[key] = entry
+        idle_store._save()
+
+        idle_store._entries.clear()
+        idle_store._loaded = False
+        idle_store._ensure_loaded()
+
+        reloaded = idle_store._entries[key]
+        assert reloaded.memory_flushed is False
+
+    def test_roundtrip_to_dict_from_dict(self):
+        """to_dict/from_dict must preserve memory_flushed."""
+        entry = SessionEntry(
+            session_key="agent:main:telegram:dm:999",
+            session_id="sid_rt",
+            created_at=datetime.now(),
+            updated_at=datetime.now(),
+            platform=Platform.TELEGRAM,
+            chat_type="dm",
+            memory_flushed=True,
+        )
+        d = entry.to_dict()
+        assert d["memory_flushed"] is True
+
+        restored = SessionEntry.from_dict(d)
+        assert restored.memory_flushed is True
+
+    def test_legacy_entry_without_field_defaults_false(self):
+        """Old sessions.json entries missing memory_flushed should default to False."""
+        data = {
+            "session_key": "agent:main:telegram:dm:legacy",
+            "session_id": "sid_legacy",
+            "created_at": datetime.now().isoformat(),
+            "updated_at": datetime.now().isoformat(),
+            "platform": "telegram",
+            "chat_type": "dm",
+            # no memory_flushed key
+        }
+        entry = SessionEntry.from_dict(data)
+        assert entry.memory_flushed is False
--- a/tests/gateway/test_discord_reactions.py
+++ b/tests/gateway/test_discord_reactions.py
@ -168,3 +168,67 @@ async def test_reaction_helper_failures_do_not_break_message_flow(adapter):
    await adapter._process_message_background(event, build_session_key(event.source))

    adapter.send.assert_awaited_once()
+
+
+@pytest.mark.asyncio
+async def test_reactions_disabled_via_env(adapter, monkeypatch):
+    """When DISCORD_REACTIONS=false, no reactions should be added."""
+    monkeypatch.setenv("DISCORD_REACTIONS", "false")
+
+    raw_message = SimpleNamespace(
+        add_reaction=AsyncMock(),
+        remove_reaction=AsyncMock(),
+    )
+
+    async def handler(_event):
+        await asyncio.sleep(0)
+        return "ack"
+
+    async def hold_typing(_chat_id, interval=2.0, metadata=None):
+        await asyncio.Event().wait()
+
+    adapter.set_message_handler(handler)
+    adapter.send = AsyncMock(return_value=SendResult(success=True, message_id="999"))
+    adapter._keep_typing = hold_typing
+
+    event = _make_event("4", raw_message)
+    await adapter._process_message_background(event, build_session_key(event.source))
+
+    raw_message.add_reaction.assert_not_awaited()
+    raw_message.remove_reaction.assert_not_awaited()
+    # Response should still be sent
+    adapter.send.assert_awaited_once()
+
+
+@pytest.mark.asyncio
+async def test_reactions_disabled_via_env_zero(adapter, monkeypatch):
+    """DISCORD_REACTIONS=0 should also disable reactions."""
+    monkeypatch.setenv("DISCORD_REACTIONS", "0")
+
+    raw_message = SimpleNamespace(
+        add_reaction=AsyncMock(),
+        remove_reaction=AsyncMock(),
+    )
+
+    event = _make_event("5", raw_message)
+    await adapter.on_processing_start(event)
+    await adapter.on_processing_complete(event, success=True)
+
+    raw_message.add_reaction.assert_not_awaited()
+    raw_message.remove_reaction.assert_not_awaited()
+
+
+@pytest.mark.asyncio
+async def test_reactions_enabled_by_default(adapter, monkeypatch):
+    """When DISCORD_REACTIONS is unset, reactions should still work (default: true)."""
+    monkeypatch.delenv("DISCORD_REACTIONS", raising=False)
+
+    raw_message = SimpleNamespace(
+        add_reaction=AsyncMock(),
+        remove_reaction=AsyncMock(),
+    )
+
+    event = _make_event("6", raw_message)
+    await adapter.on_processing_start(event)
+
+    raw_message.add_reaction.assert_awaited_once_with("👀")
--- a/tests/gateway/test_matrix.py
+++ b/tests/gateway/test_matrix.py
@ -643,3 +643,353 @@ class TestMatrixEncryptedSendFallback:
        assert fake_client.room_send.await_count == 2
        second_call = fake_client.room_send.await_args_list[1]
        assert second_call.kwargs.get("ignore_unverified_devices") is True
+
+
+# ---------------------------------------------------------------------------
+# E2EE: Auto-trust devices
+# ---------------------------------------------------------------------------
+
+class TestMatrixAutoTrustDevices:
+    def test_auto_trust_verifies_unverified_devices(self):
+        adapter = _make_adapter()
+
+        # DeviceStore.__iter__ yields OlmDevice objects directly.
+        device_a = MagicMock()
+        device_a.device_id = "DEVICE_A"
+        device_a.verified = False
+        device_b = MagicMock()
+        device_b.device_id = "DEVICE_B"
+        device_b.verified = True  # already trusted
+        device_c = MagicMock()
+        device_c.device_id = "DEVICE_C"
+        device_c.verified = False
+
+        fake_client = MagicMock()
+        fake_client.device_id = "OWN_DEVICE"
+        fake_client.verify_device = MagicMock()
+
+        # Simulate DeviceStore iteration (yields OlmDevice objects)
+        fake_client.device_store = MagicMock()
+        fake_client.device_store.__iter__ = MagicMock(
+            return_value=iter([device_a, device_b, device_c])
+        )
+
+        adapter._client = fake_client
+        adapter._auto_trust_devices()
+
+        # Should have verified device_a and device_c (not device_b, already verified)
+        assert fake_client.verify_device.call_count == 2
+        verified_devices = [call.args[0] for call in fake_client.verify_device.call_args_list]
+        assert device_a in verified_devices
+        assert device_c in verified_devices
+        assert device_b not in verified_devices
+
+    def test_auto_trust_skips_own_device(self):
+        adapter = _make_adapter()
+
+        own_device = MagicMock()
+        own_device.device_id = "MY_DEVICE"
+        own_device.verified = False
+
+        fake_client = MagicMock()
+        fake_client.device_id = "MY_DEVICE"
+        fake_client.verify_device = MagicMock()
+
+        fake_client.device_store = MagicMock()
+        fake_client.device_store.__iter__ = MagicMock(
+            return_value=iter([own_device])
+        )
+
+        adapter._client = fake_client
+        adapter._auto_trust_devices()
+
+        fake_client.verify_device.assert_not_called()
+
+    def test_auto_trust_handles_missing_device_store(self):
+        adapter = _make_adapter()
+        fake_client = MagicMock(spec=[])  # empty spec — no attributes
+        adapter._client = fake_client
+        # Should not raise
+        adapter._auto_trust_devices()
+
+
+# ---------------------------------------------------------------------------
+# E2EE: MegolmEvent key request + buffering
+# ---------------------------------------------------------------------------
+
+class TestMatrixMegolmEventHandling:
+    @pytest.mark.asyncio
+    async def test_megolm_event_requests_room_key_and_buffers(self):
+        adapter = _make_adapter()
+        adapter._user_id = "@bot:example.org"
+        adapter._startup_ts = 0.0
+        adapter._dm_rooms = {}
+
+        fake_megolm = MagicMock()
+        fake_megolm.sender = "@alice:example.org"
+        fake_megolm.event_id = "$encrypted_event"
+        fake_megolm.server_timestamp = 9999999999000  # future
+        fake_megolm.session_id = "SESSION123"
+
+        fake_room = MagicMock()
+        fake_room.room_id = "!room:example.org"
+
+        fake_client = MagicMock()
+        fake_client.request_room_key = AsyncMock(return_value=MagicMock())
+        adapter._client = fake_client
+
+        # Create a MegolmEvent class for isinstance check
+        fake_nio = MagicMock()
+        FakeMegolmEvent = type("MegolmEvent", (), {})
+        fake_megolm.__class__ = FakeMegolmEvent
+        fake_nio.MegolmEvent = FakeMegolmEvent
+
+        with patch.dict("sys.modules", {"nio": fake_nio}):
+            await adapter._on_room_message(fake_room, fake_megolm)
+
+        # Should have requested the room key
+        fake_client.request_room_key.assert_awaited_once_with(fake_megolm)
+
+        # Should have buffered the event
+        assert len(adapter._pending_megolm) == 1
+        room, event, ts = adapter._pending_megolm[0]
+        assert room is fake_room
+        assert event is fake_megolm
+
+    @pytest.mark.asyncio
+    async def test_megolm_buffer_capped(self):
+        adapter = _make_adapter()
+        adapter._user_id = "@bot:example.org"
+        adapter._startup_ts = 0.0
+        adapter._dm_rooms = {}
+
+        fake_client = MagicMock()
+        fake_client.request_room_key = AsyncMock(return_value=MagicMock())
+        adapter._client = fake_client
+
+        FakeMegolmEvent = type("MegolmEvent", (), {})
+        fake_nio = MagicMock()
+        fake_nio.MegolmEvent = FakeMegolmEvent
+
+        # Fill the buffer past max
+        from gateway.platforms.matrix import _MAX_PENDING_EVENTS
+        with patch.dict("sys.modules", {"nio": fake_nio}):
+            for i in range(_MAX_PENDING_EVENTS + 10):
+                evt = MagicMock()
+                evt.__class__ = FakeMegolmEvent
+                evt.sender = "@alice:example.org"
+                evt.event_id = f"$event_{i}"
+                evt.server_timestamp = 9999999999000
+                evt.session_id = f"SESSION_{i}"
+                room = MagicMock()
+                room.room_id = "!room:example.org"
+                await adapter._on_room_message(room, evt)
+
+        assert len(adapter._pending_megolm) == _MAX_PENDING_EVENTS
+
+
+# ---------------------------------------------------------------------------
+# E2EE: Retry pending decryptions
+# ---------------------------------------------------------------------------
+
+class TestMatrixRetryPendingDecryptions:
+    @pytest.mark.asyncio
+    async def test_successful_decryption_routes_to_text_handler(self):
+        import time as _time
+
+        adapter = _make_adapter()
+        adapter._user_id = "@bot:example.org"
+        adapter._startup_ts = 0.0
+        adapter._dm_rooms = {}
+
+        # Create types
+        FakeMegolmEvent = type("MegolmEvent", (), {})
+        FakeRoomMessageText = type("RoomMessageText", (), {})
+
+        decrypted_event = MagicMock()
+        decrypted_event.__class__ = FakeRoomMessageText
+
+        fake_megolm = MagicMock()
+        fake_megolm.__class__ = FakeMegolmEvent
+        fake_megolm.event_id = "$encrypted"
+
+        fake_room = MagicMock()
+        now = _time.time()
+
+        adapter._pending_megolm = [(fake_room, fake_megolm, now)]
+
+        fake_client = MagicMock()
+        fake_client.decrypt_event = MagicMock(return_value=decrypted_event)
+        adapter._client = fake_client
+
+        fake_nio = MagicMock()
+        fake_nio.MegolmEvent = FakeMegolmEvent
+        fake_nio.RoomMessageText = FakeRoomMessageText
+        fake_nio.RoomMessageImage = type("RoomMessageImage", (), {})
+        fake_nio.RoomMessageAudio = type("RoomMessageAudio", (), {})
+        fake_nio.RoomMessageVideo = type("RoomMessageVideo", (), {})
+        fake_nio.RoomMessageFile = type("RoomMessageFile", (), {})
+
+        with patch.dict("sys.modules", {"nio": fake_nio}):
+            with patch.object(adapter, "_on_room_message", AsyncMock()) as mock_handler:
+                await adapter._retry_pending_decryptions()
+                mock_handler.assert_awaited_once_with(fake_room, decrypted_event)
+
+        # Buffer should be empty now
+        assert len(adapter._pending_megolm) == 0
+
+    @pytest.mark.asyncio
+    async def test_still_undecryptable_stays_in_buffer(self):
+        import time as _time
+
+        adapter = _make_adapter()
+
+        FakeMegolmEvent = type("MegolmEvent", (), {})
+
+        fake_megolm = MagicMock()
+        fake_megolm.__class__ = FakeMegolmEvent
+        fake_megolm.event_id = "$still_encrypted"
+
+        now = _time.time()
+        adapter._pending_megolm = [(MagicMock(), fake_megolm, now)]
+
+        fake_client = MagicMock()
+        # decrypt_event raises when key is still missing
+        fake_client.decrypt_event = MagicMock(side_effect=Exception("missing key"))
+        adapter._client = fake_client
+
+        fake_nio = MagicMock()
+        fake_nio.MegolmEvent = FakeMegolmEvent
+
+        with patch.dict("sys.modules", {"nio": fake_nio}):
+            await adapter._retry_pending_decryptions()
+
+        assert len(adapter._pending_megolm) == 1
+
+    @pytest.mark.asyncio
+    async def test_expired_events_dropped(self):
+        import time as _time
+
+        adapter = _make_adapter()
+
+        from gateway.platforms.matrix import _PENDING_EVENT_TTL
+
+        fake_megolm = MagicMock()
+        fake_megolm.event_id = "$old_event"
+        fake_megolm.__class__ = type("MegolmEvent", (), {})
+
+        # Timestamp well past TTL
+        old_ts = _time.time() - _PENDING_EVENT_TTL - 60
+        adapter._pending_megolm = [(MagicMock(), fake_megolm, old_ts)]
+
+        fake_client = MagicMock()
+        adapter._client = fake_client
+
+        fake_nio = MagicMock()
+        fake_nio.MegolmEvent = type("MegolmEvent", (), {})
+
+        with patch.dict("sys.modules", {"nio": fake_nio}):
+            await adapter._retry_pending_decryptions()
+
+        # Should have been dropped
+        assert len(adapter._pending_megolm) == 0
+        # Should NOT have tried to decrypt
+        fake_client.decrypt_event.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_media_event_routes_to_media_handler(self):
+        import time as _time
+
+        adapter = _make_adapter()
+        adapter._user_id = "@bot:example.org"
+        adapter._startup_ts = 0.0
+
+        FakeMegolmEvent = type("MegolmEvent", (), {})
+        FakeRoomMessageImage = type("RoomMessageImage", (), {})
+
+        decrypted_image = MagicMock()
+        decrypted_image.__class__ = FakeRoomMessageImage
+
+        fake_megolm = MagicMock()
+        fake_megolm.__class__ = FakeMegolmEvent
+        fake_megolm.event_id = "$encrypted_image"
+
+        fake_room = MagicMock()
+        now = _time.time()
+        adapter._pending_megolm = [(fake_room, fake_megolm, now)]
+
+        fake_client = MagicMock()
+        fake_client.decrypt_event = MagicMock(return_value=decrypted_image)
+        adapter._client = fake_client
+
+        fake_nio = MagicMock()
+        fake_nio.MegolmEvent = FakeMegolmEvent
+        fake_nio.RoomMessageText = type("RoomMessageText", (), {})
+        fake_nio.RoomMessageImage = FakeRoomMessageImage
+        fake_nio.RoomMessageAudio = type("RoomMessageAudio", (), {})
+        fake_nio.RoomMessageVideo = type("RoomMessageVideo", (), {})
+        fake_nio.RoomMessageFile = type("RoomMessageFile", (), {})
+
+        with patch.dict("sys.modules", {"nio": fake_nio}):
+            with patch.object(adapter, "_on_room_message_media", AsyncMock()) as mock_media:
+                await adapter._retry_pending_decryptions()
+                mock_media.assert_awaited_once_with(fake_room, decrypted_image)
+
+        assert len(adapter._pending_megolm) == 0
+
+
+# ---------------------------------------------------------------------------
+# E2EE: Key export / import
+# ---------------------------------------------------------------------------
+
+class TestMatrixKeyExportImport:
+    @pytest.mark.asyncio
+    async def test_disconnect_exports_keys(self):
+        adapter = _make_adapter()
+        adapter._encryption = True
+        adapter._sync_task = None
+
+        fake_client = MagicMock()
+        fake_client.olm = object()
+        fake_client.export_keys = AsyncMock()
+        fake_client.close = AsyncMock()
+        adapter._client = fake_client
+
+        from gateway.platforms.matrix import _KEY_EXPORT_FILE, _KEY_EXPORT_PASSPHRASE
+
+        await adapter.disconnect()
+
+        fake_client.export_keys.assert_awaited_once_with(
+            str(_KEY_EXPORT_FILE), _KEY_EXPORT_PASSPHRASE,
+        )
+
+    @pytest.mark.asyncio
+    async def test_disconnect_handles_export_failure(self):
+        adapter = _make_adapter()
+        adapter._encryption = True
+        adapter._sync_task = None
+
+        fake_client = MagicMock()
+        fake_client.olm = object()
+        fake_client.export_keys = AsyncMock(side_effect=Exception("export failed"))
+        fake_client.close = AsyncMock()
+        adapter._client = fake_client
+
+        # Should not raise
+        await adapter.disconnect()
+        assert adapter._client is None  # still cleaned up
+
+    @pytest.mark.asyncio
+    async def test_disconnect_skips_export_when_no_encryption(self):
+        adapter = _make_adapter()
+        adapter._encryption = False
+        adapter._sync_task = None
+
+        fake_client = MagicMock()
+        fake_client.close = AsyncMock()
+        adapter._client = fake_client
+
+        await adapter.disconnect()
+        # Should not have tried to export
+        assert not hasattr(fake_client, "export_keys") or \
+               not fake_client.export_keys.called
--- a/tests/gateway/test_session_hygiene.py
+++ b/tests/gateway/test_session_hygiene.py
@ -212,47 +212,7 @@ class TestSessionHygieneWarnThreshold:
        assert post_compress_tokens < warn_threshold


-class TestCompressionWarnRateLimit:
-    """Compression warning messages must be rate-limited per chat_id."""

-    def _make_runner(self):
-        from unittest.mock import MagicMock, patch
-        with patch("gateway.run.load_gateway_config"), \
-             patch("gateway.run.SessionStore"), \
-             patch("gateway.run.DeliveryRouter"):
-            from gateway.run import GatewayRunner
-            runner = GatewayRunner.__new__(GatewayRunner)
-            runner._compression_warn_sent = {}
-            runner._compression_warn_cooldown = 3600
-            return runner
-
-    def test_first_warn_is_sent(self):
-        runner = self._make_runner()
-        now = 1_000_000.0
-        last = runner._compression_warn_sent.get("chat:1", 0)
-        assert now - last >= runner._compression_warn_cooldown
-
-    def test_second_warn_suppressed_within_cooldown(self):
-        runner = self._make_runner()
-        now = 1_000_000.0
-        runner._compression_warn_sent["chat:1"] = now - 60  # 1 minute ago
-        last = runner._compression_warn_sent.get("chat:1", 0)
-        assert now - last < runner._compression_warn_cooldown
-
-    def test_warn_allowed_after_cooldown(self):
-        runner = self._make_runner()
-        now = 1_000_000.0
-        runner._compression_warn_sent["chat:1"] = now - 3601  # just past cooldown
-        last = runner._compression_warn_sent.get("chat:1", 0)
-        assert now - last >= runner._compression_warn_cooldown
-
-    def test_rate_limit_is_per_chat(self):
-        """Rate-limiting one chat must not suppress warnings for another."""
-        runner = self._make_runner()
-        now = 1_000_000.0
-        runner._compression_warn_sent["chat:1"] = now - 60  # suppressed
-        last_other = runner._compression_warn_sent.get("chat:2", 0)
-        assert now - last_other >= runner._compression_warn_cooldown


 class TestEstimatedTokenThreshold:
@ -421,10 +381,6 @@ async def test_session_hygiene_messages_stay_in_originating_topic(monkeypatch, t
    result = await runner._handle_message(event)

    assert result == "ok"
-    assert len(adapter.sent) == 2
-    assert adapter.sent[0]["chat_id"] == "-1001"
-    assert "Session is large" in adapter.sent[0]["content"]
-    assert adapter.sent[0]["metadata"] == {"thread_id": "17585"}
-    assert adapter.sent[1]["chat_id"] == "-1001"
-    assert "Compressed:" in adapter.sent[1]["content"]
-    assert adapter.sent[1]["metadata"] == {"thread_id": "17585"}
+    # Compression warnings are no longer sent to users — compression
+    # happens silently with server-side logging only.
+    assert len(adapter.sent) == 0
--- a/tests/gateway/test_unauthorized_dm_behavior.py
+++ b/tests/gateway/test_unauthorized_dm_behavior.py
@ -90,6 +90,46 @@ def test_whatsapp_lid_user_matches_phone_allowlist_via_session_mapping(monkeypat
    assert runner._is_user_authorized(source) is True


+def test_star_wildcard_in_allowlist_authorizes_any_user(monkeypatch):
+    """WHATSAPP_ALLOWED_USERS=* should act as allow-all wildcard."""
+    _clear_auth_env(monkeypatch)
+    monkeypatch.setenv("WHATSAPP_ALLOWED_USERS", "*")
+
+    runner, _adapter = _make_runner(
+        Platform.WHATSAPP,
+        GatewayConfig(platforms={Platform.WHATSAPP: PlatformConfig(enabled=True)}),
+    )
+
+    source = SessionSource(
+        platform=Platform.WHATSAPP,
+        user_id="99998887776@s.whatsapp.net",
+        chat_id="99998887776@s.whatsapp.net",
+        user_name="stranger",
+        chat_type="dm",
+    )
+    assert runner._is_user_authorized(source) is True
+
+
+def test_star_wildcard_works_for_any_platform(monkeypatch):
+    """The * wildcard should work generically, not just for WhatsApp."""
+    _clear_auth_env(monkeypatch)
+    monkeypatch.setenv("TELEGRAM_ALLOWED_USERS", "*")
+
+    runner, _adapter = _make_runner(
+        Platform.TELEGRAM,
+        GatewayConfig(platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="t")}),
+    )
+
+    source = SessionSource(
+        platform=Platform.TELEGRAM,
+        user_id="123456789",
+        chat_id="123456789",
+        user_name="stranger",
+        chat_type="dm",
+    )
+    assert runner._is_user_authorized(source) is True
+
+
@pytest.mark.asyncio
 async def test_unauthorized_dm_pairs_by_default(monkeypatch):
    _clear_auth_env(monkeypatch)
--- a/tests/gateway/test_update_command.py
+++ b/tests/gateway/test_update_command.py
@ -45,6 +45,17 @@ def _make_runner():
 class TestHandleUpdateCommand:
    """Tests for GatewayRunner._handle_update_command."""

+    @pytest.mark.asyncio
+    async def test_managed_install_returns_package_manager_guidance(self, monkeypatch):
+        runner = _make_runner()
+        event = _make_event()
+        monkeypatch.setenv("HERMES_MANAGED", "homebrew")
+
+        result = await runner._handle_update_command(event)
+
+        assert "managed by Homebrew" in result
+        assert "brew upgrade hermes-agent" in result
+
    @pytest.mark.asyncio
    async def test_no_git_directory(self, tmp_path):
        """Returns an error when .git does not exist."""
@ -191,7 +202,7 @@ class TestHandleUpdateCommand:

        with patch("gateway.run._hermes_home", hermes_home), \
             patch("gateway.run.__file__", fake_file), \
-             patch("shutil.which", side_effect=lambda x: "/usr/bin/hermes" if x == "hermes" else "/usr/bin/systemd-run"), \
+             patch("shutil.which", side_effect=lambda x: "/usr/bin/hermes" if x == "hermes" else "/usr/bin/setsid"), \
             patch("subprocess.Popen"):
            result = await runner._handle_update_command(event)

@ -204,8 +215,8 @@ class TestHandleUpdateCommand:
        assert not (hermes_home / ".update_exit_code").exists()

    @pytest.mark.asyncio
-    async def test_spawns_systemd_run(self, tmp_path):
-        """Uses systemd-run when available."""
+    async def test_spawns_setsid(self, tmp_path):
+        """Uses setsid when available."""
        runner = _make_runner()
        event = _make_event()

@ -225,16 +236,16 @@ class TestHandleUpdateCommand:
             patch("subprocess.Popen", mock_popen):
            result = await runner._handle_update_command(event)

-        # Verify systemd-run was used
+        # Verify setsid was used
        call_args = mock_popen.call_args[0][0]
-        assert call_args[0] == "/usr/bin/systemd-run"
-        assert "--scope" in call_args
+        assert call_args[0] == "/usr/bin/setsid"
+        assert call_args[1] == "bash"
        assert ".update_exit_code" in call_args[-1]
        assert "Starting Hermes update" in result

    @pytest.mark.asyncio
-    async def test_fallback_nohup_when_no_systemd_run(self, tmp_path):
-        """Falls back to nohup when systemd-run is not available."""
+    async def test_fallback_when_no_setsid(self, tmp_path):
+        """Falls back to start_new_session=True when setsid is not available."""
        runner = _make_runner()
        event = _make_event()

@ -249,24 +260,27 @@ class TestHandleUpdateCommand:

        mock_popen = MagicMock()

-        def which_no_systemd(x):
+        def which_no_setsid(x):
            if x == "hermes":
                return "/usr/bin/hermes"
-            if x == "systemd-run":
+            if x == "setsid":
                return None
            return None

        with patch("gateway.run._hermes_home", hermes_home), \
             patch("gateway.run.__file__", fake_file), \
-             patch("shutil.which", side_effect=which_no_systemd), \
+             patch("shutil.which", side_effect=which_no_setsid), \
             patch("subprocess.Popen", mock_popen):
            result = await runner._handle_update_command(event)

-        # Verify bash -c nohup fallback was used
+        # Verify plain bash -c fallback (no nohup, no setsid)
        call_args = mock_popen.call_args[0][0]
        assert call_args[0] == "bash"
-        assert "nohup" in call_args[2]
+        assert "nohup" not in call_args[2]
        assert ".update_exit_code" in call_args[2]
+        # start_new_session=True should be in kwargs
+        call_kwargs = mock_popen.call_args[1]
+        assert call_kwargs.get("start_new_session") is True
        assert "Starting Hermes update" in result

    @pytest.mark.asyncio
--- a/tests/hermes_cli/test_claw.py
+++ b/tests/hermes_cli/test_claw.py
@ -40,6 +40,119 @@ class TestFindMigrationScript:
            assert claw_mod._find_migration_script() is None


+# ---------------------------------------------------------------------------
+# _find_openclaw_dirs
+# ---------------------------------------------------------------------------
+
+
+class TestFindOpenclawDirs:
+    """Test discovery of OpenClaw directories."""
+
+    def test_finds_openclaw_dir(self, tmp_path):
+        openclaw = tmp_path / ".openclaw"
+        openclaw.mkdir()
+        with patch("pathlib.Path.home", return_value=tmp_path):
+            found = claw_mod._find_openclaw_dirs()
+        assert openclaw in found
+
+    def test_finds_legacy_dirs(self, tmp_path):
+        clawdbot = tmp_path / ".clawdbot"
+        clawdbot.mkdir()
+        moldbot = tmp_path / ".moldbot"
+        moldbot.mkdir()
+        with patch("pathlib.Path.home", return_value=tmp_path):
+            found = claw_mod._find_openclaw_dirs()
+        assert len(found) == 2
+        assert clawdbot in found
+        assert moldbot in found
+
+    def test_returns_empty_when_none_exist(self, tmp_path):
+        with patch("pathlib.Path.home", return_value=tmp_path):
+            found = claw_mod._find_openclaw_dirs()
+        assert found == []
+
+
+# ---------------------------------------------------------------------------
+# _scan_workspace_state
+# ---------------------------------------------------------------------------
+
+
+class TestScanWorkspaceState:
+    """Test scanning for workspace state files."""
+
+    def test_finds_root_state_files(self, tmp_path):
+        (tmp_path / "todo.json").write_text("{}")
+        (tmp_path / "sessions").mkdir()
+        findings = claw_mod._scan_workspace_state(tmp_path)
+        descs = [desc for _, desc in findings]
+        assert any("todo.json" in d for d in descs)
+        assert any("sessions" in d for d in descs)
+
+    def test_finds_workspace_state_files(self, tmp_path):
+        ws = tmp_path / "workspace"
+        ws.mkdir()
+        (ws / "todo.json").write_text("{}")
+        (ws / "sessions").mkdir()
+        findings = claw_mod._scan_workspace_state(tmp_path)
+        descs = [desc for _, desc in findings]
+        assert any("workspace/todo.json" in d for d in descs)
+        assert any("workspace/sessions" in d for d in descs)
+
+    def test_ignores_hidden_dirs(self, tmp_path):
+        scan_dir = tmp_path / "scan_target"
+        scan_dir.mkdir()
+        hidden = scan_dir / ".git"
+        hidden.mkdir()
+        (hidden / "todo.json").write_text("{}")
+        findings = claw_mod._scan_workspace_state(scan_dir)
+        assert len(findings) == 0
+
+    def test_empty_dir_returns_empty(self, tmp_path):
+        scan_dir = tmp_path / "scan_target"
+        scan_dir.mkdir()
+        findings = claw_mod._scan_workspace_state(scan_dir)
+        assert findings == []
+
+
+# ---------------------------------------------------------------------------
+# _archive_directory
+# ---------------------------------------------------------------------------
+
+
+class TestArchiveDirectory:
+    """Test directory archival (rename)."""
+
+    def test_renames_to_pre_migration(self, tmp_path):
+        source = tmp_path / ".openclaw"
+        source.mkdir()
+        (source / "test.txt").write_text("data")
+
+        archive_path = claw_mod._archive_directory(source)
+        assert archive_path == tmp_path / ".openclaw.pre-migration"
+        assert archive_path.is_dir()
+        assert not source.exists()
+        assert (archive_path / "test.txt").read_text() == "data"
+
+    def test_adds_timestamp_when_archive_exists(self, tmp_path):
+        source = tmp_path / ".openclaw"
+        source.mkdir()
+        # Pre-existing archive
+        (tmp_path / ".openclaw.pre-migration").mkdir()
+
+        archive_path = claw_mod._archive_directory(source)
+        assert ".pre-migration-" in archive_path.name
+        assert archive_path.is_dir()
+        assert not source.exists()
+
+    def test_dry_run_does_not_rename(self, tmp_path):
+        source = tmp_path / ".openclaw"
+        source.mkdir()
+
+        archive_path = claw_mod._archive_directory(source, dry_run=True)
+        assert archive_path == tmp_path / ".openclaw.pre-migration"
+        assert source.is_dir()  # Still exists
+
+
 # ---------------------------------------------------------------------------
 # claw_command routing
 # ---------------------------------------------------------------------------
@ -56,11 +169,24 @@ class TestClawCommand:
            claw_mod.claw_command(args)
        mock.assert_called_once_with(args)

+    def test_routes_to_cleanup(self):
+        args = Namespace(claw_action="cleanup", source=None, dry_run=False, yes=False)
+        with patch.object(claw_mod, "_cmd_cleanup") as mock:
+            claw_mod.claw_command(args)
+        mock.assert_called_once_with(args)
+
+    def test_routes_clean_alias(self):
+        args = Namespace(claw_action="clean", source=None, dry_run=False, yes=False)
+        with patch.object(claw_mod, "_cmd_cleanup") as mock:
+            claw_mod.claw_command(args)
+        mock.assert_called_once_with(args)
+
    def test_shows_help_for_no_action(self, capsys):
        args = Namespace(claw_action=None)
        claw_mod.claw_command(args)
        captured = capsys.readouterr()
        assert "migrate" in captured.out
+        assert "cleanup" in captured.out


 # ---------------------------------------------------------------------------
@ -168,6 +294,7 @@ class TestCmdMigrate:
            patch.object(claw_mod, "_load_migration_module", return_value=fake_mod),
            patch.object(claw_mod, "get_config_path", return_value=config_path),
            patch.object(claw_mod, "prompt_yes_no", return_value=True),
+            patch.object(claw_mod, "_offer_source_archival"),
        ):
            claw_mod._cmd_migrate(args)

@ -175,6 +302,75 @@ class TestCmdMigrate:
        assert "Migration Results" in captured.out
        assert "Migration complete!" in captured.out

+    def test_execute_offers_archival_on_success(self, tmp_path, capsys):
+        """After successful migration, _offer_source_archival should be called."""
+        openclaw_dir = tmp_path / ".openclaw"
+        openclaw_dir.mkdir()
+
+        fake_mod = ModuleType("openclaw_to_hermes")
+        fake_mod.resolve_selected_options = MagicMock(return_value={"soul"})
+        fake_migrator = MagicMock()
+        fake_migrator.migrate.return_value = {
+            "summary": {"migrated": 3, "skipped": 0, "conflict": 0, "error": 0},
+            "items": [
+                {"kind": "soul", "status": "migrated", "destination": str(tmp_path / "SOUL.md")},
+            ],
+        }
+        fake_mod.Migrator = MagicMock(return_value=fake_migrator)
+
+        args = Namespace(
+            source=str(openclaw_dir),
+            dry_run=False, preset="full", overwrite=False,
+            migrate_secrets=False, workspace_target=None,
+            skill_conflict="skip", yes=True,
+        )
+
+        with (
+            patch.object(claw_mod, "_find_migration_script", return_value=tmp_path / "s.py"),
+            patch.object(claw_mod, "_load_migration_module", return_value=fake_mod),
+            patch.object(claw_mod, "get_config_path", return_value=tmp_path / "config.yaml"),
+            patch.object(claw_mod, "save_config"),
+            patch.object(claw_mod, "load_config", return_value={}),
+            patch.object(claw_mod, "_offer_source_archival") as mock_archival,
+        ):
+            claw_mod._cmd_migrate(args)
+
+        mock_archival.assert_called_once_with(openclaw_dir, True)
+
+    def test_dry_run_skips_archival(self, tmp_path, capsys):
+        """Dry run should not offer archival."""
+        openclaw_dir = tmp_path / ".openclaw"
+        openclaw_dir.mkdir()
+
+        fake_mod = ModuleType("openclaw_to_hermes")
+        fake_mod.resolve_selected_options = MagicMock(return_value=set())
+        fake_migrator = MagicMock()
+        fake_migrator.migrate.return_value = {
+            "summary": {"migrated": 2, "skipped": 0, "conflict": 0, "error": 0},
+            "items": [],
+            "preset": "full",
+        }
+        fake_mod.Migrator = MagicMock(return_value=fake_migrator)
+
+        args = Namespace(
+            source=str(openclaw_dir),
+            dry_run=True, preset="full", overwrite=False,
+            migrate_secrets=False, workspace_target=None,
+            skill_conflict="skip", yes=False,
+        )
+
+        with (
+            patch.object(claw_mod, "_find_migration_script", return_value=tmp_path / "s.py"),
+            patch.object(claw_mod, "_load_migration_module", return_value=fake_mod),
+            patch.object(claw_mod, "get_config_path", return_value=tmp_path / "config.yaml"),
+            patch.object(claw_mod, "save_config"),
+            patch.object(claw_mod, "load_config", return_value={}),
+            patch.object(claw_mod, "_offer_source_archival") as mock_archival,
+        ):
+            claw_mod._cmd_migrate(args)
+
+        mock_archival.assert_not_called()
+
    def test_execute_cancelled_by_user(self, tmp_path, capsys):
        openclaw_dir = tmp_path / ".openclaw"
        openclaw_dir.mkdir()
@ -290,6 +486,172 @@ class TestCmdMigrate:
        assert call_kwargs["migrate_secrets"] is True


+# ---------------------------------------------------------------------------
+# _offer_source_archival
+# ---------------------------------------------------------------------------
+
+
+class TestOfferSourceArchival:
+    """Test the post-migration archival offer."""
+
+    def test_archives_with_auto_yes(self, tmp_path, capsys):
+        source = tmp_path / ".openclaw"
+        source.mkdir()
+        (source / "workspace").mkdir()
+        (source / "workspace" / "todo.json").write_text("{}")
+
+        claw_mod._offer_source_archival(source, auto_yes=True)
+
+        captured = capsys.readouterr()
+        assert "Archived" in captured.out
+        assert not source.exists()
+        assert (tmp_path / ".openclaw.pre-migration").is_dir()
+
+    def test_skips_when_user_declines(self, tmp_path, capsys):
+        source = tmp_path / ".openclaw"
+        source.mkdir()
+
+        with patch.object(claw_mod, "prompt_yes_no", return_value=False):
+            claw_mod._offer_source_archival(source, auto_yes=False)
+
+        captured = capsys.readouterr()
+        assert "Skipped" in captured.out
+        assert source.is_dir()  # Still exists
+
+    def test_noop_when_source_missing(self, tmp_path, capsys):
+        claw_mod._offer_source_archival(tmp_path / "nonexistent", auto_yes=True)
+        captured = capsys.readouterr()
+        assert captured.out == ""  # No output
+
+    def test_shows_state_files(self, tmp_path, capsys):
+        source = tmp_path / ".openclaw"
+        source.mkdir()
+        ws = source / "workspace"
+        ws.mkdir()
+        (ws / "todo.json").write_text("{}")
+
+        with patch.object(claw_mod, "prompt_yes_no", return_value=False):
+            claw_mod._offer_source_archival(source, auto_yes=False)
+
+        captured = capsys.readouterr()
+        assert "todo.json" in captured.out
+
+    def test_handles_archive_error(self, tmp_path, capsys):
+        source = tmp_path / ".openclaw"
+        source.mkdir()
+
+        with patch.object(claw_mod, "_archive_directory", side_effect=OSError("permission denied")):
+            claw_mod._offer_source_archival(source, auto_yes=True)
+
+        captured = capsys.readouterr()
+        assert "Could not archive" in captured.out
+
+
+# ---------------------------------------------------------------------------
+# _cmd_cleanup
+# ---------------------------------------------------------------------------
+
+
+class TestCmdCleanup:
+    """Test the cleanup command handler."""
+
+    def test_no_dirs_found(self, tmp_path, capsys):
+        args = Namespace(source=None, dry_run=False, yes=False)
+        with patch.object(claw_mod, "_find_openclaw_dirs", return_value=[]):
+            claw_mod._cmd_cleanup(args)
+        captured = capsys.readouterr()
+        assert "No OpenClaw directories found" in captured.out
+
+    def test_dry_run_lists_dirs(self, tmp_path, capsys):
+        openclaw = tmp_path / ".openclaw"
+        openclaw.mkdir()
+        ws = openclaw / "workspace"
+        ws.mkdir()
+        (ws / "todo.json").write_text("{}")
+
+        args = Namespace(source=None, dry_run=True, yes=False)
+        with patch.object(claw_mod, "_find_openclaw_dirs", return_value=[openclaw]):
+            claw_mod._cmd_cleanup(args)
+
+        captured = capsys.readouterr()
+        assert "Would archive" in captured.out
+        assert openclaw.is_dir()  # Not actually archived
+
+    def test_archives_with_yes(self, tmp_path, capsys):
+        openclaw = tmp_path / ".openclaw"
+        openclaw.mkdir()
+        (openclaw / "workspace").mkdir()
+        (openclaw / "workspace" / "todo.json").write_text("{}")
+
+        args = Namespace(source=None, dry_run=False, yes=True)
+        with patch.object(claw_mod, "_find_openclaw_dirs", return_value=[openclaw]):
+            claw_mod._cmd_cleanup(args)
+
+        captured = capsys.readouterr()
+        assert "Archived" in captured.out
+        assert "Cleaned up 1" in captured.out
+        assert not openclaw.exists()
+        assert (tmp_path / ".openclaw.pre-migration").is_dir()
+
+    def test_skips_when_user_declines(self, tmp_path, capsys):
+        openclaw = tmp_path / ".openclaw"
+        openclaw.mkdir()
+
+        args = Namespace(source=None, dry_run=False, yes=False)
+        with (
+            patch.object(claw_mod, "_find_openclaw_dirs", return_value=[openclaw]),
+            patch.object(claw_mod, "prompt_yes_no", return_value=False),
+        ):
+            claw_mod._cmd_cleanup(args)
+
+        captured = capsys.readouterr()
+        assert "Skipped" in captured.out
+        assert openclaw.is_dir()
+
+    def test_explicit_source(self, tmp_path, capsys):
+        custom_dir = tmp_path / "my-openclaw"
+        custom_dir.mkdir()
+        (custom_dir / "todo.json").write_text("{}")
+
+        args = Namespace(source=str(custom_dir), dry_run=False, yes=True)
+        claw_mod._cmd_cleanup(args)
+
+        captured = capsys.readouterr()
+        assert "Archived" in captured.out
+        assert not custom_dir.exists()
+
+    def test_shows_workspace_details(self, tmp_path, capsys):
+        openclaw = tmp_path / ".openclaw"
+        openclaw.mkdir()
+        ws = openclaw / "workspace"
+        ws.mkdir()
+        (ws / "todo.json").write_text("{}")
+        (ws / "SOUL.md").write_text("# Soul")
+
+        args = Namespace(source=None, dry_run=True, yes=False)
+        with patch.object(claw_mod, "_find_openclaw_dirs", return_value=[openclaw]):
+            claw_mod._cmd_cleanup(args)
+
+        captured = capsys.readouterr()
+        assert "workspace/" in captured.out
+        assert "todo.json" in captured.out
+
+    def test_handles_multiple_dirs(self, tmp_path, capsys):
+        openclaw = tmp_path / ".openclaw"
+        openclaw.mkdir()
+        clawdbot = tmp_path / ".clawdbot"
+        clawdbot.mkdir()
+
+        args = Namespace(source=None, dry_run=False, yes=True)
+        with patch.object(claw_mod, "_find_openclaw_dirs", return_value=[openclaw, clawdbot]):
+            claw_mod._cmd_cleanup(args)
+
+        captured = capsys.readouterr()
+        assert "Cleaned up 2" in captured.out
+        assert not openclaw.exists()
+        assert not clawdbot.exists()
+
+
 # ---------------------------------------------------------------------------
 # _print_migration_report
 # ---------------------------------------------------------------------------
--- a/tests/hermes_cli/test_commands.py
+++ b/tests/hermes_cli/test_commands.py
@ -12,10 +12,13 @@ from hermes_cli.commands import (
    SUBCOMMANDS,
    SlashCommandAutoSuggest,
    SlashCommandCompleter,
+    _TG_NAME_LIMIT,
+    _clamp_telegram_names,
    gateway_help_lines,
    resolve_command,
    slack_subcommand_map,
    telegram_bot_commands,
+    telegram_menu_commands,
 )


@ -504,3 +507,83 @@ class TestGhostText:

    def test_no_suggestion_for_non_slash(self):
        assert _suggestion("hello") is None
+
+
+# ---------------------------------------------------------------------------
+# Telegram command name clamping (32-char limit)
+# ---------------------------------------------------------------------------
+
+
+class TestClampTelegramNames:
+    """Tests for _clamp_telegram_names() — 32-char enforcement + collision."""
+
+    def test_short_names_unchanged(self):
+        entries = [("help", "Show help"), ("status", "Show status")]
+        result = _clamp_telegram_names(entries, set())
+        assert result == entries
+
+    def test_long_name_truncated(self):
+        long = "a" * 40
+        result = _clamp_telegram_names([(long, "desc")], set())
+        assert len(result) == 1
+        assert result[0][0] == "a" * _TG_NAME_LIMIT
+        assert result[0][1] == "desc"
+
+    def test_collision_with_reserved_gets_digit_suffix(self):
+        # The truncated form collides with a reserved name
+        prefix = "x" * _TG_NAME_LIMIT
+        long_name = "x" * 40
+        result = _clamp_telegram_names([(long_name, "d")], reserved={prefix})
+        assert len(result) == 1
+        name = result[0][0]
+        assert len(name) == _TG_NAME_LIMIT
+        assert name == "x" * (_TG_NAME_LIMIT - 1) + "0"
+
+    def test_collision_between_entries_gets_incrementing_digits(self):
+        # Two long names that truncate to the same 32-char prefix
+        base = "y" * 40
+        entries = [(base + "_alpha", "d1"), (base + "_beta", "d2")]
+        result = _clamp_telegram_names(entries, set())
+        assert len(result) == 2
+        assert result[0][0] == "y" * _TG_NAME_LIMIT
+        assert result[1][0] == "y" * (_TG_NAME_LIMIT - 1) + "0"
+
+    def test_collision_with_reserved_and_entries_skips_taken_digits(self):
+        prefix = "z" * _TG_NAME_LIMIT
+        digit0 = "z" * (_TG_NAME_LIMIT - 1) + "0"
+        # Reserve both the plain truncation and digit-0
+        reserved = {prefix, digit0}
+        long_name = "z" * 50
+        result = _clamp_telegram_names([(long_name, "d")], reserved)
+        assert len(result) == 1
+        assert result[0][0] == "z" * (_TG_NAME_LIMIT - 1) + "1"
+
+    def test_all_digits_exhausted_drops_entry(self):
+        prefix = "w" * _TG_NAME_LIMIT
+        # Reserve the plain truncation + all 10 digit slots
+        reserved = {prefix} | {"w" * (_TG_NAME_LIMIT - 1) + str(d) for d in range(10)}
+        long_name = "w" * 50
+        result = _clamp_telegram_names([(long_name, "d")], reserved)
+        assert result == []
+
+    def test_exact_32_chars_not_truncated(self):
+        name = "a" * _TG_NAME_LIMIT
+        result = _clamp_telegram_names([(name, "desc")], set())
+        assert result[0][0] == name
+
+    def test_duplicate_short_name_deduplicated(self):
+        entries = [("foo", "d1"), ("foo", "d2")]
+        result = _clamp_telegram_names(entries, set())
+        assert len(result) == 1
+        assert result[0] == ("foo", "d1")
+
+
+class TestTelegramMenuCommands:
+    """Integration: telegram_menu_commands enforces the 32-char limit."""
+
+    def test_all_names_within_limit(self):
+        menu, _ = telegram_menu_commands(max_commands=100)
+        for name, _desc in menu:
+            assert 1 <= len(name) <= _TG_NAME_LIMIT, (
+                f"Command '{name}' is {len(name)} chars (limit {_TG_NAME_LIMIT})"
+            )
--- a/tests/hermes_cli/test_gateway_service.py
+++ b/tests/hermes_cli/test_gateway_service.py
@ -271,7 +271,7 @@ class TestGatewaySystemServiceRouting:
        )

        run_calls = []
-        monkeypatch.setattr(gateway_cli, "run_gateway", lambda verbose=False, replace=False: run_calls.append((verbose, replace)))
+        monkeypatch.setattr(gateway_cli, "run_gateway", lambda verbose=0, quiet=False, replace=False: run_calls.append((verbose, quiet, replace)))
        monkeypatch.setattr(gateway_cli, "kill_gateway_processes", lambda force=False: 0)

        try:
@ -339,6 +339,102 @@ class TestDetectVenvDir:
        assert result is None


+class TestSystemUnitHermesHome:
+    """HERMES_HOME in system units must reference the target user, not root."""
+
+    def test_system_unit_uses_target_user_home_not_calling_user(self, monkeypatch):
+        # Simulate sudo: Path.home() returns /root, target user is alice
+        monkeypatch.setattr(Path, "home", staticmethod(lambda: Path("/root")))
+        monkeypatch.delenv("HERMES_HOME", raising=False)
+        monkeypatch.setattr(
+            gateway_cli, "_system_service_identity",
+            lambda run_as_user=None: ("alice", "alice", "/home/alice"),
+        )
+        monkeypatch.setattr(
+            gateway_cli, "_build_user_local_paths",
+            lambda home, existing: [],
+        )
+
+        unit = gateway_cli.generate_systemd_unit(system=True, run_as_user="alice")
+
+        assert 'HERMES_HOME=/home/alice/.hermes' in unit
+        assert '/root/.hermes' not in unit
+
+    def test_system_unit_remaps_profile_to_target_user(self, monkeypatch):
+        # Simulate sudo with a profile: HERMES_HOME was resolved under root
+        monkeypatch.setattr(Path, "home", staticmethod(lambda: Path("/root")))
+        monkeypatch.setenv("HERMES_HOME", "/root/.hermes/profiles/coder")
+        monkeypatch.setattr(
+            gateway_cli, "_system_service_identity",
+            lambda run_as_user=None: ("alice", "alice", "/home/alice"),
+        )
+        monkeypatch.setattr(
+            gateway_cli, "_build_user_local_paths",
+            lambda home, existing: [],
+        )
+
+        unit = gateway_cli.generate_systemd_unit(system=True, run_as_user="alice")
+
+        assert 'HERMES_HOME=/home/alice/.hermes/profiles/coder' in unit
+        assert '/root/' not in unit
+
+    def test_system_unit_preserves_custom_hermes_home(self, monkeypatch):
+        # Custom HERMES_HOME not under any user's home — keep as-is
+        monkeypatch.setattr(Path, "home", staticmethod(lambda: Path("/root")))
+        monkeypatch.setenv("HERMES_HOME", "/opt/hermes-shared")
+        monkeypatch.setattr(
+            gateway_cli, "_system_service_identity",
+            lambda run_as_user=None: ("alice", "alice", "/home/alice"),
+        )
+        monkeypatch.setattr(
+            gateway_cli, "_build_user_local_paths",
+            lambda home, existing: [],
+        )
+
+        unit = gateway_cli.generate_systemd_unit(system=True, run_as_user="alice")
+
+        assert 'HERMES_HOME=/opt/hermes-shared' in unit
+
+    def test_user_unit_unaffected_by_change(self):
+        # User-scope units should still use the calling user's HERMES_HOME
+        unit = gateway_cli.generate_systemd_unit(system=False)
+
+        hermes_home = str(gateway_cli.get_hermes_home().resolve())
+        assert f'HERMES_HOME={hermes_home}' in unit
+
+
+class TestHermesHomeForTargetUser:
+    """Unit tests for _hermes_home_for_target_user()."""
+
+    def test_remaps_default_home(self, monkeypatch):
+        monkeypatch.setattr(Path, "home", staticmethod(lambda: Path("/root")))
+        monkeypatch.delenv("HERMES_HOME", raising=False)
+
+        result = gateway_cli._hermes_home_for_target_user("/home/alice")
+        assert result == "/home/alice/.hermes"
+
+    def test_remaps_profile_path(self, monkeypatch):
+        monkeypatch.setattr(Path, "home", staticmethod(lambda: Path("/root")))
+        monkeypatch.setenv("HERMES_HOME", "/root/.hermes/profiles/coder")
+
+        result = gateway_cli._hermes_home_for_target_user("/home/alice")
+        assert result == "/home/alice/.hermes/profiles/coder"
+
+    def test_keeps_custom_path(self, monkeypatch):
+        monkeypatch.setattr(Path, "home", staticmethod(lambda: Path("/root")))
+        monkeypatch.setenv("HERMES_HOME", "/opt/hermes")
+
+        result = gateway_cli._hermes_home_for_target_user("/home/alice")
+        assert result == "/opt/hermes"
+
+    def test_noop_when_same_user(self, monkeypatch):
+        monkeypatch.setattr(Path, "home", staticmethod(lambda: Path("/home/alice")))
+        monkeypatch.delenv("HERMES_HOME", raising=False)
+
+        result = gateway_cli._hermes_home_for_target_user("/home/alice")
+        assert result == "/home/alice/.hermes"
+
+
 class TestGeneratedUnitUsesDetectedVenv:
    def test_systemd_unit_uses_dot_venv_when_detected(self, tmp_path, monkeypatch):
        dot_venv = tmp_path / ".venv"
--- a/tests/hermes_cli/test_managed_installs.py
+++ b/tests/hermes_cli/test_managed_installs.py
@ -0,0 +1,54 @@
+from types import SimpleNamespace
+from unittest.mock import patch
+
+from hermes_cli.config import (
+    format_managed_message,
+    get_managed_system,
+    recommended_update_command,
+)
+from hermes_cli.main import cmd_update
+from tools.skills_hub import OptionalSkillSource
+
+
+def test_get_managed_system_homebrew(monkeypatch):
+    monkeypatch.setenv("HERMES_MANAGED", "homebrew")
+
+    assert get_managed_system() == "Homebrew"
+    assert recommended_update_command() == "brew upgrade hermes-agent"
+
+
+def test_format_managed_message_homebrew(monkeypatch):
+    monkeypatch.setenv("HERMES_MANAGED", "homebrew")
+
+    message = format_managed_message("update Hermes Agent")
+
+    assert "managed by Homebrew" in message
+    assert "brew upgrade hermes-agent" in message
+
+
+def test_recommended_update_command_defaults_to_hermes_update(monkeypatch):
+    monkeypatch.delenv("HERMES_MANAGED", raising=False)
+
+    assert recommended_update_command() == "hermes update"
+
+
+def test_cmd_update_blocks_managed_homebrew(monkeypatch, capsys):
+    monkeypatch.setenv("HERMES_MANAGED", "homebrew")
+
+    with patch("hermes_cli.main.subprocess.run") as mock_run:
+        cmd_update(SimpleNamespace())
+
+    assert not mock_run.called
+    captured = capsys.readouterr()
+    assert "managed by Homebrew" in captured.err
+    assert "brew upgrade hermes-agent" in captured.err
+
+
+def test_optional_skill_source_honors_env_override(monkeypatch, tmp_path):
+    optional_dir = tmp_path / "optional-skills"
+    optional_dir.mkdir()
+    monkeypatch.setenv("HERMES_OPTIONAL_SKILLS", str(optional_dir))
+
+    source = OptionalSkillSource()
+
+    assert source._optional_dir == optional_dir
--- a/tests/hermes_cli/test_profile_export_credentials.py
+++ b/tests/hermes_cli/test_profile_export_credentials.py
@ -0,0 +1,52 @@
+"""Tests for credential exclusion during profile export.
+
+Profile exports should NEVER include auth.json or .env — these contain
+API keys, OAuth tokens, and credential pool data. Users share exported
+profiles; leaking credentials in the archive is a security issue.
+"""
+
+import tarfile
+from pathlib import Path
+
+from hermes_cli.profiles import export_profile, _DEFAULT_EXPORT_EXCLUDE_ROOT
+
+
+class TestCredentialExclusion:
+
+    def test_auth_json_in_default_exclude_set(self):
+        """auth.json must be in the default export exclusion set."""
+        assert "auth.json" in _DEFAULT_EXPORT_EXCLUDE_ROOT
+
+    def test_dotenv_in_default_exclude_set(self):
+        """.env must be in the default export exclusion set."""
+        assert ".env" in _DEFAULT_EXPORT_EXCLUDE_ROOT
+
+    def test_named_profile_export_excludes_auth(self, tmp_path, monkeypatch):
+        """Named profile export must not contain auth.json or .env."""
+        profiles_root = tmp_path / "profiles"
+        profile_dir = profiles_root / "testprofile"
+        profile_dir.mkdir(parents=True)
+
+        # Create a profile with credentials
+        (profile_dir / "config.yaml").write_text("model: gpt-4\n")
+        (profile_dir / "auth.json").write_text('{"tokens": {"access": "sk-secret"}}')
+        (profile_dir / ".env").write_text("OPENROUTER_API_KEY=sk-secret-key\n")
+        (profile_dir / "SOUL.md").write_text("I am helpful.\n")
+        (profile_dir / "memories").mkdir()
+        (profile_dir / "memories" / "MEMORY.md").write_text("# Memories\n")
+
+        monkeypatch.setattr("hermes_cli.profiles._get_profiles_root", lambda: profiles_root)
+        monkeypatch.setattr("hermes_cli.profiles.get_profile_dir", lambda n: profile_dir)
+        monkeypatch.setattr("hermes_cli.profiles.validate_profile_name", lambda n: None)
+
+        output = tmp_path / "export.tar.gz"
+        result = export_profile("testprofile", str(output))
+
+        # Check archive contents
+        with tarfile.open(result, "r:gz") as tf:
+            names = tf.getnames()
+
+        assert any("config.yaml" in n for n in names), "config.yaml should be in export"
+        assert any("SOUL.md" in n for n in names), "SOUL.md should be in export"
+        assert not any("auth.json" in n for n in names), "auth.json must NOT be in export"
+        assert not any(".env" in n for n in names), ".env must NOT be in export"
--- a/tests/hermes_cli/test_profiles.py
+++ b/tests/hermes_cli/test_profiles.py
@ -6,6 +6,7 @@ and shell completion generation.
 """

 import json
+import io
 import os
 import tarfile
 from pathlib import Path
@ -449,10 +450,187 @@ class TestExportImport:
        with pytest.raises(FileExistsError):
            import_profile(str(archive_path), name="coder")

+    def test_import_rejects_traversal_archive_member(self, profile_env, tmp_path):
+        archive_path = tmp_path / "export" / "evil.tar.gz"
+        archive_path.parent.mkdir(parents=True, exist_ok=True)
+        escape_path = tmp_path / "escape.txt"
+
+        with tarfile.open(archive_path, "w:gz") as tf:
+            info = tarfile.TarInfo("../../escape.txt")
+            data = b"pwned"
+            info.size = len(data)
+            tf.addfile(info, io.BytesIO(data))
+
+        with pytest.raises(ValueError, match="Unsafe archive member path"):
+            import_profile(str(archive_path), name="coder")
+
+        assert not escape_path.exists()
+        assert not get_profile_dir("coder").exists()
+
+    def test_import_rejects_absolute_archive_member(self, profile_env, tmp_path):
+        archive_path = tmp_path / "export" / "evil-abs.tar.gz"
+        archive_path.parent.mkdir(parents=True, exist_ok=True)
+        absolute_target = tmp_path / "abs-escape.txt"
+
+        with tarfile.open(archive_path, "w:gz") as tf:
+            info = tarfile.TarInfo(str(absolute_target))
+            data = b"pwned"
+            info.size = len(data)
+            tf.addfile(info, io.BytesIO(data))
+
+        with pytest.raises(ValueError, match="Unsafe archive member path"):
+            import_profile(str(archive_path), name="coder")
+
+        assert not absolute_target.exists()
+        assert not get_profile_dir("coder").exists()
+
    def test_export_nonexistent_raises(self, profile_env, tmp_path):
        with pytest.raises(FileNotFoundError):
            export_profile("nonexistent", str(tmp_path / "out.tar.gz"))

+    # ---------------------------------------------------------------
+    # Default profile export / import
+    # ---------------------------------------------------------------
+
+    def test_export_default_creates_valid_archive(self, profile_env, tmp_path):
+        """Exporting the default profile produces a valid tar.gz."""
+        default_dir = get_profile_dir("default")
+        (default_dir / "config.yaml").write_text("model: test")
+
+        output = tmp_path / "export" / "default.tar.gz"
+        output.parent.mkdir(parents=True, exist_ok=True)
+        result = export_profile("default", str(output))
+
+        assert Path(result).exists()
+        assert tarfile.is_tarfile(str(result))
+
+    def test_export_default_includes_profile_data(self, profile_env, tmp_path):
+        """Profile data files end up in the archive (credentials excluded)."""
+        default_dir = get_profile_dir("default")
+        (default_dir / "config.yaml").write_text("model: test")
+        (default_dir / ".env").write_text("KEY=val")
+        (default_dir / "SOUL.md").write_text("Be nice.")
+        mem_dir = default_dir / "memories"
+        mem_dir.mkdir(exist_ok=True)
+        (mem_dir / "MEMORY.md").write_text("remember this")
+
+        output = tmp_path / "export" / "default.tar.gz"
+        output.parent.mkdir(parents=True, exist_ok=True)
+        export_profile("default", str(output))
+
+        with tarfile.open(str(output), "r:gz") as tf:
+            names = tf.getnames()
+
+        assert "default/config.yaml" in names
+        assert "default/.env" not in names  # credentials excluded
+        assert "default/SOUL.md" in names
+        assert "default/memories/MEMORY.md" in names
+
+    def test_export_default_excludes_infrastructure(self, profile_env, tmp_path):
+        """Repo checkout, worktrees, profiles, databases are excluded."""
+        default_dir = get_profile_dir("default")
+        (default_dir / "config.yaml").write_text("ok")
+
+        # Create dirs/files that should be excluded
+        for d in ("hermes-agent", ".worktrees", "profiles", "bin",
+                  "image_cache", "logs", "sandboxes", "checkpoints"):
+            sub = default_dir / d
+            sub.mkdir(exist_ok=True)
+            (sub / "marker.txt").write_text("excluded")
+
+        for f in ("state.db", "gateway.pid", "gateway_state.json",
+                  "processes.json", "errors.log", ".hermes_history",
+                  "active_profile", ".update_check", "auth.lock"):
+            (default_dir / f).write_text("excluded")
+
+        output = tmp_path / "export" / "default.tar.gz"
+        output.parent.mkdir(parents=True, exist_ok=True)
+        export_profile("default", str(output))
+
+        with tarfile.open(str(output), "r:gz") as tf:
+            names = tf.getnames()
+
+        # Config is present
+        assert "default/config.yaml" in names
+
+        # Infrastructure excluded
+        excluded_prefixes = [
+            "default/hermes-agent", "default/.worktrees", "default/profiles",
+            "default/bin", "default/image_cache", "default/logs",
+            "default/sandboxes", "default/checkpoints",
+        ]
+        for prefix in excluded_prefixes:
+            assert not any(n.startswith(prefix) for n in names), \
+                f"Expected {prefix} to be excluded but found it in archive"
+
+        excluded_files = [
+            "default/state.db", "default/gateway.pid",
+            "default/gateway_state.json", "default/processes.json",
+            "default/errors.log", "default/.hermes_history",
+            "default/active_profile", "default/.update_check",
+            "default/auth.lock",
+        ]
+        for f in excluded_files:
+            assert f not in names, f"Expected {f} to be excluded"
+
+    def test_export_default_excludes_pycache_at_any_depth(self, profile_env, tmp_path):
+        """__pycache__ dirs are excluded even inside nested directories."""
+        default_dir = get_profile_dir("default")
+        (default_dir / "config.yaml").write_text("ok")
+        nested = default_dir / "skills" / "my-skill" / "__pycache__"
+        nested.mkdir(parents=True)
+        (nested / "cached.pyc").write_text("bytecode")
+
+        output = tmp_path / "export" / "default.tar.gz"
+        output.parent.mkdir(parents=True, exist_ok=True)
+        export_profile("default", str(output))
+
+        with tarfile.open(str(output), "r:gz") as tf:
+            names = tf.getnames()
+
+        assert not any("__pycache__" in n for n in names)
+
+    def test_import_default_without_name_raises(self, profile_env, tmp_path):
+        """Importing a default export without --name gives clear guidance."""
+        default_dir = get_profile_dir("default")
+        (default_dir / "config.yaml").write_text("ok")
+
+        archive = tmp_path / "export" / "default.tar.gz"
+        archive.parent.mkdir(parents=True, exist_ok=True)
+        export_profile("default", str(archive))
+
+        with pytest.raises(ValueError, match="Cannot import as 'default'"):
+            import_profile(str(archive))
+
+    def test_import_default_with_explicit_default_name_raises(self, profile_env, tmp_path):
+        """Explicitly importing as 'default' is also rejected."""
+        default_dir = get_profile_dir("default")
+        (default_dir / "config.yaml").write_text("ok")
+
+        archive = tmp_path / "export" / "default.tar.gz"
+        archive.parent.mkdir(parents=True, exist_ok=True)
+        export_profile("default", str(archive))
+
+        with pytest.raises(ValueError, match="Cannot import as 'default'"):
+            import_profile(str(archive), name="default")
+
+    def test_import_default_export_with_new_name_roundtrip(self, profile_env, tmp_path):
+        """Export default → import under a different name → data preserved."""
+        default_dir = get_profile_dir("default")
+        (default_dir / "config.yaml").write_text("model: opus")
+        mem_dir = default_dir / "memories"
+        mem_dir.mkdir(exist_ok=True)
+        (mem_dir / "MEMORY.md").write_text("important fact")
+
+        archive = tmp_path / "export" / "default.tar.gz"
+        archive.parent.mkdir(parents=True, exist_ok=True)
+        export_profile("default", str(archive))
+
+        imported = import_profile(str(archive), name="backup")
+        assert imported.is_dir()
+        assert (imported / "config.yaml").read_text() == "model: opus"
+        assert (imported / "memories" / "MEMORY.md").read_text() == "important fact"
+

 # ===================================================================
 # TestProfileIsolation
--- a/tests/hermes_cli/test_set_config_value.py
+++ b/tests/hermes_cli/test_set_config_value.py
@ -1,12 +1,13 @@
 """Tests for set_config_value — verifying secrets route to .env and config to config.yaml."""

+import argparse
 import os
 from pathlib import Path
 from unittest.mock import patch, call

 import pytest

-from hermes_cli.config import set_config_value
+from hermes_cli.config import set_config_value, config_command


@pytest.fixture(autouse=True)
@ -125,3 +126,42 @@ class TestConfigYamlRouting:
            "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE=true" in env_content
            or "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE=True" in env_content
        )
+
+
+# ---------------------------------------------------------------------------
+# Empty / falsy values — regression tests for #4277
+# ---------------------------------------------------------------------------
+
+class TestFalsyValues:
+    """config set should accept empty strings and falsy values like '0'."""
+
+    def test_empty_string_routes_to_env(self, _isolated_hermes_home):
+        """Blanking an API key should write an empty value to .env."""
+        set_config_value("OPENROUTER_API_KEY", "")
+        env_content = _read_env(_isolated_hermes_home)
+        assert "OPENROUTER_API_KEY=" in env_content
+
+    def test_empty_string_routes_to_config(self, _isolated_hermes_home):
+        """Blanking a config key should write an empty string to config.yaml."""
+        set_config_value("model", "")
+        config = _read_config(_isolated_hermes_home)
+        assert "model: ''" in config or "model: \"\"" in config
+
+    def test_zero_routes_to_config(self, _isolated_hermes_home):
+        """Setting a config key to '0' should write 0 to config.yaml."""
+        set_config_value("verbose", "0")
+        config = _read_config(_isolated_hermes_home)
+        assert "verbose: 0" in config
+
+    def test_config_command_rejects_missing_value(self):
+        """config set with no value arg (None) should still exit."""
+        args = argparse.Namespace(config_command="set", key="model", value=None)
+        with pytest.raises(SystemExit):
+            config_command(args)
+
+    def test_config_command_accepts_empty_string(self, _isolated_hermes_home):
+        """config set KEY '' should not exit — it should set the value."""
+        args = argparse.Namespace(config_command="set", key="model", value="")
+        config_command(args)
+        config = _read_config(_isolated_hermes_home)
+        assert "model" in config
--- a/tests/hermes_cli/test_setup.py
+++ b/tests/hermes_cli/test_setup.py
@ -1,8 +1,10 @@
+"""Tests for setup_model_provider — verifies the delegation to
+select_provider_and_model() and config dict sync."""
 import json
 import sys
 import types

-from hermes_cli.auth import _update_config_for_provider, get_active_provider
+from hermes_cli.auth import get_active_provider
 from hermes_cli.config import load_config, save_config
 from hermes_cli.setup import setup_model_provider

@ -25,249 +27,201 @@ def _clear_provider_env(monkeypatch):
        monkeypatch.delenv(key, raising=False)


+def _stub_tts(monkeypatch):
+    """Stub out TTS prompts so setup_model_provider doesn't block."""
+    monkeypatch.setattr("hermes_cli.setup.prompt_choice", lambda q, c, d=0: (
+        _maybe_keep_current_tts(q, c) if _maybe_keep_current_tts(q, c) is not None
+        else d
+    ))
+    monkeypatch.setattr("hermes_cli.setup.prompt_yes_no", lambda *a, **kw: False)

-def test_nous_oauth_setup_keeps_current_model_when_syncing_disk_provider(
-    tmp_path, monkeypatch
-):
+
+def _write_model_config(tmp_path, provider, base_url="", model_name="test-model"):
+    """Simulate what a _model_flow_* function writes to disk."""
+    cfg = load_config()
+    m = cfg.get("model")
+    if not isinstance(m, dict):
+        m = {"default": m} if m else {}
+        cfg["model"] = m
+    m["provider"] = provider
+    if base_url:
+        m["base_url"] = base_url
+    if model_name:
+        m["default"] = model_name
+    save_config(cfg)
+
+
+def test_setup_delegates_to_select_provider_and_model(tmp_path, monkeypatch):
+    """setup_model_provider calls select_provider_and_model and syncs config."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    _clear_provider_env(monkeypatch)
+    _stub_tts(monkeypatch)

    config = load_config()

-    def fake_prompt_choice(question, choices, default=0):
-        if question == "Select your inference provider:":
-            return 1  # Nous Portal
-        if question == "Configure vision:":
-            return len(choices) - 1
-        if question == "Select default model:":
-            assert choices[-1] == "Keep current (anthropic/claude-opus-4.6)"
-            return len(choices) - 1
-        tts_idx = _maybe_keep_current_tts(question, choices)
-        if tts_idx is not None:
-            return tts_idx
-        raise AssertionError(f"Unexpected prompt_choice call: {question}")
+    def fake_select():
+        _write_model_config(tmp_path, "custom", "http://localhost:11434/v1", "qwen3.5:32b")

-    monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice)
-    monkeypatch.setattr("hermes_cli.setup.prompt", lambda *args, **kwargs: "")
-    monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: [])
-
-    def _fake_login_nous(*args, **kwargs):
-        auth_path = tmp_path / "auth.json"
-        auth_path.write_text(json.dumps({"active_provider": "nous", "providers": {}}))
-        _update_config_for_provider("nous", "https://inference.example.com/v1")
-
-    monkeypatch.setattr("hermes_cli.auth._login_nous", _fake_login_nous)
-    monkeypatch.setattr(
-        "hermes_cli.auth.resolve_nous_runtime_credentials",
-        lambda *args, **kwargs: {
-            "base_url": "https://inference.example.com/v1",
-            "api_key": "nous-key",
-        },
-    )
-    monkeypatch.setattr(
-        "hermes_cli.auth.fetch_nous_models",
-        lambda *args, **kwargs: ["gemini-3-flash"],
-    )
+    monkeypatch.setattr("hermes_cli.main.select_provider_and_model", fake_select)

    setup_model_provider(config)
    save_config(config)

    reloaded = load_config()
+    assert isinstance(reloaded["model"], dict)
+    assert reloaded["model"]["provider"] == "custom"
+    assert reloaded["model"]["base_url"] == "http://localhost:11434/v1"
+    assert reloaded["model"]["default"] == "qwen3.5:32b"

+
+def test_setup_syncs_openrouter_from_disk(tmp_path, monkeypatch):
+    """When select_provider_and_model saves OpenRouter config to disk,
+    the wizard's config dict picks it up."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    _clear_provider_env(monkeypatch)
+    _stub_tts(monkeypatch)
+
+    config = load_config()
+    assert isinstance(config.get("model"), str)  # fresh install
+
+    def fake_select():
+        _write_model_config(tmp_path, "openrouter", model_name="anthropic/claude-opus-4.6")
+
+    monkeypatch.setattr("hermes_cli.main.select_provider_and_model", fake_select)
+
+    setup_model_provider(config)
+    save_config(config)
+
+    reloaded = load_config()
+    assert isinstance(reloaded["model"], dict)
+    assert reloaded["model"]["provider"] == "openrouter"
+
+
+def test_setup_syncs_nous_from_disk(tmp_path, monkeypatch):
+    """Nous OAuth writes config to disk; wizard config dict must pick it up."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    _clear_provider_env(monkeypatch)
+    _stub_tts(monkeypatch)
+
+    config = load_config()
+
+    def fake_select():
+        _write_model_config(tmp_path, "nous", "https://inference.example.com/v1", "gemini-3-flash")
+
+    monkeypatch.setattr("hermes_cli.main.select_provider_and_model", fake_select)
+
+    setup_model_provider(config)
+    save_config(config)
+
+    reloaded = load_config()
    assert isinstance(reloaded["model"], dict)
    assert reloaded["model"]["provider"] == "nous"
    assert reloaded["model"]["base_url"] == "https://inference.example.com/v1"
-    assert reloaded["model"]["default"] == "anthropic/claude-opus-4.6"


-def test_custom_setup_clears_active_oauth_provider(tmp_path, monkeypatch):
+def test_setup_custom_providers_synced(tmp_path, monkeypatch):
+    """custom_providers written by select_provider_and_model must survive."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    _clear_provider_env(monkeypatch)
-
-    auth_path = tmp_path / "auth.json"
-    auth_path.write_text(json.dumps({"active_provider": "nous", "providers": {}}))
+    _stub_tts(monkeypatch)

    config = load_config()

-    def fake_prompt_choice(question, choices, default=0):
-        if question == "Select your inference provider:":
-            return 3
-        tts_idx = _maybe_keep_current_tts(question, choices)
-        if tts_idx is not None:
-            return tts_idx
-        raise AssertionError(f"Unexpected prompt_choice call: {question}")
+    def fake_select():
+        _write_model_config(tmp_path, "custom", "http://localhost:8080/v1", "llama3")
+        cfg = load_config()
+        cfg["custom_providers"] = [{"name": "Local", "base_url": "http://localhost:8080/v1"}]
+        save_config(cfg)

-    monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice)
-
-    # _model_flow_custom uses builtins.input (URL, key, model, context_length)
-    input_values = iter([
-        "https://custom.example/v1",
-        "custom-api-key",
-        "custom/model",
-        "",  # context_length (blank = auto-detect)
-    ])
-    monkeypatch.setattr("builtins.input", lambda _prompt="": next(input_values))
-    monkeypatch.setattr("hermes_cli.setup.prompt_yes_no", lambda *args, **kwargs: False)
-    monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: [])
-    monkeypatch.setattr("hermes_cli.main._save_custom_provider", lambda *args, **kwargs: None)
-    monkeypatch.setattr(
-        "hermes_cli.models.probe_api_models",
-        lambda api_key, base_url: {"models": ["m"], "probed_url": base_url + "/models"},
-    )
-
-    setup_model_provider(config)
-
-    # Core assertion: switching to custom endpoint clears OAuth provider
-    assert get_active_provider() is None
-
-    # _model_flow_custom writes config via its own load/save cycle
-    reloaded = load_config()
-    if isinstance(reloaded.get("model"), dict):
-        assert reloaded["model"].get("provider") == "custom"
-        assert reloaded["model"].get("default") == "custom/model"
-
-
-def test_codex_setup_uses_runtime_access_token_for_live_model_list(tmp_path, monkeypatch):
-    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
-    monkeypatch.setenv("OPENROUTER_API_KEY", "or-test-key")
-    _clear_provider_env(monkeypatch)
-    monkeypatch.setenv("OPENROUTER_API_KEY", "or-test-key")
-
-    config = load_config()
-
-    def fake_prompt_choice(question, choices, default=0):
-        if question == "Select your inference provider:":
-            return 2  # OpenAI Codex
-        if question == "Configure vision:":
-            return len(choices) - 1
-        if question == "Select default model:":
-            return 0
-        tts_idx = _maybe_keep_current_tts(question, choices)
-        if tts_idx is not None:
-            return tts_idx
-        raise AssertionError(f"Unexpected prompt_choice call: {question}")
-
-    monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice)
-    monkeypatch.setattr("hermes_cli.setup.prompt", lambda *args, **kwargs: "")
-    monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: [])
-    monkeypatch.setattr("hermes_cli.auth._login_openai_codex", lambda *args, **kwargs: None)
-    monkeypatch.setattr(
-        "hermes_cli.auth.resolve_codex_runtime_credentials",
-        lambda *args, **kwargs: {
-            "base_url": "https://chatgpt.com/backend-api/codex",
-            "api_key": "codex-access-token",
-        },
-    )
-
-    captured = {}
-
-    def _fake_get_codex_model_ids(access_token=None):
-        captured["access_token"] = access_token
-        return ["gpt-5.2-codex", "gpt-5.2"]
-
-    monkeypatch.setattr(
-        "hermes_cli.codex_models.get_codex_model_ids",
-        _fake_get_codex_model_ids,
-    )
+    monkeypatch.setattr("hermes_cli.main.select_provider_and_model", fake_select)

    setup_model_provider(config)
    save_config(config)

    reloaded = load_config()
+    assert reloaded.get("custom_providers") == [{"name": "Local", "base_url": "http://localhost:8080/v1"}]

-    assert captured["access_token"] == "codex-access-token"
+
+def test_setup_cancel_preserves_existing_config(tmp_path, monkeypatch):
+    """When the user cancels provider selection, existing config is preserved."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    _clear_provider_env(monkeypatch)
+    _stub_tts(monkeypatch)
+
+    # Pre-set a provider
+    _write_model_config(tmp_path, "openrouter", model_name="gpt-4o")
+
+    config = load_config()
+    assert config["model"]["provider"] == "openrouter"
+
+    def fake_select():
+        pass  # user cancelled — nothing written to disk
+
+    monkeypatch.setattr("hermes_cli.main.select_provider_and_model", fake_select)
+
+    setup_model_provider(config)
+    save_config(config)
+
+    reloaded = load_config()
+    assert isinstance(reloaded["model"], dict)
+    assert reloaded["model"]["provider"] == "openrouter"
+    assert reloaded["model"]["default"] == "gpt-4o"
+
+
+def test_setup_exception_in_select_gracefully_handled(tmp_path, monkeypatch):
+    """If select_provider_and_model raises, setup continues with existing config."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    _clear_provider_env(monkeypatch)
+    _stub_tts(monkeypatch)
+
+    config = load_config()
+
+    def fake_select():
+        raise RuntimeError("something broke")
+
+    monkeypatch.setattr("hermes_cli.main.select_provider_and_model", fake_select)
+
+    # Should not raise
+    setup_model_provider(config)
+
+
+def test_setup_keyboard_interrupt_gracefully_handled(tmp_path, monkeypatch):
+    """KeyboardInterrupt during provider selection is handled."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    _clear_provider_env(monkeypatch)
+    _stub_tts(monkeypatch)
+
+    config = load_config()
+
+    def fake_select():
+        raise KeyboardInterrupt()
+
+    monkeypatch.setattr("hermes_cli.main.select_provider_and_model", fake_select)
+
+    setup_model_provider(config)
+
+
+def test_codex_setup_uses_runtime_access_token_for_live_model_list(tmp_path, monkeypatch):
+    """Codex model list fetching uses the runtime access token."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    monkeypatch.setenv("OPENROUTER_API_KEY", "or-test-key")
+    _clear_provider_env(monkeypatch)
+    monkeypatch.setenv("OPENROUTER_API_KEY", "or-test-key")
+
+    config = load_config()
+    _stub_tts(monkeypatch)
+
+    def fake_select():
+        _write_model_config(tmp_path, "openai-codex", "https://api.openai.com/v1", "gpt-4o")
+
+    monkeypatch.setattr("hermes_cli.main.select_provider_and_model", fake_select)
+
+    setup_model_provider(config)
+    save_config(config)
+
+    reloaded = load_config()
    assert isinstance(reloaded["model"], dict)
    assert reloaded["model"]["provider"] == "openai-codex"
-    assert reloaded["model"]["default"] == "gpt-5.2-codex"
-    assert reloaded["model"]["base_url"] == "https://chatgpt.com/backend-api/codex"
-
-
-def test_nous_setup_sets_managed_openai_tts_when_unconfigured(tmp_path, monkeypatch, capsys):
-    monkeypatch.setenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", "1")
-    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
-    _clear_provider_env(monkeypatch)
-
-    config = load_config()
-
-    def fake_prompt_choice(question, choices, default=0):
-        if question == "Select your inference provider:":
-            return 1
-        if question == "Configure vision:":
-            return len(choices) - 1
-        if question == "Select default model:":
-            return len(choices) - 1
-        raise AssertionError(f"Unexpected prompt_choice call: {question}")
-
-    monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice)
-    monkeypatch.setattr("hermes_cli.setup.prompt", lambda *args, **kwargs: "")
-    monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: [])
-
-    def _fake_login_nous(*args, **kwargs):
-        auth_path = tmp_path / "auth.json"
-        auth_path.write_text(json.dumps({"active_provider": "nous", "providers": {"nous": {"access_token": "nous-token"}}}))
-        _update_config_for_provider("nous", "https://inference.example.com/v1")
-
-    monkeypatch.setattr("hermes_cli.auth._login_nous", _fake_login_nous)
-    monkeypatch.setattr(
-        "hermes_cli.auth.resolve_nous_runtime_credentials",
-        lambda *args, **kwargs: {
-            "base_url": "https://inference.example.com/v1",
-            "api_key": "nous-key",
-        },
-    )
-    monkeypatch.setattr(
-        "hermes_cli.auth.fetch_nous_models",
-        lambda *args, **kwargs: ["gemini-3-flash"],
-    )
-
-    setup_model_provider(config)
-
-    out = capsys.readouterr().out
-    assert config["tts"]["provider"] == "openai"
-    assert "Nous subscription enables managed web tools" in out
-    assert "OpenAI TTS via your Nous subscription" in out
-
-
-def test_nous_setup_preserves_existing_tts_provider(tmp_path, monkeypatch):
-    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
-    _clear_provider_env(monkeypatch)
-
-    config = load_config()
-    config["tts"] = {"provider": "elevenlabs"}
-
-    def fake_prompt_choice(question, choices, default=0):
-        if question == "Select your inference provider:":
-            return 1
-        if question == "Configure vision:":
-            return len(choices) - 1
-        if question == "Select default model:":
-            return len(choices) - 1
-        raise AssertionError(f"Unexpected prompt_choice call: {question}")
-
-    monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice)
-    monkeypatch.setattr("hermes_cli.setup.prompt", lambda *args, **kwargs: "")
-    monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: [])
-    monkeypatch.setattr(
-        "hermes_cli.auth._login_nous",
-        lambda *args, **kwargs: (tmp_path / "auth.json").write_text(
-            json.dumps({"active_provider": "nous", "providers": {"nous": {"access_token": "nous-token"}}})
-        ),
-    )
-    monkeypatch.setattr(
-        "hermes_cli.auth.resolve_nous_runtime_credentials",
-        lambda *args, **kwargs: {
-            "base_url": "https://inference.example.com/v1",
-            "api_key": "nous-key",
-        },
-    )
-    monkeypatch.setattr(
-        "hermes_cli.auth.fetch_nous_models",
-        lambda *args, **kwargs: ["gemini-3-flash"],
-    )
-
-    setup_model_provider(config)
-
-    assert config["tts"]["provider"] == "elevenlabs"


 def test_modal_setup_can_use_nous_subscription_without_modal_creds(tmp_path, monkeypatch, capsys):
--- a/tests/hermes_cli/test_setup_model_provider.py
+++ b/tests/hermes_cli/test_setup_model_provider.py
@ -1,4 +1,9 @@
-"""Regression tests for interactive setup provider/model persistence."""
+"""Regression tests for interactive setup provider/model persistence.
+
+Since setup_model_provider delegates to select_provider_and_model()
+from hermes_cli.main, these tests mock the delegation point and verify
+that the setup wizard correctly syncs config from disk after the call.
+"""

 from __future__ import annotations

@ -14,19 +19,6 @@ def _maybe_keep_current_tts(question, choices):
    return len(choices) - 1


-def _read_env(home):
-    env_path = home / ".env"
-    data = {}
-    if not env_path.exists():
-        return data
-    for line in env_path.read_text().splitlines():
-        if not line or line.startswith("#") or "=" not in line:
-            continue
-        k, v = line.split("=", 1)
-        data[k] = v
-    return data
-
-
 def _clear_provider_env(monkeypatch):
    for key in (
        "HERMES_INFERENCE_PROVIDER",
@ -45,419 +37,375 @@ def _clear_provider_env(monkeypatch):
        monkeypatch.delenv(key, raising=False)


+def _stub_tts(monkeypatch):
+    monkeypatch.setattr("hermes_cli.setup.prompt_choice", lambda q, c, d=0: (
+        _maybe_keep_current_tts(q, c) if _maybe_keep_current_tts(q, c) is not None
+        else d
+    ))
+    monkeypatch.setattr("hermes_cli.setup.prompt_yes_no", lambda *a, **kw: False)
+
+
+def _write_model_config(provider, base_url="", model_name="test-model"):
+    """Simulate what a _model_flow_* function writes to disk."""
+    cfg = load_config()
+    m = cfg.get("model")
+    if not isinstance(m, dict):
+        m = {"default": m} if m else {}
+        cfg["model"] = m
+    m["provider"] = provider
+    if base_url:
+        m["base_url"] = base_url
+    else:
+        m.pop("base_url", None)
+    if model_name:
+        m["default"] = model_name
+    m.pop("api_mode", None)
+    save_config(cfg)
+
+
 def test_setup_keep_current_custom_from_config_does_not_fall_through(tmp_path, monkeypatch):
    """Keep-current custom should not fall through to the generic model menu."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    _clear_provider_env(monkeypatch)
-    save_env_value("OPENAI_BASE_URL", "https://example.invalid/v1")
-    save_env_value("OPENAI_API_KEY", "custom-key")
+    _stub_tts(monkeypatch)
+
+    # Pre-set custom provider
+    _write_model_config("custom", "http://localhost:8080/v1", "local-model")

    config = load_config()
-    config["model"] = {
-        "default": "custom/model",
-        "provider": "custom",
-        "base_url": "https://example.invalid/v1",
-    }
-    save_config(config)
+    assert config["model"]["provider"] == "custom"

-    def fake_prompt_choice(question, choices, default=0):
-        if question == "Select your inference provider:":
-            assert choices[-1] == "Keep current (Custom: https://example.invalid/v1)"
-            return len(choices) - 1
-        tts_idx = _maybe_keep_current_tts(question, choices)
-        if tts_idx is not None:
-            return tts_idx
-        raise AssertionError("Model menu should not appear for keep-current custom")
+    def fake_select():
+        pass  # user chose "cancel" or "keep current"

-    monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice)
-    monkeypatch.setattr("hermes_cli.setup.prompt", lambda *args, **kwargs: "")
-    monkeypatch.setattr("hermes_cli.setup.prompt_yes_no", lambda *args, **kwargs: False)
-    monkeypatch.setattr("hermes_cli.auth.get_active_provider", lambda: None)
-    monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: [])
+    monkeypatch.setattr("hermes_cli.main.select_provider_and_model", fake_select)

    setup_model_provider(config)
    save_config(config)

    reloaded = load_config()
+    assert isinstance(reloaded["model"], dict)
    assert reloaded["model"]["provider"] == "custom"
-    assert reloaded["model"]["default"] == "custom/model"
-    assert reloaded["model"]["base_url"] == "https://example.invalid/v1"
+    assert reloaded["model"]["base_url"] == "http://localhost:8080/v1"


-def test_setup_custom_endpoint_saves_working_v1_base_url(tmp_path, monkeypatch):
+def test_setup_keep_current_config_provider_uses_provider_specific_model_menu(
+    tmp_path, monkeypatch
+):
+    """Keeping current provider preserves the config on disk."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    _clear_provider_env(monkeypatch)
+    _stub_tts(monkeypatch)
+
+    _write_model_config("zai", "https://open.bigmodel.cn/api/paas/v4", "glm-5")

    config = load_config()

-    def fake_prompt_choice(question, choices, default=0):
-        if question == "Select your inference provider:":
-            return 3  # Custom endpoint
-        if question == "Configure vision:":
-            return len(choices) - 1  # Skip
-        tts_idx = _maybe_keep_current_tts(question, choices)
-        if tts_idx is not None:
-            return tts_idx
-        raise AssertionError(f"Unexpected prompt_choice call: {question}")
+    def fake_select():
+        pass  # keep current

-    # _model_flow_custom uses builtins.input (URL, key, model, context_length)
-    input_values = iter([
-        "http://localhost:8000",
-        "local-key",
-        "llm",
-        "",  # context_length (blank = auto-detect)
-    ])
-    monkeypatch.setattr("builtins.input", lambda _prompt="": next(input_values))
-
-    monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice)
-    monkeypatch.setattr("hermes_cli.setup.prompt_yes_no", lambda *args, **kwargs: False)
-    monkeypatch.setattr("hermes_cli.auth.get_active_provider", lambda: None)
-    monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: [])
-    monkeypatch.setattr("agent.auxiliary_client.get_available_vision_backends", lambda: [])
-    monkeypatch.setattr("hermes_cli.main._save_custom_provider", lambda *args, **kwargs: None)
-    monkeypatch.setattr(
-        "hermes_cli.models.probe_api_models",
-        lambda api_key, base_url: {
-            "models": ["llm"],
-            "probed_url": "http://localhost:8000/v1/models",
-            "resolved_base_url": "http://localhost:8000/v1",
-            "suggested_base_url": "http://localhost:8000/v1",
-            "used_fallback": True,
-        },
-    )
-
-    setup_model_provider(config)
-
-    env = _read_env(tmp_path)
-
-    # _model_flow_custom saves env vars and config to disk
-    assert env.get("OPENAI_BASE_URL") == "http://localhost:8000/v1"
-    assert env.get("OPENAI_API_KEY") == "local-key"
-
-    # The model config is saved as a dict by _model_flow_custom
-    reloaded = load_config()
-    model_cfg = reloaded.get("model", {})
-    if isinstance(model_cfg, dict):
-        assert model_cfg.get("provider") == "custom"
-        assert model_cfg.get("default") == "llm"
-
-
-def test_setup_keep_current_config_provider_uses_provider_specific_model_menu(tmp_path, monkeypatch):
-    """Keep-current should respect config-backed providers, not fall back to OpenRouter."""
-    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
-    _clear_provider_env(monkeypatch)
-
-    config = load_config()
-    config["model"] = {
-        "default": "claude-opus-4-6",
-        "provider": "anthropic",
-    }
-    save_config(config)
-
-    captured = {"provider_choices": None, "model_choices": None}
-
-    def fake_prompt_choice(question, choices, default=0):
-        if question == "Select your inference provider:":
-            captured["provider_choices"] = list(choices)
-            assert choices[-1] == "Keep current (Anthropic)"
-            return len(choices) - 1
-        if question == "Configure vision:":
-            assert question == "Configure vision:"
-            assert choices[-1] == "Skip for now"
-            return len(choices) - 1
-        if question == "Select default model:":
-            captured["model_choices"] = list(choices)
-            return len(choices) - 1  # keep current model
-        tts_idx = _maybe_keep_current_tts(question, choices)
-        if tts_idx is not None:
-            return tts_idx
-        raise AssertionError(f"Unexpected prompt_choice call: {question}")
-
-    monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice)
-    monkeypatch.setattr("hermes_cli.setup.prompt", lambda *args, **kwargs: "")
-    monkeypatch.setattr("hermes_cli.setup.prompt_yes_no", lambda *args, **kwargs: False)
-    monkeypatch.setattr("hermes_cli.auth.get_active_provider", lambda: None)
-    monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: [])
-    monkeypatch.setattr("hermes_cli.models.provider_model_ids", lambda provider: [])
-    monkeypatch.setattr("agent.auxiliary_client.get_available_vision_backends", lambda: [])
-
-    setup_model_provider(config)
-    save_config(config)
-
-    assert captured["provider_choices"] is not None
-    assert captured["model_choices"] is not None
-    assert captured["model_choices"][0] == "claude-opus-4-6"
-    assert "anthropic/claude-opus-4.6 (recommended)" not in captured["model_choices"]
-
-
-def test_setup_keep_current_anthropic_can_configure_openai_vision_default(tmp_path, monkeypatch):
-    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
-    _clear_provider_env(monkeypatch)
-
-    config = load_config()
-    config["model"] = {
-        "default": "claude-opus-4-6",
-        "provider": "anthropic",
-    }
-    save_config(config)
-
-    def fake_prompt_choice(question, choices, default=0):
-        if question == "Select your inference provider:":
-            assert choices[-1] == "Keep current (Anthropic)"
-            return len(choices) - 1
-        if question == "Configure vision:":
-            return 1
-        if question == "Select vision model:":
-            assert choices[-1] == "Use default (gpt-4o-mini)"
-            return len(choices) - 1
-        if question == "Select default model:":
-            assert choices[-1] == "Keep current (claude-opus-4-6)"
-            return len(choices) - 1
-        tts_idx = _maybe_keep_current_tts(question, choices)
-        if tts_idx is not None:
-            return tts_idx
-        raise AssertionError(f"Unexpected prompt_choice call: {question}")
-
-    monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice)
-    monkeypatch.setattr(
-        "hermes_cli.setup.prompt",
-        lambda message, *args, **kwargs: "sk-openai" if "OpenAI API key" in message else "",
-    )
-    monkeypatch.setattr("hermes_cli.setup.prompt_yes_no", lambda *args, **kwargs: False)
-    monkeypatch.setattr("hermes_cli.auth.get_active_provider", lambda: None)
-    monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: [])
-    monkeypatch.setattr("hermes_cli.models.provider_model_ids", lambda provider: [])
-    monkeypatch.setattr("agent.auxiliary_client.get_available_vision_backends", lambda: [])
-
-    setup_model_provider(config)
-    env = _read_env(tmp_path)
-
-    assert env.get("OPENAI_API_KEY") == "sk-openai"
-    assert env.get("OPENAI_BASE_URL") == "https://api.openai.com/v1"
-    assert env.get("AUXILIARY_VISION_MODEL") == "gpt-4o-mini"
-
-
-def test_setup_copilot_uses_gh_auth_and_saves_provider(tmp_path, monkeypatch):
-    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
-    _clear_provider_env(monkeypatch)
-
-    config = load_config()
-
-    def fake_prompt_choice(question, choices, default=0):
-        if question == "Select your inference provider:":
-            assert choices[14] == "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)"
-            return 14
-        if question == "Select default model:":
-            assert "gpt-4.1" in choices
-            assert "gpt-5.4" in choices
-            return choices.index("gpt-5.4")
-        if question == "Select reasoning effort:":
-            assert "low" in choices
-            assert "high" in choices
-            return choices.index("high")
-        if question == "Configure vision:":
-            return len(choices) - 1
-        tts_idx = _maybe_keep_current_tts(question, choices)
-        if tts_idx is not None:
-            return tts_idx
-        raise AssertionError(f"Unexpected prompt_choice call: {question}")
-
-    def fake_prompt(message, *args, **kwargs):
-        raise AssertionError(f"Unexpected prompt call: {message}")
-
-    def fake_get_auth_status(provider_id):
-        if provider_id == "copilot":
-            return {"logged_in": True}
-        return {"logged_in": False}
-
-    monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice)
-    monkeypatch.setattr("hermes_cli.setup.prompt", fake_prompt)
-    monkeypatch.setattr("hermes_cli.setup.prompt_yes_no", lambda *args, **kwargs: False)
-    monkeypatch.setattr("hermes_cli.auth.get_active_provider", lambda: None)
-    monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: [])
-    monkeypatch.setattr("hermes_cli.auth.get_auth_status", fake_get_auth_status)
-    monkeypatch.setattr(
-        "hermes_cli.auth.resolve_api_key_provider_credentials",
-        lambda provider_id: {
-            "provider": provider_id,
-            "api_key": "gh-cli-token",
-            "base_url": "https://api.githubcopilot.com",
-            "source": "gh auth token",
-        },
-    )
-    monkeypatch.setattr(
-        "hermes_cli.models.fetch_github_model_catalog",
-        lambda api_key: [
-            {
-                "id": "gpt-4.1",
-                "capabilities": {"type": "chat", "supports": {}},
-                "supported_endpoints": ["/chat/completions"],
-            },
-            {
-                "id": "gpt-5.4",
-                "capabilities": {"type": "chat", "supports": {"reasoning_effort": ["low", "medium", "high"]}},
-                "supported_endpoints": ["/responses"],
-            },
-        ],
-    )
-    monkeypatch.setattr("agent.auxiliary_client.get_available_vision_backends", lambda: [])
-
-    setup_model_provider(config)
-    save_config(config)
-
-    env = _read_env(tmp_path)
-    reloaded = load_config()
-
-    assert env.get("GITHUB_TOKEN") is None
-    assert reloaded["model"]["provider"] == "copilot"
-    assert reloaded["model"]["base_url"] == "https://api.githubcopilot.com"
-    assert reloaded["model"]["default"] == "gpt-5.4"
-    assert reloaded["model"]["api_mode"] == "codex_responses"
-    assert reloaded["agent"]["reasoning_effort"] == "high"
-
-
-def test_setup_copilot_acp_uses_model_picker_and_saves_provider(tmp_path, monkeypatch):
-    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
-    _clear_provider_env(monkeypatch)
-
-    config = load_config()
-
-    def fake_prompt_choice(question, choices, default=0):
-        if question == "Select your inference provider:":
-            assert choices[15] == "GitHub Copilot ACP (spawns `copilot --acp --stdio`)"
-            return 15
-        if question == "Select default model:":
-            assert "gpt-4.1" in choices
-            assert "gpt-5.4" in choices
-            return choices.index("gpt-5.4")
-        if question == "Configure vision:":
-            return len(choices) - 1
-        tts_idx = _maybe_keep_current_tts(question, choices)
-        if tts_idx is not None:
-            return tts_idx
-        raise AssertionError(f"Unexpected prompt_choice call: {question}")
-
-    def fake_prompt(message, *args, **kwargs):
-        raise AssertionError(f"Unexpected prompt call: {message}")
-
-    monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice)
-    monkeypatch.setattr("hermes_cli.setup.prompt", fake_prompt)
-    monkeypatch.setattr("hermes_cli.setup.prompt_yes_no", lambda *args, **kwargs: False)
-    monkeypatch.setattr("hermes_cli.auth.get_active_provider", lambda: None)
-    monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: [])
-    monkeypatch.setattr("hermes_cli.auth.get_auth_status", lambda provider_id: {"logged_in": provider_id == "copilot-acp"})
-    monkeypatch.setattr(
-        "hermes_cli.auth.resolve_api_key_provider_credentials",
-        lambda provider_id: {
-            "provider": "copilot",
-            "api_key": "gh-cli-token",
-            "base_url": "https://api.githubcopilot.com",
-            "source": "gh auth token",
-        },
-    )
-    monkeypatch.setattr(
-        "hermes_cli.models.fetch_github_model_catalog",
-        lambda api_key: [
-            {
-                "id": "gpt-4.1",
-                "capabilities": {"type": "chat", "supports": {}},
-                "supported_endpoints": ["/chat/completions"],
-            },
-            {
-                "id": "gpt-5.4",
-                "capabilities": {"type": "chat", "supports": {"reasoning_effort": ["low", "medium", "high"]}},
-                "supported_endpoints": ["/responses"],
-            },
-        ],
-    )
-    monkeypatch.setattr("agent.auxiliary_client.get_available_vision_backends", lambda: [])
+    monkeypatch.setattr("hermes_cli.main.select_provider_and_model", fake_select)

    setup_model_provider(config)
    save_config(config)

    reloaded = load_config()
-
-    assert reloaded["model"]["provider"] == "copilot-acp"
-    assert reloaded["model"]["base_url"] == "acp://copilot"
-    assert reloaded["model"]["default"] == "gpt-5.4"
-    assert reloaded["model"]["api_mode"] == "chat_completions"
+    assert isinstance(reloaded["model"], dict)
+    assert reloaded["model"]["provider"] == "zai"


-def test_setup_switch_custom_to_codex_clears_custom_endpoint_and_updates_config(tmp_path, monkeypatch):
-    """Switching from custom to Codex should clear custom endpoint overrides."""
+def test_setup_same_provider_rotation_strategy_saved_for_multi_credential_pool(tmp_path, monkeypatch):
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    _clear_provider_env(monkeypatch)
+    save_env_value("OPENROUTER_API_KEY", "or-key")

-    save_env_value("OPENAI_BASE_URL", "https://example.invalid/v1")
-    save_env_value("OPENAI_API_KEY", "sk-custom")
-    save_env_value("OPENROUTER_API_KEY", "sk-or")
+    # Pre-write config so the pool step sees provider="openrouter"
+    _write_model_config("openrouter", "", "anthropic/claude-opus-4.6")

    config = load_config()
-    config["model"] = {
-        "default": "custom/model",
-        "provider": "custom",
-        "base_url": "https://example.invalid/v1",
-    }
-    save_config(config)
+
+    class _Entry:
+        def __init__(self, label):
+            self.label = label
+
+    class _Pool:
+        def entries(self):
+            return [_Entry("primary"), _Entry("secondary")]
+
+    def fake_select():
+        pass  # no-op — config already has provider set

    def fake_prompt_choice(question, choices, default=0):
-        if question == "Select your inference provider:":
-            return 2  # OpenAI Codex
-        if question == "Select default model:":
+        if "rotation strategy" in question:
+            return 1  # round robin
+        tts_idx = _maybe_keep_current_tts(question, choices)
+        if tts_idx is not None:
+            return tts_idx
+        return default
+
+    def fake_prompt_yes_no(question, default=True):
+        return False
+
+    # Patch directly on the module objects to ensure local imports pick them up.
+    import hermes_cli.main as _main_mod
+    import hermes_cli.setup as _setup_mod
+    import agent.credential_pool as _pool_mod
+    import agent.auxiliary_client as _aux_mod
+
+    monkeypatch.setattr(_main_mod, "select_provider_and_model", fake_select)
+    # NOTE: _stub_tts overwrites prompt_choice, so set our mock AFTER it.
+    _stub_tts(monkeypatch)
+    monkeypatch.setattr(_setup_mod, "prompt_choice", fake_prompt_choice)
+    monkeypatch.setattr(_setup_mod, "prompt_yes_no", fake_prompt_yes_no)
+    monkeypatch.setattr(_setup_mod, "prompt", lambda *args, **kwargs: "")
+    monkeypatch.setattr(_pool_mod, "load_pool", lambda provider: _Pool())
+    monkeypatch.setattr(_aux_mod, "get_available_vision_backends", lambda: [])
+
+    setup_model_provider(config)
+
+    # The pool has 2 entries, so the strategy prompt should fire
+    strategy = config.get("credential_pool_strategies", {}).get("openrouter")
+    assert strategy == "round_robin", f"Expected round_robin but got {strategy}"
+
+
+def test_setup_same_provider_fallback_can_add_another_credential(tmp_path, monkeypatch):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    _clear_provider_env(monkeypatch)
+    save_env_value("OPENROUTER_API_KEY", "or-key")
+
+    # Pre-write config so the pool step sees provider="openrouter"
+    _write_model_config("openrouter", "", "anthropic/claude-opus-4.6")
+
+    config = load_config()
+    pool_sizes = iter([1, 2])
+    add_calls = []
+
+    class _Entry:
+        def __init__(self, label):
+            self.label = label
+
+    class _Pool:
+        def __init__(self, size):
+            self._size = size
+
+        def entries(self):
+            return [_Entry(f"cred-{idx}") for idx in range(self._size)]
+
+    def fake_load_pool(provider):
+        return _Pool(next(pool_sizes))
+
+    def fake_auth_add_command(args):
+        add_calls.append(args.provider)
+
+    def fake_select():
+        pass  # no-op — config already has provider set
+
+    def fake_prompt_choice(question, choices, default=0):
+        if question == "Select same-provider rotation strategy:":
            return 0
        tts_idx = _maybe_keep_current_tts(question, choices)
+        if tts_idx is not None:
+            return tts_idx
+        return default
+
+    yes_no_answers = iter([True, False])
+
+    def fake_prompt_yes_no(question, default=True):
+        if question == "Add another credential for same-provider fallback?":
+            return next(yes_no_answers)
+        return False
+
+    monkeypatch.setattr("hermes_cli.main.select_provider_and_model", fake_select)
+    _stub_tts(monkeypatch)
+    monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice)
+    monkeypatch.setattr("hermes_cli.setup.prompt_yes_no", fake_prompt_yes_no)
+    monkeypatch.setattr("hermes_cli.setup.prompt", lambda *args, **kwargs: "")
+    monkeypatch.setattr("agent.credential_pool.load_pool", fake_load_pool)
+    monkeypatch.setattr("hermes_cli.auth_commands.auth_add_command", fake_auth_add_command)
+    monkeypatch.setattr("agent.auxiliary_client.get_available_vision_backends", lambda: [])
+
+    setup_model_provider(config)
+
+    assert add_calls == ["openrouter"]
+    assert config.get("credential_pool_strategies", {}).get("openrouter") == "fill_first"
+
+
+def test_setup_pool_step_shows_manual_vs_auto_detected_counts(tmp_path, monkeypatch, capsys):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    _clear_provider_env(monkeypatch)
+    save_env_value("OPENROUTER_API_KEY", "or-key")
+
+    # Pre-write config so the pool step sees provider="openrouter"
+    _write_model_config("openrouter", "", "anthropic/claude-opus-4.6")
+
+    config = load_config()
+
+    class _Entry:
+        def __init__(self, label, source):
+            self.label = label
+            self.source = source
+
+    class _Pool:
+        def entries(self):
+            return [
+                _Entry("primary", "manual"),
+                _Entry("secondary", "manual"),
+                _Entry("OPENROUTER_API_KEY", "env:OPENROUTER_API_KEY"),
+            ]
+
+    def fake_select():
+        pass  # no-op — config already has provider set
+
+    def fake_prompt_choice(question, choices, default=0):
+        if "rotation strategy" in question:
+            return 0
+        tts_idx = _maybe_keep_current_tts(question, choices)
+        if tts_idx is not None:
+            return tts_idx
+        return default
+
+    monkeypatch.setattr("hermes_cli.main.select_provider_and_model", fake_select)
+    _stub_tts(monkeypatch)
+    monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice)
+    monkeypatch.setattr("hermes_cli.setup.prompt_yes_no", lambda *args, **kwargs: False)
+    monkeypatch.setattr("hermes_cli.setup.prompt", lambda *args, **kwargs: "")
+    monkeypatch.setattr("agent.credential_pool.load_pool", lambda provider: _Pool())
+    monkeypatch.setattr("agent.auxiliary_client.get_available_vision_backends", lambda: [])
+
+    setup_model_provider(config)
+
+    out = capsys.readouterr().out
+    assert "Current pooled credentials for openrouter: 3 (2 manual, 1 auto-detected from env/shared auth)" in out
+
+
+def test_setup_copilot_acp_skips_same_provider_pool_step(tmp_path, monkeypatch):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    _clear_provider_env(monkeypatch)
+
+    config = load_config()
+
+    def fake_prompt_choice(question, choices, default=0):
+        if question == "Select your inference provider:":
+            return 15  # GitHub Copilot ACP
+        if question == "Select default model:":
+            return 0
+        if question == "Configure vision:":
+            return len(choices) - 1
+        tts_idx = _maybe_keep_current_tts(question, choices)
        if tts_idx is not None:
            return tts_idx
        raise AssertionError(f"Unexpected prompt_choice call: {question}")

+    def fake_prompt_yes_no(question, default=True):
+        if question == "Add another credential for same-provider fallback?":
+            raise AssertionError("same-provider pool prompt should not appear for copilot-acp")
+        return False
+
    monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice)
+    monkeypatch.setattr("hermes_cli.setup.prompt_yes_no", fake_prompt_yes_no)
    monkeypatch.setattr("hermes_cli.setup.prompt", lambda *args, **kwargs: "")
-    monkeypatch.setattr("hermes_cli.setup.prompt_yes_no", lambda *args, **kwargs: False)
    monkeypatch.setattr("hermes_cli.auth.get_active_provider", lambda: None)
    monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: [])
-    monkeypatch.setattr("hermes_cli.auth._login_openai_codex", lambda *args, **kwargs: None)
-    monkeypatch.setattr(
-        "hermes_cli.auth.resolve_codex_runtime_credentials",
-        lambda *args, **kwargs: {
-            "base_url": "https://chatgpt.com/backend-api/codex",
-            "api_key": "codex-...oken",
-        },
-    )
-    monkeypatch.setattr(
-        "hermes_cli.codex_models.get_codex_model_ids",
-        lambda **kwargs: ["openai/gpt-5.3-codex", "openai/gpt-5-codex-mini"],
-    )
+    monkeypatch.setattr("agent.auxiliary_client.get_available_vision_backends", lambda: [])
+
+    setup_model_provider(config)
+
+    assert config.get("credential_pool_strategies", {}) == {}
+
+
+def test_setup_copilot_uses_gh_auth_and_saves_provider(tmp_path, monkeypatch):
+    """Copilot provider saves correctly through delegation."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    _clear_provider_env(monkeypatch)
+    _stub_tts(monkeypatch)
+
+    config = load_config()
+
+    def fake_select():
+        _write_model_config("copilot", "https://models.github.ai/inference/v1", "gpt-4o")
+
+    monkeypatch.setattr("hermes_cli.main.select_provider_and_model", fake_select)

    setup_model_provider(config)
    save_config(config)

-    env = _read_env(tmp_path)
    reloaded = load_config()
-
-    assert env.get("OPENAI_BASE_URL") == ""
-    assert env.get("OPENAI_API_KEY") == ""
-    assert reloaded["model"]["provider"] == "openai-codex"
-    assert reloaded["model"]["default"] == "openai/gpt-5.3-codex"
-    assert reloaded["model"]["base_url"] == "https://chatgpt.com/backend-api/codex"
+    assert isinstance(reloaded["model"], dict)
+    assert reloaded["model"]["provider"] == "copilot"


-def test_setup_summary_marks_codex_auth_as_vision_available(tmp_path, monkeypatch, capsys):
+def test_setup_copilot_acp_uses_model_picker_and_saves_provider(tmp_path, monkeypatch):
+    """Copilot ACP provider saves correctly through delegation."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    _clear_provider_env(monkeypatch)
+    _stub_tts(monkeypatch)

-    (tmp_path / "auth.json").write_text(
-        '{"active_provider":"openai-codex","providers":{"openai-codex":{"tokens":{"access_token": "***", "refresh_token": "***"}}}}'
-    )
+    config = load_config()

-    monkeypatch.setattr("shutil.which", lambda _name: None)
+    def fake_select():
+        _write_model_config("copilot-acp", "", "claude-sonnet-4")

-    _print_setup_summary(load_config(), tmp_path)
-    output = capsys.readouterr().out
+    monkeypatch.setattr("hermes_cli.main.select_provider_and_model", fake_select)

-    assert "Vision (image analysis)" in output
-    assert "missing run 'hermes setup' to configure" not in output
-    assert "Mixture of Agents" in output
-    assert "missing OPENROUTER_API_KEY" in output
+    setup_model_provider(config)
+    save_config(config)
+
+    reloaded = load_config()
+    assert isinstance(reloaded["model"], dict)
+    assert reloaded["model"]["provider"] == "copilot-acp"
+
+
+def test_setup_switch_custom_to_codex_clears_custom_endpoint_and_updates_config(
+    tmp_path, monkeypatch
+):
+    """Switching from custom to codex updates config correctly."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    _clear_provider_env(monkeypatch)
+    _stub_tts(monkeypatch)
+
+    # Start with custom
+    _write_model_config("custom", "http://localhost:11434/v1", "qwen3.5:32b")
+
+    config = load_config()
+    assert config["model"]["provider"] == "custom"
+
+    def fake_select():
+        _write_model_config("openai-codex", "https://api.openai.com/v1", "gpt-4o")
+
+    monkeypatch.setattr("hermes_cli.main.select_provider_and_model", fake_select)
+
+    setup_model_provider(config)
+    save_config(config)
+
+    reloaded = load_config()
+    assert isinstance(reloaded["model"], dict)
+    assert reloaded["model"]["provider"] == "openai-codex"
+    assert reloaded["model"]["default"] == "gpt-4o"
+
+
+def test_setup_switch_preserves_non_model_config(tmp_path, monkeypatch):
+    """Provider switch preserves other config sections (terminal, display, etc.)."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    _clear_provider_env(monkeypatch)
+    _stub_tts(monkeypatch)
+
+    config = load_config()
+    config["terminal"]["timeout"] = 999
+    save_config(config)
+
+    config = load_config()
+
+    def fake_select():
+        _write_model_config("openrouter", model_name="gpt-4o")
+
+    monkeypatch.setattr("hermes_cli.main.select_provider_and_model", fake_select)
+
+    setup_model_provider(config)
+    save_config(config)
+
+    reloaded = load_config()
+    assert reloaded["terminal"]["timeout"] == 999
+    assert reloaded["model"]["provider"] == "openrouter"


 def test_setup_summary_marks_anthropic_auth_as_vision_available(tmp_path, monkeypatch, capsys):
--- a/tests/hermes_cli/test_update_gateway_restart.py
+++ b/tests/hermes_cli/test_update_gateway_restart.py
@ -25,6 +25,8 @@ def _make_run_side_effect(
    verify_ok=True,
    commit_count="3",
    systemd_active=False,
+    system_service_active=False,
+    system_restart_rc=0,
    launchctl_loaded=False,
 ):
    """Build a subprocess.run side_effect that simulates git + service commands."""
@ -45,14 +47,23 @@ def _make_run_side_effect(
        if "rev-list" in joined:
            return subprocess.CompletedProcess(cmd, 0, stdout=f"{commit_count}\n", stderr="")

-        # systemctl --user is-active
+        # systemctl is-active — distinguish --user from system scope
        if "systemctl" in joined and "is-active" in joined:
-            if systemd_active:
-                return subprocess.CompletedProcess(cmd, 0, stdout="active\n", stderr="")
-            return subprocess.CompletedProcess(cmd, 3, stdout="inactive\n", stderr="")
+            if "--user" in joined:
+                if systemd_active:
+                    return subprocess.CompletedProcess(cmd, 0, stdout="active\n", stderr="")
+                return subprocess.CompletedProcess(cmd, 3, stdout="inactive\n", stderr="")
+            else:
+                # System-level check (no --user)
+                if system_service_active:
+                    return subprocess.CompletedProcess(cmd, 0, stdout="active\n", stderr="")
+                return subprocess.CompletedProcess(cmd, 3, stdout="inactive\n", stderr="")

-        # systemctl --user restart
+        # systemctl restart — distinguish --user from system scope
        if "systemctl" in joined and "restart" in joined:
+            if "--user" not in joined and system_service_active:
+                stderr = "" if system_restart_rc == 0 else "Failed to restart: Permission denied"
+                return subprocess.CompletedProcess(cmd, system_restart_rc, stdout="", stderr=stderr)
            return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")

        # launchctl list ai.hermes.gateway
@ -393,3 +404,91 @@ class TestCmdUpdateLaunchdRestart:
        assert "Stopped gateway" not in captured
        assert "Gateway restarted" not in captured
        assert "Gateway restarted via launchd" not in captured
+
+
+# ---------------------------------------------------------------------------
+# cmd_update — system-level systemd service detection
+# ---------------------------------------------------------------------------
+
+
+class TestCmdUpdateSystemService:
+    """cmd_update detects system-level gateway services where --user fails."""
+
+    @patch("shutil.which", return_value=None)
+    @patch("subprocess.run")
+    def test_update_detects_system_service_and_restarts(
+        self, mock_run, _mock_which, mock_args, capsys, monkeypatch,
+    ):
+        """When user systemd is inactive but a system service exists, restart via system scope."""
+        monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
+        monkeypatch.setattr(gateway_cli, "is_linux", lambda: True)
+
+        mock_run.side_effect = _make_run_side_effect(
+            commit_count="3",
+            systemd_active=False,
+            system_service_active=True,
+        )
+
+        with patch("gateway.status.get_running_pid", return_value=12345), \
+             patch("gateway.status.remove_pid_file"):
+            cmd_update(mock_args)
+
+        captured = capsys.readouterr().out
+        assert "system gateway service" in captured.lower()
+        assert "Gateway restarted (system service)" in captured
+        # Verify systemctl restart (no --user) was called
+        restart_calls = [
+            c for c in mock_run.call_args_list
+            if "restart" in " ".join(str(a) for a in c.args[0])
+            and "systemctl" in " ".join(str(a) for a in c.args[0])
+            and "--user" not in " ".join(str(a) for a in c.args[0])
+        ]
+        assert len(restart_calls) == 1
+
+    @patch("shutil.which", return_value=None)
+    @patch("subprocess.run")
+    def test_update_system_service_restart_failure_shows_sudo_hint(
+        self, mock_run, _mock_which, mock_args, capsys, monkeypatch,
+    ):
+        """When system service restart fails (e.g. no root), show sudo hint."""
+        monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
+        monkeypatch.setattr(gateway_cli, "is_linux", lambda: True)
+
+        mock_run.side_effect = _make_run_side_effect(
+            commit_count="3",
+            systemd_active=False,
+            system_service_active=True,
+            system_restart_rc=1,
+        )
+
+        with patch("gateway.status.get_running_pid", return_value=12345), \
+             patch("gateway.status.remove_pid_file"):
+            cmd_update(mock_args)
+
+        captured = capsys.readouterr().out
+        assert "sudo systemctl restart" in captured
+
+    @patch("shutil.which", return_value=None)
+    @patch("subprocess.run")
+    def test_user_service_takes_priority_over_system(
+        self, mock_run, _mock_which, mock_args, capsys, monkeypatch,
+    ):
+        """When both user and system services are active, user wins."""
+        monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
+        monkeypatch.setattr(gateway_cli, "is_linux", lambda: True)
+
+        mock_run.side_effect = _make_run_side_effect(
+            commit_count="3",
+            systemd_active=True,
+            system_service_active=True,
+        )
+
+        with patch("gateway.status.get_running_pid", return_value=12345), \
+             patch("gateway.status.remove_pid_file"), \
+             patch("os.kill"):
+            cmd_update(mock_args)
+
+        captured = capsys.readouterr().out
+        # Should restart via user service, not system
+        assert "Gateway restarted." in captured
+        assert "(system service)" not in captured
--- a/tests/test_api_key_providers.py
+++ b/tests/test_api_key_providers.py
@ -622,6 +622,134 @@ class TestHasAnyProviderConfigured:
        from hermes_cli.main import _has_any_provider_configured
        assert _has_any_provider_configured() is True

+    def test_claude_code_creds_ignored_on_fresh_install(self, monkeypatch, tmp_path):
+        """Claude Code credentials should NOT skip the wizard when Hermes is unconfigured."""
+        from hermes_cli import config as config_module
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()
+        monkeypatch.setattr(config_module, "get_env_path", lambda: hermes_home / ".env")
+        monkeypatch.setattr(config_module, "get_hermes_home", lambda: hermes_home)
+        # Clear all provider env vars so earlier checks don't short-circuit
+        for var in ("OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY",
+                     "ANTHROPIC_TOKEN", "OPENAI_BASE_URL"):
+            monkeypatch.delenv(var, raising=False)
+        # Simulate valid Claude Code credentials
+        monkeypatch.setattr(
+            "agent.anthropic_adapter.read_claude_code_credentials",
+            lambda: {"accessToken": "sk-ant-test", "refreshToken": "ref-tok"},
+        )
+        monkeypatch.setattr(
+            "agent.anthropic_adapter.is_claude_code_token_valid",
+            lambda creds: True,
+        )
+        from hermes_cli.main import _has_any_provider_configured
+        assert _has_any_provider_configured() is False
+
+    def test_config_provider_counts(self, monkeypatch, tmp_path):
+        """config.yaml with model.provider set should count as configured."""
+        import yaml
+        from hermes_cli import config as config_module
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()
+        config_file = hermes_home / "config.yaml"
+        config_file.write_text(yaml.dump({
+            "model": {"default": "anthropic/claude-opus-4.6", "provider": "openrouter"},
+        }))
+        monkeypatch.setattr(config_module, "get_env_path", lambda: hermes_home / ".env")
+        monkeypatch.setattr(config_module, "get_hermes_home", lambda: hermes_home)
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+        # Clear all provider env vars
+        for var in ("OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY",
+                     "ANTHROPIC_TOKEN", "OPENAI_BASE_URL"):
+            monkeypatch.delenv(var, raising=False)
+        from hermes_cli.main import _has_any_provider_configured
+        assert _has_any_provider_configured() is True
+
+    def test_config_base_url_counts(self, monkeypatch, tmp_path):
+        """config.yaml with model.base_url set (custom endpoint) should count."""
+        import yaml
+        from hermes_cli import config as config_module
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()
+        config_file = hermes_home / "config.yaml"
+        config_file.write_text(yaml.dump({
+            "model": {"default": "my-model", "base_url": "http://localhost:11434/v1"},
+        }))
+        monkeypatch.setattr(config_module, "get_env_path", lambda: hermes_home / ".env")
+        monkeypatch.setattr(config_module, "get_hermes_home", lambda: hermes_home)
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+        for var in ("OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY",
+                     "ANTHROPIC_TOKEN", "OPENAI_BASE_URL"):
+            monkeypatch.delenv(var, raising=False)
+        from hermes_cli.main import _has_any_provider_configured
+        assert _has_any_provider_configured() is True
+
+    def test_config_api_key_counts(self, monkeypatch, tmp_path):
+        """config.yaml with model.api_key set should count."""
+        import yaml
+        from hermes_cli import config as config_module
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()
+        config_file = hermes_home / "config.yaml"
+        config_file.write_text(yaml.dump({
+            "model": {"default": "my-model", "api_key": "sk-test-key"},
+        }))
+        monkeypatch.setattr(config_module, "get_env_path", lambda: hermes_home / ".env")
+        monkeypatch.setattr(config_module, "get_hermes_home", lambda: hermes_home)
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+        for var in ("OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY",
+                     "ANTHROPIC_TOKEN", "OPENAI_BASE_URL"):
+            monkeypatch.delenv(var, raising=False)
+        from hermes_cli.main import _has_any_provider_configured
+        assert _has_any_provider_configured() is True
+
+    def test_config_dict_no_provider_no_creds_still_false(self, monkeypatch, tmp_path):
+        """config.yaml model dict with empty default and no creds stays false."""
+        import yaml
+        from hermes_cli import config as config_module
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()
+        config_file = hermes_home / "config.yaml"
+        config_file.write_text(yaml.dump({
+            "model": {"default": ""},
+        }))
+        monkeypatch.setattr(config_module, "get_env_path", lambda: hermes_home / ".env")
+        monkeypatch.setattr(config_module, "get_hermes_home", lambda: hermes_home)
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+        for var in ("OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY",
+                     "ANTHROPIC_TOKEN", "OPENAI_BASE_URL"):
+            monkeypatch.delenv(var, raising=False)
+        from hermes_cli.main import _has_any_provider_configured
+        assert _has_any_provider_configured() is False
+
+    def test_claude_code_creds_counted_when_hermes_configured(self, monkeypatch, tmp_path):
+        """Claude Code credentials should count when Hermes has been explicitly configured."""
+        import yaml
+        from hermes_cli import config as config_module
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()
+        # Write a config with a non-default model to simulate explicit configuration
+        config_file = hermes_home / "config.yaml"
+        config_file.write_text(yaml.dump({"model": {"default": "my-local-model"}}))
+        monkeypatch.setattr(config_module, "get_env_path", lambda: hermes_home / ".env")
+        monkeypatch.setattr(config_module, "get_hermes_home", lambda: hermes_home)
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+        # Clear all provider env vars
+        for var in ("OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY",
+                     "ANTHROPIC_TOKEN", "OPENAI_BASE_URL"):
+            monkeypatch.delenv(var, raising=False)
+        # Simulate valid Claude Code credentials
+        monkeypatch.setattr(
+            "agent.anthropic_adapter.read_claude_code_credentials",
+            lambda: {"accessToken": "sk-ant-test", "refreshToken": "ref-tok"},
+        )
+        monkeypatch.setattr(
+            "agent.anthropic_adapter.is_claude_code_token_valid",
+            lambda creds: True,
+        )
+        from hermes_cli.main import _has_any_provider_configured
+        assert _has_any_provider_configured() is True
+

 # =============================================================================
 # Kimi Code auto-detection tests
--- a/tests/test_auth_commands.py
+++ b/tests/test_auth_commands.py
@ -0,0 +1,391 @@
+"""Tests for auth subcommands backed by the credential pool."""
+
+from __future__ import annotations
+
+import base64
+import json
+
+import pytest
+
+
+def _write_auth_store(tmp_path, payload: dict) -> None:
+    hermes_home = tmp_path / "hermes"
+    hermes_home.mkdir(parents=True, exist_ok=True)
+    (hermes_home / "auth.json").write_text(json.dumps(payload, indent=2))
+
+
+def _jwt_with_email(email: str) -> str:
+    header = base64.urlsafe_b64encode(b'{"alg":"RS256","typ":"JWT"}').rstrip(b"=").decode()
+    payload = base64.urlsafe_b64encode(
+        json.dumps({"email": email}).encode()
+    ).rstrip(b"=").decode()
+    return f"{header}.{payload}.signature"
+
+
+@pytest.fixture(autouse=True)
+def _clear_provider_env(monkeypatch):
+    for key in (
+        "OPENROUTER_API_KEY",
+        "OPENAI_API_KEY",
+        "ANTHROPIC_API_KEY",
+        "ANTHROPIC_TOKEN",
+        "CLAUDE_CODE_OAUTH_TOKEN",
+    ):
+        monkeypatch.delenv(key, raising=False)
+
+
+def test_auth_add_api_key_persists_manual_entry(tmp_path, monkeypatch):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
+    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
+    _write_auth_store(tmp_path, {"version": 1, "providers": {}})
+
+    from hermes_cli.auth_commands import auth_add_command
+
+    class _Args:
+        provider = "openrouter"
+        auth_type = "api-key"
+        api_key = "sk-or-manual"
+        label = "personal"
+
+    auth_add_command(_Args())
+
+    payload = json.loads((tmp_path / "hermes" / "auth.json").read_text())
+    entries = payload["credential_pool"]["openrouter"]
+    entry = next(item for item in entries if item["source"] == "manual")
+    assert entry["label"] == "personal"
+    assert entry["auth_type"] == "api_key"
+    assert entry["source"] == "manual"
+    assert entry["access_token"] == "sk-or-manual"
+
+
+def test_auth_add_anthropic_oauth_persists_pool_entry(tmp_path, monkeypatch):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
+    monkeypatch.delenv("ANTHROPIC_TOKEN", raising=False)
+    monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False)
+    _write_auth_store(tmp_path, {"version": 1, "providers": {}})
+    token = _jwt_with_email("claude@example.com")
+    monkeypatch.setattr(
+        "agent.anthropic_adapter.run_hermes_oauth_login_pure",
+        lambda: {
+            "access_token": token,
+            "refresh_token": "refresh-token",
+            "expires_at_ms": 1711234567000,
+        },
+    )
+
+    from hermes_cli.auth_commands import auth_add_command
+
+    class _Args:
+        provider = "anthropic"
+        auth_type = "oauth"
+        api_key = None
+        label = None
+
+    auth_add_command(_Args())
+
+    payload = json.loads((tmp_path / "hermes" / "auth.json").read_text())
+    entries = payload["credential_pool"]["anthropic"]
+    entry = next(item for item in entries if item["source"] == "manual:hermes_pkce")
+    assert entry["label"] == "claude@example.com"
+    assert entry["source"] == "manual:hermes_pkce"
+    assert entry["refresh_token"] == "refresh-token"
+    assert entry["expires_at_ms"] == 1711234567000
+
+
+def test_auth_add_nous_oauth_persists_pool_entry(tmp_path, monkeypatch):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    _write_auth_store(tmp_path, {"version": 1, "providers": {}})
+    token = _jwt_with_email("nous@example.com")
+    monkeypatch.setattr(
+        "hermes_cli.auth._nous_device_code_login",
+        lambda **kwargs: {
+            "portal_base_url": "https://portal.example.com",
+            "inference_base_url": "https://inference.example.com/v1",
+            "client_id": "hermes-cli",
+            "scope": "inference:mint_agent_key",
+            "token_type": "Bearer",
+            "access_token": token,
+            "refresh_token": "refresh-token",
+            "obtained_at": "2026-03-23T10:00:00+00:00",
+            "expires_at": "2026-03-23T11:00:00+00:00",
+            "expires_in": 3600,
+            "agent_key": "ak-test",
+            "agent_key_id": "ak-id",
+            "agent_key_expires_at": "2026-03-23T10:30:00+00:00",
+            "agent_key_expires_in": 1800,
+            "agent_key_reused": False,
+            "agent_key_obtained_at": "2026-03-23T10:00:10+00:00",
+            "tls": {"insecure": False, "ca_bundle": None},
+        },
+    )
+
+    from hermes_cli.auth_commands import auth_add_command
+
+    class _Args:
+        provider = "nous"
+        auth_type = "oauth"
+        api_key = None
+        label = None
+        portal_url = None
+        inference_url = None
+        client_id = None
+        scope = None
+        no_browser = False
+        timeout = None
+        insecure = False
+        ca_bundle = None
+
+    auth_add_command(_Args())
+
+    payload = json.loads((tmp_path / "hermes" / "auth.json").read_text())
+    entries = payload["credential_pool"]["nous"]
+    entry = next(item for item in entries if item["source"] == "manual:device_code")
+    assert entry["label"] == "nous@example.com"
+    assert entry["source"] == "manual:device_code"
+    assert entry["agent_key"] == "ak-test"
+    assert entry["portal_base_url"] == "https://portal.example.com"
+
+
+def test_auth_add_codex_oauth_persists_pool_entry(tmp_path, monkeypatch):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    _write_auth_store(tmp_path, {"version": 1, "providers": {}})
+    token = _jwt_with_email("codex@example.com")
+    monkeypatch.setattr(
+        "hermes_cli.auth._codex_device_code_login",
+        lambda: {
+            "tokens": {
+                "access_token": token,
+                "refresh_token": "refresh-token",
+            },
+            "base_url": "https://chatgpt.com/backend-api/codex",
+            "last_refresh": "2026-03-23T10:00:00Z",
+        },
+    )
+
+    from hermes_cli.auth_commands import auth_add_command
+
+    class _Args:
+        provider = "openai-codex"
+        auth_type = "oauth"
+        api_key = None
+        label = None
+
+    auth_add_command(_Args())
+
+    payload = json.loads((tmp_path / "hermes" / "auth.json").read_text())
+    entries = payload["credential_pool"]["openai-codex"]
+    entry = next(item for item in entries if item["source"] == "manual:device_code")
+    assert entry["label"] == "codex@example.com"
+    assert entry["source"] == "manual:device_code"
+    assert entry["refresh_token"] == "refresh-token"
+    assert entry["base_url"] == "https://chatgpt.com/backend-api/codex"
+
+
+def test_auth_remove_reindexes_priorities(tmp_path, monkeypatch):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    # Prevent pool auto-seeding from host env vars and file-backed sources
+    monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
+    monkeypatch.delenv("ANTHROPIC_TOKEN", raising=False)
+    monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False)
+    monkeypatch.setattr(
+        "agent.credential_pool._seed_from_singletons",
+        lambda provider, entries: (False, set()),
+    )
+    _write_auth_store(
+        tmp_path,
+        {
+            "version": 1,
+            "credential_pool": {
+                "anthropic": [
+                    {
+                        "id": "cred-1",
+                        "label": "primary",
+                        "auth_type": "api_key",
+                        "priority": 0,
+                        "source": "manual",
+                        "access_token": "sk-ant-api-primary",
+                    },
+                    {
+                        "id": "cred-2",
+                        "label": "secondary",
+                        "auth_type": "api_key",
+                        "priority": 1,
+                        "source": "manual",
+                        "access_token": "sk-ant-api-secondary",
+                    },
+                ]
+            },
+        },
+    )
+
+    from hermes_cli.auth_commands import auth_remove_command
+
+    class _Args:
+        provider = "anthropic"
+        index = 1
+
+    auth_remove_command(_Args())
+
+    payload = json.loads((tmp_path / "hermes" / "auth.json").read_text())
+    entries = payload["credential_pool"]["anthropic"]
+    assert len(entries) == 1
+    assert entries[0]["label"] == "secondary"
+    assert entries[0]["priority"] == 0
+
+
+def test_auth_reset_clears_provider_statuses(tmp_path, monkeypatch, capsys):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    _write_auth_store(
+        tmp_path,
+        {
+            "version": 1,
+            "credential_pool": {
+                "anthropic": [
+                    {
+                        "id": "cred-1",
+                        "label": "primary",
+                        "auth_type": "api_key",
+                        "priority": 0,
+                        "source": "manual",
+                        "access_token": "sk-ant-api-primary",
+                        "last_status": "exhausted",
+                        "last_status_at": 1711230000.0,
+                        "last_error_code": 402,
+                    }
+                ]
+            },
+        },
+    )
+
+    from hermes_cli.auth_commands import auth_reset_command
+
+    class _Args:
+        provider = "anthropic"
+
+    auth_reset_command(_Args())
+
+    out = capsys.readouterr().out
+    assert "Reset status" in out
+
+    payload = json.loads((tmp_path / "hermes" / "auth.json").read_text())
+    entry = payload["credential_pool"]["anthropic"][0]
+    assert entry["last_status"] is None
+    assert entry["last_status_at"] is None
+    assert entry["last_error_code"] is None
+
+
+def test_clear_provider_auth_removes_provider_pool_entries(tmp_path, monkeypatch):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    _write_auth_store(
+        tmp_path,
+        {
+            "version": 1,
+            "active_provider": "anthropic",
+            "providers": {
+                "anthropic": {"access_token": "legacy-token"},
+            },
+            "credential_pool": {
+                "anthropic": [
+                    {
+                        "id": "cred-1",
+                        "label": "primary",
+                        "auth_type": "oauth",
+                        "priority": 0,
+                        "source": "manual:hermes_pkce",
+                        "access_token": "pool-token",
+                    }
+                ],
+                "openrouter": [
+                    {
+                        "id": "cred-2",
+                        "label": "other-provider",
+                        "auth_type": "api_key",
+                        "priority": 0,
+                        "source": "manual",
+                        "access_token": "sk-or-test",
+                    }
+                ],
+            },
+        },
+    )
+
+    from hermes_cli.auth import clear_provider_auth
+
+    assert clear_provider_auth("anthropic") is True
+
+    payload = json.loads((tmp_path / "hermes" / "auth.json").read_text())
+    assert payload["active_provider"] is None
+    assert "anthropic" not in payload.get("providers", {})
+    assert "anthropic" not in payload.get("credential_pool", {})
+    assert "openrouter" in payload.get("credential_pool", {})
+
+
+def test_auth_list_does_not_call_mutating_select(monkeypatch, capsys):
+    from hermes_cli.auth_commands import auth_list_command
+
+    class _Entry:
+        id = "cred-1"
+        label = "primary"
+        auth_type="***"
+        source = "manual"
+        last_status = None
+        last_error_code = None
+        last_status_at = None
+
+    class _Pool:
+        def entries(self):
+            return [_Entry()]
+
+        def peek(self):
+            return _Entry()
+
+        def select(self):
+            raise AssertionError("auth_list_command should not call select()")
+
+    monkeypatch.setattr(
+        "hermes_cli.auth_commands.load_pool",
+        lambda provider: _Pool() if provider == "openrouter" else type("_EmptyPool", (), {"entries": lambda self: []})(),
+    )
+
+    class _Args:
+        provider = "openrouter"
+
+    auth_list_command(_Args())
+
+    out = capsys.readouterr().out
+    assert "openrouter (1 credentials):" in out
+    assert "primary" in out
+
+
+def test_auth_list_shows_exhausted_cooldown(monkeypatch, capsys):
+    from hermes_cli.auth_commands import auth_list_command
+
+    class _Entry:
+        id = "cred-1"
+        label = "primary"
+        auth_type = "api_key"
+        source = "manual"
+        last_status = "exhausted"
+        last_error_code = 429
+        last_status_at = 1000.0
+
+    class _Pool:
+        def entries(self):
+            return [_Entry()]
+
+        def peek(self):
+            return None
+
+    monkeypatch.setattr("hermes_cli.auth_commands.load_pool", lambda provider: _Pool())
+    monkeypatch.setattr("hermes_cli.auth_commands.time.time", lambda: 1030.0)
+
+    class _Args:
+        provider = "openrouter"
+
+    auth_list_command(_Args())
+
+    out = capsys.readouterr().out
+    assert "exhausted (429)" in out
+    assert "59m 30s left" in out
--- a/tests/test_cli_context_warning.py
+++ b/tests/test_cli_context_warning.py
@ -0,0 +1,161 @@
+"""Tests for the low context length warning in the CLI banner."""
+
+import os
+from types import SimpleNamespace
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+
+@pytest.fixture
+def _isolate(tmp_path, monkeypatch):
+    """Isolate HERMES_HOME so tests don't touch real config."""
+    home = tmp_path / ".hermes"
+    home.mkdir()
+    monkeypatch.setenv("HERMES_HOME", str(home))
+
+
+@pytest.fixture
+def cli_obj(_isolate):
+    """Create a minimal HermesCLI instance for banner testing."""
+    with patch("cli.load_cli_config", return_value={
+        "display": {"tool_progress": "new"},
+        "terminal": {},
+    }), patch("cli.get_tool_definitions", return_value=[]), \
+         patch("cli.build_welcome_banner"):
+        from cli import HermesCLI
+        obj = HermesCLI.__new__(HermesCLI)
+        obj.model = "test-model"
+        obj.enabled_toolsets = ["hermes-core"]
+        obj.compact = False
+        obj.console = MagicMock()
+        obj.session_id = None
+        obj.api_key = "test"
+        obj.base_url = ""
+        obj.provider = "test"
+        obj._provider_source = None
+        # Mock agent with context compressor
+        obj.agent = SimpleNamespace(
+            context_compressor=SimpleNamespace(context_length=None)
+        )
+        return obj
+
+
+class TestLowContextWarning:
+    """Tests that the CLI warns about low context lengths."""
+
+    def test_no_warning_for_normal_context(self, cli_obj):
+        """No warning when context is 32k+."""
+        cli_obj.agent.context_compressor.context_length = 32768
+        with patch("cli.get_tool_definitions", return_value=[]), \
+             patch("cli.build_welcome_banner"):
+            cli_obj.show_banner()
+
+        # Check that no yellow warning was printed
+        calls = [str(c) for c in cli_obj.console.print.call_args_list]
+        warning_calls = [c for c in calls if "too low" in c]
+        assert len(warning_calls) == 0
+
+    def test_warning_for_low_context(self, cli_obj):
+        """Warning shown when context is 4096 (Ollama default)."""
+        cli_obj.agent.context_compressor.context_length = 4096
+        with patch("cli.get_tool_definitions", return_value=[]), \
+             patch("cli.build_welcome_banner"):
+            cli_obj.show_banner()
+
+        calls = [str(c) for c in cli_obj.console.print.call_args_list]
+        warning_calls = [c for c in calls if "too low" in c]
+        assert len(warning_calls) == 1
+        assert "4,096" in warning_calls[0]
+
+    def test_warning_for_2048_context(self, cli_obj):
+        """Warning shown for 2048 tokens (common LM Studio default)."""
+        cli_obj.agent.context_compressor.context_length = 2048
+        with patch("cli.get_tool_definitions", return_value=[]), \
+             patch("cli.build_welcome_banner"):
+            cli_obj.show_banner()
+
+        calls = [str(c) for c in cli_obj.console.print.call_args_list]
+        warning_calls = [c for c in calls if "too low" in c]
+        assert len(warning_calls) == 1
+
+    def test_no_warning_at_boundary(self, cli_obj):
+        """No warning at exactly 8192 — 8192 is borderline but included in warning."""
+        cli_obj.agent.context_compressor.context_length = 8192
+        with patch("cli.get_tool_definitions", return_value=[]), \
+             patch("cli.build_welcome_banner"):
+            cli_obj.show_banner()
+
+        calls = [str(c) for c in cli_obj.console.print.call_args_list]
+        warning_calls = [c for c in calls if "too low" in c]
+        assert len(warning_calls) == 1  # 8192 is still warned about
+
+    def test_no_warning_above_boundary(self, cli_obj):
+        """No warning at 16384."""
+        cli_obj.agent.context_compressor.context_length = 16384
+        with patch("cli.get_tool_definitions", return_value=[]), \
+             patch("cli.build_welcome_banner"):
+            cli_obj.show_banner()
+
+        calls = [str(c) for c in cli_obj.console.print.call_args_list]
+        warning_calls = [c for c in calls if "too low" in c]
+        assert len(warning_calls) == 0
+
+    def test_ollama_specific_hint(self, cli_obj):
+        """Ollama-specific fix shown when port 11434 detected."""
+        cli_obj.agent.context_compressor.context_length = 4096
+        cli_obj.base_url = "http://localhost:11434/v1"
+        with patch("cli.get_tool_definitions", return_value=[]), \
+             patch("cli.build_welcome_banner"):
+            cli_obj.show_banner()
+
+        calls = [str(c) for c in cli_obj.console.print.call_args_list]
+        ollama_hints = [c for c in calls if "OLLAMA_CONTEXT_LENGTH" in c]
+        assert len(ollama_hints) == 1
+
+    def test_lm_studio_specific_hint(self, cli_obj):
+        """LM Studio-specific fix shown when port 1234 detected."""
+        cli_obj.agent.context_compressor.context_length = 2048
+        cli_obj.base_url = "http://localhost:1234/v1"
+        with patch("cli.get_tool_definitions", return_value=[]), \
+             patch("cli.build_welcome_banner"):
+            cli_obj.show_banner()
+
+        calls = [str(c) for c in cli_obj.console.print.call_args_list]
+        lms_hints = [c for c in calls if "LM Studio" in c]
+        assert len(lms_hints) == 1
+
+    def test_generic_hint_for_other_servers(self, cli_obj):
+        """Generic fix shown for unknown servers."""
+        cli_obj.agent.context_compressor.context_length = 4096
+        cli_obj.base_url = "http://localhost:8080/v1"
+        with patch("cli.get_tool_definitions", return_value=[]), \
+             patch("cli.build_welcome_banner"):
+            cli_obj.show_banner()
+
+        calls = [str(c) for c in cli_obj.console.print.call_args_list]
+        generic_hints = [c for c in calls if "config.yaml" in c]
+        assert len(generic_hints) == 1
+
+    def test_no_warning_when_no_context_length(self, cli_obj):
+        """No warning when context length is not yet known."""
+        cli_obj.agent.context_compressor.context_length = None
+        with patch("cli.get_tool_definitions", return_value=[]), \
+             patch("cli.build_welcome_banner"):
+            cli_obj.show_banner()
+
+        calls = [str(c) for c in cli_obj.console.print.call_args_list]
+        warning_calls = [c for c in calls if "too low" in c]
+        assert len(warning_calls) == 0
+
+    def test_compact_banner_does_not_crash_on_narrow_terminal(self, cli_obj):
+        """Compact mode should still have ctx_len defined for warning logic."""
+        cli_obj.agent.context_compressor.context_length = 4096
+
+        with patch("shutil.get_terminal_size", return_value=os.terminal_size((70, 40))), \
+             patch("cli._build_compact_banner", return_value="compact banner"):
+            cli_obj.show_banner()
+
+        calls = [str(c) for c in cli_obj.console.print.call_args_list]
+        warning_calls = [c for c in calls if "too low" in c]
+        assert len(warning_calls) == 1
--- a/tests/test_cli_init.py
+++ b/tests/test_cli_init.py
@ -192,6 +192,91 @@ class TestHistoryDisplay:
        assert "A" * 250 + "..." not in output


+class TestRootLevelProviderOverride:
+    """Root-level provider/base_url in config.yaml must NOT override model.provider."""
+
+    def test_model_provider_wins_over_root_provider(self, tmp_path, monkeypatch):
+        """model.provider takes priority — root-level provider is only a fallback."""
+        import yaml
+
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+        config_path = hermes_home / "config.yaml"
+        config_path.write_text(yaml.safe_dump({
+            "provider": "opencode-go",  # stale root-level key
+            "model": {
+                "default": "google/gemini-3-flash-preview",
+                "provider": "openrouter",  # correct canonical key
+            },
+        }))
+
+        import cli
+        monkeypatch.setattr(cli, "_hermes_home", hermes_home)
+        cfg = cli.load_cli_config()
+
+        assert cfg["model"]["provider"] == "openrouter"
+
+    def test_root_provider_ignored_when_default_model_provider_exists(self, tmp_path, monkeypatch):
+        """Even when model.provider is the default 'auto', root-level provider is ignored."""
+        import yaml
+
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+        config_path = hermes_home / "config.yaml"
+        config_path.write_text(yaml.safe_dump({
+            "provider": "opencode-go",  # stale root key
+            "model": {
+                "default": "google/gemini-3-flash-preview",
+                # no explicit model.provider — defaults provide "auto"
+            },
+        }))
+
+        import cli
+        monkeypatch.setattr(cli, "_hermes_home", hermes_home)
+        cfg = cli.load_cli_config()
+
+        # Root-level "opencode-go" must NOT leak through
+        assert cfg["model"]["provider"] != "opencode-go"
+
+    def test_normalize_root_model_keys_moves_to_model(self):
+        """_normalize_root_model_keys migrates root keys into model section."""
+        from hermes_cli.config import _normalize_root_model_keys
+
+        config = {
+            "provider": "opencode-go",
+            "base_url": "https://example.com/v1",
+            "model": {
+                "default": "some-model",
+            },
+        }
+        result = _normalize_root_model_keys(config)
+        # Root keys removed
+        assert "provider" not in result
+        assert "base_url" not in result
+        # Migrated into model section
+        assert result["model"]["provider"] == "opencode-go"
+        assert result["model"]["base_url"] == "https://example.com/v1"
+
+    def test_normalize_root_model_keys_does_not_override_existing(self):
+        """Existing model.provider is never overridden by root-level key."""
+        from hermes_cli.config import _normalize_root_model_keys
+
+        config = {
+            "provider": "stale-provider",
+            "model": {
+                "default": "some-model",
+                "provider": "correct-provider",
+            },
+        }
+        result = _normalize_root_model_keys(config)
+        assert result["model"]["provider"] == "correct-provider"
+        assert "provider" not in result  # root key still cleaned up
+
+
 class TestProviderResolution:
    def test_api_key_is_string_or_none(self):
        cli = _make_cli()
--- a/tests/test_cli_provider_resolution.py
+++ b/tests/test_cli_provider_resolution.py
@ -508,6 +508,7 @@ def test_cmd_model_falls_back_to_auto_on_invalid_provider(monkeypatch, capsys):

    monkeypatch.setattr("hermes_cli.auth.resolve_provider", _resolve_provider)
    monkeypatch.setattr(hermes_main, "_prompt_provider_choice", lambda choices: len(choices) - 1)
+    monkeypatch.setattr("sys.stdin", type("FakeTTY", (), {"isatty": lambda self: True})())

    hermes_main.cmd_model(SimpleNamespace())
    output = capsys.readouterr().out
@ -543,15 +544,18 @@ def test_model_flow_custom_saves_verified_v1_base_url(monkeypatch, capsys):
    )
    monkeypatch.setattr("hermes_cli.config.save_config", lambda cfg: None)

-    answers = iter(["http://localhost:8000", "local-key", "llm", ""])
+    # After the probe detects a single model ("llm"), the flow asks
+    # "Use this model? [Y/n]:" — confirm with Enter, then context length.
+    answers = iter(["http://localhost:8000", "local-key", "", ""])
    monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers))

    hermes_main._model_flow_custom({})
    output = capsys.readouterr().out

    assert "Saving the working base URL instead" in output
-    assert saved_env["OPENAI_BASE_URL"] == "http://localhost:8000/v1"
-    assert saved_env["OPENAI_API_KEY"] == "local-key"
+    assert "Detected model: llm" in output
+    # OPENAI_BASE_URL is no longer saved to .env — config.yaml is authoritative
+    assert "OPENAI_BASE_URL" not in saved_env
    assert saved_env["MODEL"] == "llm"


--- a/tests/test_cli_save_config_value.py
+++ b/tests/test_cli_save_config_value.py
@ -0,0 +1,80 @@
+"""Tests for save_config_value() in cli.py — atomic write behavior."""
+
+import os
+import yaml
+from pathlib import Path
+from unittest.mock import patch, MagicMock
+
+import pytest
+
+
+class TestSaveConfigValueAtomic:
+    """save_config_value() must use atomic_yaml_write to avoid data loss."""
+
+    @pytest.fixture
+    def config_env(self, tmp_path, monkeypatch):
+        """Isolated config environment with a writable config.yaml."""
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()
+        config_path = hermes_home / "config.yaml"
+        config_path.write_text(yaml.dump({
+            "model": {"default": "test-model", "provider": "openrouter"},
+            "display": {"skin": "default"},
+        }))
+        monkeypatch.setattr("cli._hermes_home", hermes_home)
+        return config_path
+
+    def test_calls_atomic_yaml_write(self, config_env, monkeypatch):
+        """save_config_value must route through atomic_yaml_write, not bare open()."""
+        mock_atomic = MagicMock()
+        monkeypatch.setattr("utils.atomic_yaml_write", mock_atomic)
+
+        from cli import save_config_value
+        save_config_value("display.skin", "mono")
+
+        mock_atomic.assert_called_once()
+        written_path, written_data = mock_atomic.call_args[0]
+        assert Path(written_path) == config_env
+        assert written_data["display"]["skin"] == "mono"
+
+    def test_preserves_existing_keys(self, config_env):
+        """Writing a new key must not clobber existing config entries."""
+        from cli import save_config_value
+        save_config_value("agent.max_turns", 50)
+
+        result = yaml.safe_load(config_env.read_text())
+        assert result["model"]["default"] == "test-model"
+        assert result["model"]["provider"] == "openrouter"
+        assert result["display"]["skin"] == "default"
+        assert result["agent"]["max_turns"] == 50
+
+    def test_creates_nested_keys(self, config_env):
+        """Dot-separated paths create intermediate dicts as needed."""
+        from cli import save_config_value
+        save_config_value("compression.summary_model", "google/gemini-3-flash-preview")
+
+        result = yaml.safe_load(config_env.read_text())
+        assert result["compression"]["summary_model"] == "google/gemini-3-flash-preview"
+
+    def test_overwrites_existing_value(self, config_env):
+        """Updating an existing key replaces the value."""
+        from cli import save_config_value
+        save_config_value("display.skin", "ares")
+
+        result = yaml.safe_load(config_env.read_text())
+        assert result["display"]["skin"] == "ares"
+
+    def test_file_not_truncated_on_error(self, config_env, monkeypatch):
+        """If atomic_yaml_write raises, the original file is untouched."""
+        original_content = config_env.read_text()
+
+        def exploding_write(*args, **kwargs):
+            raise OSError("disk full")
+
+        monkeypatch.setattr("utils.atomic_yaml_write", exploding_write)
+
+        from cli import save_config_value
+        result = save_config_value("display.skin", "broken")
+
+        assert result is False
+        assert config_env.read_text() == original_content
--- a/tests/test_codex_execution_paths.py
+++ b/tests/test_codex_execution_paths.py
@ -112,7 +112,7 @@ def test_cron_run_job_codex_path_handles_internal_401_refresh(monkeypatch):
    _Codex401ThenSuccessAgent.last_init = {}

    success, output, final_response, error = cron_scheduler.run_job(
-        {"id": "job-1", "name": "Codex Refresh Test", "prompt": "ping"}
+        {"id": "job-1", "name": "Codex Refresh Test", "prompt": "ping", "model": "gpt-5.3-codex"}
    )

    assert success is True
@ -139,6 +139,7 @@ def test_gateway_run_agent_codex_path_handles_internal_401_refresh(monkeypatch):
        },
    )
    monkeypatch.setenv("HERMES_TOOL_PROGRESS", "false")
+    monkeypatch.setenv("HERMES_MODEL", "gpt-5.3-codex")

    _Codex401ThenSuccessAgent.refresh_attempts = 0
    _Codex401ThenSuccessAgent.last_init = {}
--- a/tests/test_codex_models.py
+++ b/tests/test_codex_models.py
@ -187,12 +187,12 @@ class TestNormalizeModelForProvider:
        assert cli.model == "claude-opus-4.6"

    def test_default_model_replaced(self):
-        """The untouched default (anthropic/claude-opus-4.6) gets swapped."""
+        """No model configured (empty default) gets swapped for codex."""
        import cli as _cli_mod
        _clean_config = {
            "model": {
-                "default": "anthropic/claude-opus-4.6",
-                "base_url": "https://openrouter.ai/api/v1",
+                "default": "",
+                "base_url": "",
                "provider": "auto",
            },
            "display": {"compact": False, "tool_progress": "all", "resume_display": "full"},
@ -219,12 +219,12 @@ class TestNormalizeModelForProvider:
        assert cli.model == "gpt-5.3-codex"

    def test_default_fallback_when_api_fails(self):
-        """Default model falls back to gpt-5.3-codex when API unreachable."""
+        """No model configured falls back to gpt-5.3-codex when API unreachable."""
        import cli as _cli_mod
        _clean_config = {
            "model": {
-                "default": "anthropic/claude-opus-4.6",
-                "base_url": "https://openrouter.ai/api/v1",
+                "default": "",
+                "base_url": "",
                "provider": "auto",
            },
            "display": {"compact": False, "tool_progress": "all", "resume_display": "full"},
--- a/tests/test_compression_persistence.py
+++ b/tests/test_compression_persistence.py
@ -0,0 +1,202 @@
+"""Tests for context compression persistence in the gateway.
+
+Verifies that when context compression fires during run_conversation(),
+the compressed messages are properly persisted to both SQLite (via the
+agent) and JSONL (via the gateway).
+
+Bug scenario (pre-fix):
+  1. Gateway loads 200-message history, passes to agent
+  2. Agent's run_conversation() compresses to ~30 messages mid-run
+  3. _compress_context() resets _last_flushed_db_idx = 0
+  4. On exit, _flush_messages_to_session_db() calculates:
+     flush_from = max(len(conversation_history=200), _last_flushed_db_idx=0) = 200
+  5. messages[200:] is empty (only ~30 messages after compression)
+  6. Nothing written to new session's SQLite — compressed context lost
+  7. Gateway's history_offset was still 200, producing empty new_messages
+  8. Fallback wrote only user/assistant pair — summary lost
+"""
+
+import os
+import tempfile
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+
+# ---------------------------------------------------------------------------
+# Part 1: Agent-side — _flush_messages_to_session_db after compression
+# ---------------------------------------------------------------------------
+
+class TestFlushAfterCompression:
+    """Verify that compressed messages are flushed to the new session's SQLite
+    even when conversation_history (from the original session) is longer than
+    the compressed messages list."""
+
+    def _make_agent(self, session_db):
+        with patch.dict(os.environ, {"OPENROUTER_API_KEY": "test-key"}):
+            from run_agent import AIAgent
+            agent = AIAgent(
+                model="test/model",
+                quiet_mode=True,
+                session_db=session_db,
+                session_id="original-session",
+                skip_context_files=True,
+                skip_memory=True,
+            )
+        return agent
+
+    def test_flush_after_compression_with_long_history(self):
+        """The actual bug: conversation_history longer than compressed messages.
+
+        Before the fix, flush_from = max(len(conversation_history), 0) = 200,
+        but messages only has ~30 entries, so messages[200:] is empty.
+        After the fix, conversation_history is cleared to None after compression,
+        so flush_from = max(0, 0) = 0, and ALL compressed messages are written.
+        """
+        from hermes_state import SessionDB
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            db_path = Path(tmpdir) / "test.db"
+            db = SessionDB(db_path=db_path)
+
+            agent = self._make_agent(db)
+
+            # Simulate the original long history (200 messages)
+            original_history = [
+                {"role": "user" if i % 2 == 0 else "assistant",
+                 "content": f"message {i}"}
+                for i in range(200)
+            ]
+
+            # First, flush original messages to the original session
+            agent._flush_messages_to_session_db(original_history, [])
+            original_rows = db.get_messages("original-session")
+            assert len(original_rows) == 200
+
+            # Now simulate compression: new session, reset idx, shorter messages
+            agent.session_id = "compressed-session"
+            db.create_session(session_id="compressed-session", source="test")
+            agent._last_flushed_db_idx = 0
+
+            # The compressed messages (summary + tail + new turn)
+            compressed_messages = [
+                {"role": "user", "content": "[CONTEXT COMPACTION] Summary of work..."},
+                {"role": "user", "content": "What should we do next?"},
+                {"role": "assistant", "content": "Let me check..."},
+                {"role": "user", "content": "new question"},
+                {"role": "assistant", "content": "new answer"},
+            ]
+
+            # THE BUG: passing the original history as conversation_history
+            # causes flush_from = max(200, 0) = 200, skipping everything.
+            # After the fix, conversation_history should be None.
+            agent._flush_messages_to_session_db(compressed_messages, None)
+
+            new_rows = db.get_messages("compressed-session")
+            assert len(new_rows) == 5, (
+                f"Expected 5 compressed messages in new session, got {len(new_rows)}. "
+                f"Compression persistence bug: messages not written to SQLite."
+            )
+
+    def test_flush_with_stale_history_loses_messages(self):
+        """Demonstrates the bug condition: stale conversation_history causes data loss."""
+        from hermes_state import SessionDB
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            db_path = Path(tmpdir) / "test.db"
+            db = SessionDB(db_path=db_path)
+
+            agent = self._make_agent(db)
+
+            # Simulate compression reset
+            agent.session_id = "new-session"
+            db.create_session(session_id="new-session", source="test")
+            agent._last_flushed_db_idx = 0
+
+            compressed = [
+                {"role": "user", "content": "summary"},
+                {"role": "assistant", "content": "continuing..."},
+            ]
+
+            # Bug: passing a conversation_history longer than compressed messages
+            stale_history = [{"role": "user", "content": f"msg{i}"} for i in range(100)]
+            agent._flush_messages_to_session_db(compressed, stale_history)
+
+            rows = db.get_messages("new-session")
+            # With the stale history, flush_from = max(100, 0) = 100
+            # But compressed only has 2 entries → messages[100:] = empty
+            assert len(rows) == 0, (
+                "Expected 0 messages with stale conversation_history "
+                "(this test verifies the bug condition exists)"
+            )
+
+
+# ---------------------------------------------------------------------------
+# Part 2: Gateway-side — history_offset after session split
+# ---------------------------------------------------------------------------
+
+class TestGatewayHistoryOffsetAfterSplit:
+    """Verify that when the agent creates a new session during compression,
+    the gateway uses history_offset=0 so all compressed messages are written
+    to the JSONL transcript."""
+
+    def test_history_offset_zero_on_session_split(self):
+        """When agent.session_id differs from the original, history_offset must be 0."""
+        # This tests the logic in gateway/run.py run_sync():
+        # _session_was_split = agent.session_id != session_id
+        # _effective_history_offset = 0 if _session_was_split else len(agent_history)
+
+        original_session_id = "session-abc"
+        agent_session_id = "session-compressed-xyz"  # Different = compression happened
+        agent_history_len = 200
+
+        # Simulate the gateway's offset calculation (post-fix)
+        _session_was_split = (agent_session_id != original_session_id)
+        _effective_history_offset = 0 if _session_was_split else agent_history_len
+
+        assert _session_was_split is True
+        assert _effective_history_offset == 0
+
+    def test_history_offset_preserved_without_split(self):
+        """When no compression happened, history_offset is the original length."""
+        session_id = "session-abc"
+        agent_session_id = "session-abc"  # Same = no compression
+        agent_history_len = 200
+
+        _session_was_split = (agent_session_id != session_id)
+        _effective_history_offset = 0 if _session_was_split else agent_history_len
+
+        assert _session_was_split is False
+        assert _effective_history_offset == 200
+
+    def test_new_messages_extraction_after_split(self):
+        """After compression with offset=0, new_messages should be ALL agent messages."""
+        # Simulates the gateway's new_messages calculation
+        agent_messages = [
+            {"role": "user", "content": "[CONTEXT COMPACTION] Summary..."},
+            {"role": "user", "content": "recent question"},
+            {"role": "assistant", "content": "recent answer"},
+            {"role": "user", "content": "new question"},
+            {"role": "assistant", "content": "new answer"},
+        ]
+        history_offset = 0  # After fix: 0 on session split
+
+        new_messages = agent_messages[history_offset:] if len(agent_messages) > history_offset else []
+        assert len(new_messages) == 5, (
+            f"Expected all 5 messages with offset=0, got {len(new_messages)}"
+        )
+
+    def test_new_messages_empty_with_stale_offset(self):
+        """Demonstrates the bug: stale offset produces empty new_messages."""
+        agent_messages = [
+            {"role": "user", "content": "summary"},
+            {"role": "assistant", "content": "answer"},
+        ]
+        # Bug: offset is the pre-compression history length
+        history_offset = 200
+
+        new_messages = agent_messages[history_offset:] if len(agent_messages) > history_offset else []
+        assert len(new_messages) == 0, (
+            "Expected 0 messages with stale offset=200 (demonstrates the bug)"
+        )
--- a/tests/test_credential_pool.py
+++ b/tests/test_credential_pool.py
@ -0,0 +1,949 @@
+"""Tests for multi-credential runtime pooling and rotation."""
+
+from __future__ import annotations
+
+import json
+import time
+
+import pytest
+
+
+def _write_auth_store(tmp_path, payload: dict) -> None:
+    hermes_home = tmp_path / "hermes"
+    hermes_home.mkdir(parents=True, exist_ok=True)
+    (hermes_home / "auth.json").write_text(json.dumps(payload, indent=2))
+
+
+def test_fill_first_selection_skips_recently_exhausted_entry(tmp_path, monkeypatch):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    _write_auth_store(
+        tmp_path,
+        {
+            "version": 1,
+            "credential_pool": {
+                "anthropic": [
+                    {
+                        "id": "cred-1",
+                        "label": "primary",
+                        "auth_type": "api_key",
+                        "priority": 0,
+                        "source": "manual",
+                        "access_token": "***",
+                        "last_status": "exhausted",
+                        "last_status_at": time.time(),
+                        "last_error_code": 402,
+                    },
+                    {
+                        "id": "cred-2",
+                        "label": "secondary",
+                        "auth_type": "api_key",
+                        "priority": 1,
+                        "source": "manual",
+                        "access_token": "***",
+                        "last_status": "ok",
+                        "last_status_at": None,
+                        "last_error_code": None,
+                    },
+                ]
+            },
+        },
+    )
+
+    from agent.credential_pool import load_pool
+
+    pool = load_pool("anthropic")
+    entry = pool.select()
+
+    assert entry is not None
+    assert entry.id == "cred-2"
+    assert pool.current().id == "cred-2"
+
+
+def test_select_clears_expired_exhaustion(tmp_path, monkeypatch):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    _write_auth_store(
+        tmp_path,
+        {
+            "version": 1,
+            "credential_pool": {
+                "anthropic": [
+                    {
+                        "id": "cred-1",
+                        "label": "old",
+                        "auth_type": "api_key",
+                        "priority": 0,
+                        "source": "manual",
+                        "access_token": "***",
+                        "last_status": "exhausted",
+                        "last_status_at": time.time() - 90000,
+                        "last_error_code": 402,
+                    }
+                ]
+            },
+        },
+    )
+
+    from agent.credential_pool import load_pool
+
+    pool = load_pool("anthropic")
+    entry = pool.select()
+
+    assert entry is not None
+    assert entry.last_status == "ok"
+
+
+def test_round_robin_strategy_rotates_priorities(tmp_path, monkeypatch):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    _write_auth_store(
+        tmp_path,
+        {
+            "version": 1,
+            "credential_pool": {
+                "openrouter": [
+                    {
+                        "id": "cred-1",
+                        "label": "primary",
+                        "auth_type": "api_key",
+                        "priority": 0,
+                        "source": "manual",
+                        "access_token": "***",
+                    },
+                    {
+                        "id": "cred-2",
+                        "label": "secondary",
+                        "auth_type": "api_key",
+                        "priority": 1,
+                        "source": "manual",
+                        "access_token": "***",
+                    },
+                ]
+            },
+        },
+    )
+    config_path = tmp_path / "hermes" / "config.yaml"
+    config_path.write_text("credential_pool_strategies:\n  openrouter: round_robin\n")
+
+    from agent.credential_pool import load_pool
+
+    pool = load_pool("openrouter")
+    first = pool.select()
+    assert first is not None
+    assert first.id == "cred-1"
+
+    reloaded = load_pool("openrouter")
+    second = reloaded.select()
+    assert second is not None
+    assert second.id == "cred-2"
+
+
+def test_random_strategy_uses_random_choice(tmp_path, monkeypatch):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
+    _write_auth_store(
+        tmp_path,
+        {
+            "version": 1,
+            "credential_pool": {
+                "openrouter": [
+                    {
+                        "id": "cred-1",
+                        "label": "primary",
+                        "auth_type": "api_key",
+                        "priority": 0,
+                        "source": "manual",
+                        "access_token": "***",
+                    },
+                    {
+                        "id": "cred-2",
+                        "label": "secondary",
+                        "auth_type": "api_key",
+                        "priority": 1,
+                        "source": "manual",
+                        "access_token": "***",
+                    },
+                ]
+            },
+        },
+    )
+    config_path = tmp_path / "hermes" / "config.yaml"
+    config_path.write_text("credential_pool_strategies:\n  openrouter: random\n")
+
+    monkeypatch.setattr("agent.credential_pool.random.choice", lambda entries: entries[-1])
+
+    from agent.credential_pool import load_pool
+
+    pool = load_pool("openrouter")
+    selected = pool.select()
+    assert selected is not None
+    assert selected.id == "cred-2"
+
+
+
+def test_exhausted_entry_resets_after_ttl(tmp_path, monkeypatch):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    _write_auth_store(
+        tmp_path,
+        {
+            "version": 1,
+            "credential_pool": {
+                "openrouter": [
+                    {
+                        "id": "cred-1",
+                        "label": "primary",
+                        "auth_type": "api_key",
+                        "priority": 0,
+                        "source": "manual",
+                        "access_token": "sk-or-primary",
+                        "base_url": "https://openrouter.ai/api/v1",
+                        "last_status": "exhausted",
+                        "last_status_at": time.time() - 90000,
+                        "last_error_code": 429,
+                    }
+                ]
+            },
+        },
+    )
+
+    from agent.credential_pool import load_pool
+
+    pool = load_pool("openrouter")
+    entry = pool.select()
+
+    assert entry is not None
+    assert entry.id == "cred-1"
+    assert entry.last_status == "ok"
+
+
+def test_mark_exhausted_and_rotate_persists_status(tmp_path, monkeypatch):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    _write_auth_store(
+        tmp_path,
+        {
+            "version": 1,
+            "credential_pool": {
+                "anthropic": [
+                    {
+                        "id": "cred-1",
+                        "label": "primary",
+                        "auth_type": "api_key",
+                        "priority": 0,
+                        "source": "manual",
+                        "access_token": "sk-ant-api-primary",
+                    },
+                    {
+                        "id": "cred-2",
+                        "label": "secondary",
+                        "auth_type": "api_key",
+                        "priority": 1,
+                        "source": "manual",
+                        "access_token": "sk-ant-api-secondary",
+                    },
+                ]
+            },
+        },
+    )
+
+    from agent.credential_pool import load_pool
+
+    pool = load_pool("anthropic")
+    assert pool.select().id == "cred-1"
+
+    next_entry = pool.mark_exhausted_and_rotate(status_code=402)
+
+    assert next_entry is not None
+    assert next_entry.id == "cred-2"
+
+    auth_payload = json.loads((tmp_path / "hermes" / "auth.json").read_text())
+    persisted = auth_payload["credential_pool"]["anthropic"][0]
+    assert persisted["last_status"] == "exhausted"
+    assert persisted["last_error_code"] == 402
+
+
+def test_try_refresh_current_updates_only_current_entry(tmp_path, monkeypatch):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    _write_auth_store(
+        tmp_path,
+        {
+            "version": 1,
+            "credential_pool": {
+                "openai-codex": [
+                    {
+                        "id": "cred-1",
+                        "label": "primary",
+                        "auth_type": "oauth",
+                        "priority": 0,
+                        "source": "device_code",
+                        "access_token": "access-old",
+                        "refresh_token": "refresh-old",
+                        "base_url": "https://chatgpt.com/backend-api/codex",
+                    },
+                    {
+                        "id": "cred-2",
+                        "label": "secondary",
+                        "auth_type": "oauth",
+                        "priority": 1,
+                        "source": "device_code",
+                        "access_token": "access-other",
+                        "refresh_token": "refresh-other",
+                        "base_url": "https://chatgpt.com/backend-api/codex",
+                    },
+                ]
+            },
+        },
+    )
+
+    from agent.credential_pool import load_pool
+
+    monkeypatch.setattr(
+        "hermes_cli.auth.refresh_codex_oauth_pure",
+        lambda access_token, refresh_token, timeout_seconds=20.0: {
+            "access_token": "access-new",
+            "refresh_token": "refresh-new",
+        },
+    )
+
+    pool = load_pool("openai-codex")
+    current = pool.select()
+    assert current.id == "cred-1"
+
+    refreshed = pool.try_refresh_current()
+
+    assert refreshed is not None
+    assert refreshed.access_token == "access-new"
+
+    auth_payload = json.loads((tmp_path / "hermes" / "auth.json").read_text())
+    primary, secondary = auth_payload["credential_pool"]["openai-codex"]
+    assert primary["access_token"] == "access-new"
+    assert primary["refresh_token"] == "refresh-new"
+    assert secondary["access_token"] == "access-other"
+    assert secondary["refresh_token"] == "refresh-other"
+
+
+def test_load_pool_seeds_env_api_key(tmp_path, monkeypatch):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-seeded")
+    _write_auth_store(tmp_path, {"version": 1, "providers": {}})
+
+    from agent.credential_pool import load_pool
+
+    pool = load_pool("openrouter")
+    entry = pool.select()
+
+    assert entry is not None
+    assert entry.source == "env:OPENROUTER_API_KEY"
+    assert entry.access_token == "sk-or-seeded"
+
+
+def test_load_pool_removes_stale_seeded_env_entry(tmp_path, monkeypatch):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
+    _write_auth_store(
+        tmp_path,
+        {
+            "version": 1,
+            "credential_pool": {
+                "openrouter": [
+                    {
+                        "id": "seeded-env",
+                        "label": "OPENROUTER_API_KEY",
+                        "auth_type": "api_key",
+                        "priority": 0,
+                        "source": "env:OPENROUTER_API_KEY",
+                        "access_token": "stale-token",
+                        "base_url": "https://openrouter.ai/api/v1",
+                    }
+                ]
+            },
+        },
+    )
+
+    from agent.credential_pool import load_pool
+
+    pool = load_pool("openrouter")
+
+    assert pool.entries() == []
+
+    auth_payload = json.loads((tmp_path / "hermes" / "auth.json").read_text())
+    assert auth_payload["credential_pool"]["openrouter"] == []
+
+
+def test_load_pool_migrates_nous_provider_state(tmp_path, monkeypatch):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    _write_auth_store(
+        tmp_path,
+        {
+            "version": 1,
+            "active_provider": "nous",
+            "providers": {
+                "nous": {
+                    "portal_base_url": "https://portal.example.com",
+                    "inference_base_url": "https://inference.example.com/v1",
+                    "client_id": "hermes-cli",
+                    "token_type": "Bearer",
+                    "scope": "inference:mint_agent_key",
+                    "access_token": "access-token",
+                    "refresh_token": "refresh-token",
+                    "expires_at": "2026-03-24T12:00:00+00:00",
+                    "agent_key": "agent-key",
+                    "agent_key_expires_at": "2026-03-24T13:30:00+00:00",
+                }
+            },
+        },
+    )
+
+    from agent.credential_pool import load_pool
+
+    pool = load_pool("nous")
+    entry = pool.select()
+
+    assert entry is not None
+    assert entry.source == "device_code"
+    assert entry.portal_base_url == "https://portal.example.com"
+    assert entry.agent_key == "agent-key"
+
+
+def test_load_pool_removes_stale_file_backed_singleton_entry(tmp_path, monkeypatch):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
+    monkeypatch.delenv("ANTHROPIC_TOKEN", raising=False)
+    monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False)
+    _write_auth_store(
+        tmp_path,
+        {
+            "version": 1,
+            "credential_pool": {
+                "anthropic": [
+                    {
+                        "id": "seeded-file",
+                        "label": "claude-code",
+                        "auth_type": "oauth",
+                        "priority": 0,
+                        "source": "claude_code",
+                        "access_token": "stale-access-token",
+                        "refresh_token": "stale-refresh-token",
+                        "expires_at_ms": int(time.time() * 1000) + 60_000,
+                    }
+                ]
+            },
+        },
+    )
+
+    monkeypatch.setattr(
+        "agent.anthropic_adapter.read_hermes_oauth_credentials",
+        lambda: None,
+    )
+    monkeypatch.setattr(
+        "agent.anthropic_adapter.read_claude_code_credentials",
+        lambda: None,
+    )
+
+    from agent.credential_pool import load_pool
+
+    pool = load_pool("anthropic")
+
+    assert pool.entries() == []
+
+    auth_payload = json.loads((tmp_path / "hermes" / "auth.json").read_text())
+    assert auth_payload["credential_pool"]["anthropic"] == []
+
+
+def test_load_pool_migrates_nous_provider_state_preserves_tls(tmp_path, monkeypatch):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    _write_auth_store(
+        tmp_path,
+        {
+            "version": 1,
+            "active_provider": "nous",
+            "providers": {
+                "nous": {
+                    "portal_base_url": "https://portal.example.com",
+                    "inference_base_url": "https://inference.example.com/v1",
+                    "client_id": "hermes-cli",
+                    "token_type": "Bearer",
+                    "scope": "inference:mint_agent_key",
+                    "access_token": "access-token",
+                    "refresh_token": "refresh-token",
+                    "expires_at": "2026-03-24T12:00:00+00:00",
+                    "agent_key": "agent-key",
+                    "agent_key_expires_at": "2026-03-24T13:30:00+00:00",
+                    "tls": {
+                        "insecure": True,
+                        "ca_bundle": "/tmp/nous-ca.pem",
+                    },
+                }
+            },
+        },
+    )
+
+    from agent.credential_pool import load_pool
+
+    pool = load_pool("nous")
+    entry = pool.select()
+
+    assert entry is not None
+    assert entry.tls == {
+        "insecure": True,
+        "ca_bundle": "/tmp/nous-ca.pem",
+    }
+
+    auth_payload = json.loads((tmp_path / "hermes" / "auth.json").read_text())
+    assert auth_payload["credential_pool"]["nous"][0]["tls"] == {
+        "insecure": True,
+        "ca_bundle": "/tmp/nous-ca.pem",
+    }
+
+
+def test_singleton_seed_does_not_clobber_manual_oauth_entry(tmp_path, monkeypatch):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
+    monkeypatch.delenv("ANTHROPIC_TOKEN", raising=False)
+    monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False)
+    _write_auth_store(
+        tmp_path,
+        {
+            "version": 1,
+            "credential_pool": {
+                "anthropic": [
+                    {
+                        "id": "manual-1",
+                        "label": "manual-pkce",
+                        "auth_type": "oauth",
+                        "priority": 0,
+                        "source": "manual:hermes_pkce",
+                        "access_token": "manual-token",
+                        "refresh_token": "manual-refresh",
+                        "expires_at_ms": 1711234567000,
+                    }
+                ]
+            },
+        },
+    )
+
+    monkeypatch.setattr(
+        "agent.anthropic_adapter.read_hermes_oauth_credentials",
+        lambda: {
+            "accessToken": "seeded-token",
+            "refreshToken": "seeded-refresh",
+            "expiresAt": 1711234999000,
+        },
+    )
+    monkeypatch.setattr(
+        "agent.anthropic_adapter.read_claude_code_credentials",
+        lambda: None,
+    )
+
+    from agent.credential_pool import load_pool
+
+    pool = load_pool("anthropic")
+    entries = pool.entries()
+
+    assert len(entries) == 2
+    assert {entry.source for entry in entries} == {"manual:hermes_pkce", "hermes_pkce"}
+
+
+def test_load_pool_prefers_anthropic_env_token_over_file_backed_oauth(tmp_path, monkeypatch):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
+    monkeypatch.setenv("ANTHROPIC_TOKEN", "env-override-token")
+    monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False)
+    _write_auth_store(tmp_path, {"version": 1, "providers": {}})
+
+    monkeypatch.setattr(
+        "agent.anthropic_adapter.read_hermes_oauth_credentials",
+        lambda: {
+            "accessToken": "file-backed-token",
+            "refreshToken": "refresh-token",
+            "expiresAt": int(time.time() * 1000) + 3_600_000,
+        },
+    )
+    monkeypatch.setattr(
+        "agent.anthropic_adapter.read_claude_code_credentials",
+        lambda: None,
+    )
+
+    from agent.credential_pool import load_pool
+
+    pool = load_pool("anthropic")
+    entry = pool.select()
+
+    assert entry is not None
+    assert entry.source == "env:ANTHROPIC_TOKEN"
+    assert entry.access_token == "env-override-token"
+
+
+def test_least_used_strategy_selects_lowest_count(tmp_path, monkeypatch):
+    """least_used strategy should select the credential with the lowest request_count."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    monkeypatch.setattr(
+        "agent.credential_pool.get_pool_strategy",
+        lambda _provider: "least_used",
+    )
+    monkeypatch.setattr(
+        "agent.credential_pool._seed_from_singletons",
+        lambda provider, entries: (False, set()),
+    )
+    monkeypatch.setattr(
+        "agent.credential_pool._seed_from_env",
+        lambda provider, entries: (False, set()),
+    )
+    _write_auth_store(
+        tmp_path,
+        {
+            "version": 1,
+            "credential_pool": {
+                "openrouter": [
+                    {
+                        "id": "key-a",
+                        "label": "heavy",
+                        "auth_type": "api_key",
+                        "priority": 0,
+                        "source": "manual",
+                        "access_token": "sk-or-heavy",
+                        "request_count": 100,
+                    },
+                    {
+                        "id": "key-b",
+                        "label": "light",
+                        "auth_type": "api_key",
+                        "priority": 1,
+                        "source": "manual",
+                        "access_token": "sk-or-light",
+                        "request_count": 10,
+                    },
+                    {
+                        "id": "key-c",
+                        "label": "medium",
+                        "auth_type": "api_key",
+                        "priority": 2,
+                        "source": "manual",
+                        "access_token": "sk-or-medium",
+                        "request_count": 50,
+                    },
+                ]
+            },
+        },
+    )
+
+    from agent.credential_pool import load_pool
+
+    pool = load_pool("openrouter")
+    entry = pool.select()
+    assert entry is not None
+    assert entry.id == "key-b"
+    assert entry.access_token == "sk-or-light"
+
+
+def test_mark_used_increments_request_count(tmp_path, monkeypatch):
+    """mark_used should increment the request_count of the current entry."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    monkeypatch.setattr(
+        "agent.credential_pool.get_pool_strategy",
+        lambda _provider: "fill_first",
+    )
+    monkeypatch.setattr(
+        "agent.credential_pool._seed_from_singletons",
+        lambda provider, entries: (False, set()),
+    )
+    monkeypatch.setattr(
+        "agent.credential_pool._seed_from_env",
+        lambda provider, entries: (False, set()),
+    )
+    _write_auth_store(
+        tmp_path,
+        {
+            "version": 1,
+            "credential_pool": {
+                "openrouter": [
+                    {
+                        "id": "key-a",
+                        "label": "test",
+                        "auth_type": "api_key",
+                        "priority": 0,
+                        "source": "manual",
+                        "access_token": "sk-or-test",
+                        "request_count": 5,
+                    },
+                ]
+            },
+        },
+    )
+
+    from agent.credential_pool import load_pool
+
+    pool = load_pool("openrouter")
+    entry = pool.select()
+    assert entry is not None
+    assert entry.request_count == 5
+    pool.mark_used()
+    updated = pool.current()
+    assert updated is not None
+    assert updated.request_count == 6
+
+
+def test_thread_safety_concurrent_select(tmp_path, monkeypatch):
+    """Concurrent select() calls should not corrupt pool state."""
+    import threading as _threading
+
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    monkeypatch.setattr(
+        "agent.credential_pool.get_pool_strategy",
+        lambda _provider: "round_robin",
+    )
+    monkeypatch.setattr(
+        "agent.credential_pool._seed_from_singletons",
+        lambda provider, entries: (False, set()),
+    )
+    monkeypatch.setattr(
+        "agent.credential_pool._seed_from_env",
+        lambda provider, entries: (False, set()),
+    )
+    _write_auth_store(
+        tmp_path,
+        {
+            "version": 1,
+            "credential_pool": {
+                "openrouter": [
+                    {
+                        "id": f"key-{i}",
+                        "label": f"key-{i}",
+                        "auth_type": "api_key",
+                        "priority": i,
+                        "source": "manual",
+                        "access_token": f"sk-or-{i}",
+                    }
+                    for i in range(5)
+                ]
+            },
+        },
+    )
+
+    from agent.credential_pool import load_pool
+
+    pool = load_pool("openrouter")
+    results = []
+    errors = []
+
+    def worker():
+        try:
+            for _ in range(20):
+                entry = pool.select()
+                if entry:
+                    results.append(entry.id)
+                    pool.mark_used(entry.id)
+        except Exception as exc:
+            errors.append(exc)
+
+    threads = [_threading.Thread(target=worker) for _ in range(4)]
+    for t in threads:
+        t.start()
+    for t in threads:
+        t.join()
+
+    assert not errors, f"Thread errors: {errors}"
+    assert len(results) == 80  # 4 threads * 20 selects
+
+
+def test_custom_endpoint_pool_keyed_by_name(tmp_path, monkeypatch):
+    """Verify load_pool('custom:together.ai') works and returns entries from auth.json."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    # Disable seeding so we only test stored entries
+    monkeypatch.setattr(
+        "agent.credential_pool._seed_custom_pool",
+        lambda pool_key, entries: (False, set()),
+    )
+    _write_auth_store(
+        tmp_path,
+        {
+            "version": 1,
+            "credential_pool": {
+                "custom:together.ai": [
+                    {
+                        "id": "cred-1",
+                        "label": "together-key",
+                        "auth_type": "api_key",
+                        "priority": 0,
+                        "source": "manual",
+                        "access_token": "sk-together-xxx",
+                        "base_url": "https://api.together.ai/v1",
+                    },
+                    {
+                        "id": "cred-2",
+                        "label": "together-key-2",
+                        "auth_type": "api_key",
+                        "priority": 1,
+                        "source": "manual",
+                        "access_token": "sk-together-yyy",
+                        "base_url": "https://api.together.ai/v1",
+                    },
+                ]
+            },
+        },
+    )
+
+    from agent.credential_pool import load_pool
+
+    pool = load_pool("custom:together.ai")
+    assert pool.has_credentials()
+    entries = pool.entries()
+    assert len(entries) == 2
+    assert entries[0].access_token == "sk-together-xxx"
+    assert entries[1].access_token == "sk-together-yyy"
+
+    # Select should return the first entry (fill_first default)
+    entry = pool.select()
+    assert entry is not None
+    assert entry.id == "cred-1"
+
+
+def test_custom_endpoint_pool_seeds_from_config(tmp_path, monkeypatch):
+    """Verify seeding from custom_providers api_key in config.yaml."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    _write_auth_store(tmp_path, {"version": 1})
+
+    # Write config.yaml with a custom_providers entry
+    config_path = tmp_path / "hermes" / "config.yaml"
+    import yaml
+    config_path.write_text(yaml.dump({
+        "custom_providers": [
+            {
+                "name": "Together.ai",
+                "base_url": "https://api.together.ai/v1",
+                "api_key": "sk-config-seeded",
+            }
+        ]
+    }))
+
+    from agent.credential_pool import load_pool
+
+    pool = load_pool("custom:together.ai")
+    assert pool.has_credentials()
+    entries = pool.entries()
+    assert len(entries) == 1
+    assert entries[0].access_token == "sk-config-seeded"
+    assert entries[0].source == "config:Together.ai"
+
+
+def test_custom_endpoint_pool_seeds_from_model_config(tmp_path, monkeypatch):
+    """Verify seeding from model.api_key when model.provider=='custom' and base_url matches."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    _write_auth_store(tmp_path, {"version": 1})
+
+    import yaml
+    config_path = tmp_path / "hermes" / "config.yaml"
+    config_path.write_text(yaml.dump({
+        "custom_providers": [
+            {
+                "name": "Together.ai",
+                "base_url": "https://api.together.ai/v1",
+            }
+        ],
+        "model": {
+            "provider": "custom",
+            "base_url": "https://api.together.ai/v1",
+            "api_key": "sk-model-key",
+        },
+    }))
+
+    from agent.credential_pool import load_pool
+
+    pool = load_pool("custom:together.ai")
+    assert pool.has_credentials()
+    entries = pool.entries()
+    # Should have the model_config entry
+    model_entries = [e for e in entries if e.source == "model_config"]
+    assert len(model_entries) == 1
+    assert model_entries[0].access_token == "sk-model-key"
+
+
+def test_custom_pool_does_not_break_existing_providers(tmp_path, monkeypatch):
+    """Existing registry providers work exactly as before with custom pool support."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-test")
+    _write_auth_store(tmp_path, {"version": 1, "providers": {}})
+
+    from agent.credential_pool import load_pool
+
+    pool = load_pool("openrouter")
+    entry = pool.select()
+    assert entry is not None
+    assert entry.source == "env:OPENROUTER_API_KEY"
+    assert entry.access_token == "sk-or-test"
+
+
+def test_get_custom_provider_pool_key(tmp_path, monkeypatch):
+    """get_custom_provider_pool_key maps base_url to custom:<name> pool key."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    (tmp_path / "hermes").mkdir(parents=True, exist_ok=True)
+    import yaml
+    config_path = tmp_path / "hermes" / "config.yaml"
+    config_path.write_text(yaml.dump({
+        "custom_providers": [
+            {
+                "name": "Together.ai",
+                "base_url": "https://api.together.ai/v1",
+                "api_key": "sk-xxx",
+            },
+            {
+                "name": "My Local Server",
+                "base_url": "http://localhost:8080/v1",
+            },
+        ]
+    }))
+
+    from agent.credential_pool import get_custom_provider_pool_key
+
+    assert get_custom_provider_pool_key("https://api.together.ai/v1") == "custom:together.ai"
+    assert get_custom_provider_pool_key("https://api.together.ai/v1/") == "custom:together.ai"
+    assert get_custom_provider_pool_key("http://localhost:8080/v1") == "custom:my-local-server"
+    assert get_custom_provider_pool_key("https://unknown.example.com/v1") is None
+    assert get_custom_provider_pool_key("") is None
+
+
+def test_list_custom_pool_providers(tmp_path, monkeypatch):
+    """list_custom_pool_providers returns custom: pool keys from auth.json."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    _write_auth_store(
+        tmp_path,
+        {
+            "version": 1,
+            "credential_pool": {
+                "anthropic": [
+                    {
+                        "id": "a1",
+                        "label": "test",
+                        "auth_type": "api_key",
+                        "priority": 0,
+                        "source": "manual",
+                        "access_token": "sk-ant-xxx",
+                    }
+                ],
+                "custom:together.ai": [
+                    {
+                        "id": "c1",
+                        "label": "together",
+                        "auth_type": "api_key",
+                        "priority": 0,
+                        "source": "manual",
+                        "access_token": "sk-tog-xxx",
+                    }
+                ],
+                "custom:fireworks": [
+                    {
+                        "id": "c2",
+                        "label": "fireworks",
+                        "auth_type": "api_key",
+                        "priority": 0,
+                        "source": "manual",
+                        "access_token": "sk-fw-xxx",
+                    }
+                ],
+                "custom:empty": [],
+            },
+        },
+    )
+
+    from agent.credential_pool import list_custom_pool_providers
+
+    result = list_custom_pool_providers()
+    assert result == ["custom:fireworks", "custom:together.ai"]
+    # "custom:empty" not included because it's empty
--- a/tests/test_credential_pool_routing.py
+++ b/tests/test_credential_pool_routing.py
@ -0,0 +1,350 @@
+"""Tests for credential pool preservation through smart routing and 429 recovery.
+
+Covers:
+1. credential_pool flows through resolve_turn_route (no-route and fallback paths)
+2. CLI _resolve_turn_agent_config passes credential_pool to primary dict
+3. Gateway _resolve_turn_agent_config passes credential_pool to primary dict
+4. Eager fallback deferred when credential pool has credentials
+5. Eager fallback fires when no credential pool exists
+6. Full 429 rotation cycle: retry-same → rotate → exhaust → fallback
+"""
+
+import os
+import time
+from types import SimpleNamespace
+from unittest.mock import MagicMock, patch, PropertyMock
+
+import pytest
+
+
+# ---------------------------------------------------------------------------
+# 1. smart_model_routing: credential_pool preserved in no-route path
+# ---------------------------------------------------------------------------
+
+class TestSmartRoutingPoolPreservation:
+    def test_no_route_preserves_credential_pool(self):
+        from agent.smart_model_routing import resolve_turn_route
+
+        fake_pool = MagicMock(name="CredentialPool")
+        primary = {
+            "model": "gpt-5.4",
+            "api_key": "sk-test",
+            "base_url": None,
+            "provider": "openai-codex",
+            "api_mode": "codex_responses",
+            "command": None,
+            "args": [],
+            "credential_pool": fake_pool,
+        }
+        # routing disabled
+        result = resolve_turn_route("hello", None, primary)
+        assert result["runtime"]["credential_pool"] is fake_pool
+
+    def test_no_route_none_pool(self):
+        from agent.smart_model_routing import resolve_turn_route
+
+        primary = {
+            "model": "gpt-5.4",
+            "api_key": "sk-test",
+            "base_url": None,
+            "provider": "openai-codex",
+            "api_mode": "codex_responses",
+            "command": None,
+            "args": [],
+        }
+        result = resolve_turn_route("hello", None, primary)
+        assert result["runtime"]["credential_pool"] is None
+
+    def test_routing_disabled_preserves_pool(self):
+        from agent.smart_model_routing import resolve_turn_route
+
+        fake_pool = MagicMock(name="CredentialPool")
+        primary = {
+            "model": "gpt-5.4",
+            "api_key": "sk-test",
+            "base_url": None,
+            "provider": "openai-codex",
+            "api_mode": "codex_responses",
+            "command": None,
+            "args": [],
+            "credential_pool": fake_pool,
+        }
+        # routing explicitly disabled
+        result = resolve_turn_route("hello", {"enabled": False}, primary)
+        assert result["runtime"]["credential_pool"] is fake_pool
+
+    def test_route_fallback_on_resolve_error_preserves_pool(self, monkeypatch):
+        """When smart routing picks a cheap model but resolve_runtime_provider
+        fails, the fallback to primary must still include credential_pool."""
+        from agent.smart_model_routing import resolve_turn_route
+
+        fake_pool = MagicMock(name="CredentialPool")
+        primary = {
+            "model": "gpt-5.4",
+            "api_key": "sk-test",
+            "base_url": None,
+            "provider": "openai-codex",
+            "api_mode": "codex_responses",
+            "command": None,
+            "args": [],
+            "credential_pool": fake_pool,
+        }
+        routing_config = {
+            "enabled": True,
+            "cheap_model": "openai/gpt-4.1-mini",
+            "cheap_provider": "openrouter",
+            "max_tokens": 200,
+            "patterns": ["^(hi|hello|hey)"],
+        }
+        # Force resolve_runtime_provider to fail so it falls back to primary
+        monkeypatch.setattr(
+            "hermes_cli.runtime_provider.resolve_runtime_provider",
+            MagicMock(side_effect=RuntimeError("no credentials")),
+        )
+        result = resolve_turn_route("hi", routing_config, primary)
+        assert result["runtime"]["credential_pool"] is fake_pool
+
+
+# ---------------------------------------------------------------------------
+# 2 & 3. CLI and Gateway _resolve_turn_agent_config include credential_pool
+# ---------------------------------------------------------------------------
+
+class TestCliTurnRoutePool:
+    def test_resolve_turn_includes_pool(self, monkeypatch, tmp_path):
+        """CLI's _resolve_turn_agent_config must pass credential_pool to primary."""
+        from agent.smart_model_routing import resolve_turn_route
+        captured = {}
+
+        def spy_resolve(user_message, routing_config, primary):
+            captured["primary"] = primary
+            return resolve_turn_route(user_message, routing_config, primary)
+
+        monkeypatch.setattr(
+            "agent.smart_model_routing.resolve_turn_route", spy_resolve
+        )
+
+        # Build a minimal HermesCLI-like object with the method
+        shell = SimpleNamespace(
+            model="gpt-5.4",
+            api_key="sk-test",
+            base_url=None,
+            provider="openai-codex",
+            api_mode="codex_responses",
+            acp_command=None,
+            acp_args=[],
+            _credential_pool=MagicMock(name="FakePool"),
+            _smart_model_routing={"enabled": False},
+        )
+
+        # Import and bind the real method
+        from cli import HermesCLI
+        bound = HermesCLI._resolve_turn_agent_config.__get__(shell)
+        bound("test message")
+
+        assert "credential_pool" in captured["primary"]
+        assert captured["primary"]["credential_pool"] is shell._credential_pool
+
+
+class TestGatewayTurnRoutePool:
+    def test_resolve_turn_includes_pool(self, monkeypatch):
+        """Gateway's _resolve_turn_agent_config must pass credential_pool."""
+        from agent.smart_model_routing import resolve_turn_route
+        captured = {}
+
+        def spy_resolve(user_message, routing_config, primary):
+            captured["primary"] = primary
+            return resolve_turn_route(user_message, routing_config, primary)
+
+        monkeypatch.setattr(
+            "agent.smart_model_routing.resolve_turn_route", spy_resolve
+        )
+
+        from gateway.run import GatewayRunner
+
+        runner = SimpleNamespace(
+            _smart_model_routing={"enabled": False},
+        )
+
+        runtime_kwargs = {
+            "api_key": "sk-test",
+            "base_url": None,
+            "provider": "openai-codex",
+            "api_mode": "codex_responses",
+            "command": None,
+            "args": [],
+            "credential_pool": MagicMock(name="FakePool"),
+        }
+
+        bound = GatewayRunner._resolve_turn_agent_config.__get__(runner)
+        bound("test message", "gpt-5.4", runtime_kwargs)
+
+        assert "credential_pool" in captured["primary"]
+        assert captured["primary"]["credential_pool"] is runtime_kwargs["credential_pool"]
+
+
+# ---------------------------------------------------------------------------
+# 4 & 5. Eager fallback deferred/fires based on credential pool
+# ---------------------------------------------------------------------------
+
+class TestEagerFallbackWithPool:
+    """Test the eager fallback guard in run_agent.py's error handling loop."""
+
+    def _make_agent(self, has_pool=True, pool_has_creds=True, has_fallback=True):
+        """Create a minimal AIAgent mock with the fields needed."""
+        from run_agent import AIAgent
+
+        with patch.object(AIAgent, "__init__", lambda self, **kw: None):
+            agent = AIAgent()
+
+        agent._credential_pool = None
+        if has_pool:
+            pool = MagicMock()
+            pool.has_available.return_value = pool_has_creds
+            agent._credential_pool = pool
+
+        agent._fallback_chain = [{"model": "fallback/model"}] if has_fallback else []
+        agent._fallback_index = 0
+        agent._try_activate_fallback = MagicMock(return_value=True)
+        agent._emit_status = MagicMock()
+
+        return agent
+
+    def test_eager_fallback_deferred_when_pool_has_credentials(self):
+        """429 with active pool should NOT trigger eager fallback."""
+        agent = self._make_agent(has_pool=True, pool_has_creds=True, has_fallback=True)
+
+        # Simulate the check from run_agent.py lines 7180-7191
+        is_rate_limited = True
+        if is_rate_limited and agent._fallback_index < len(agent._fallback_chain):
+            pool = agent._credential_pool
+            pool_may_recover = pool is not None and pool.has_available()
+            if not pool_may_recover:
+                agent._try_activate_fallback()
+
+        agent._try_activate_fallback.assert_not_called()
+
+    def test_eager_fallback_fires_when_no_pool(self):
+        """429 without pool should trigger eager fallback."""
+        agent = self._make_agent(has_pool=False, has_fallback=True)
+
+        is_rate_limited = True
+        if is_rate_limited and agent._fallback_index < len(agent._fallback_chain):
+            pool = agent._credential_pool
+            pool_may_recover = pool is not None and pool.has_available()
+            if not pool_may_recover:
+                agent._try_activate_fallback()
+
+        agent._try_activate_fallback.assert_called_once()
+
+    def test_eager_fallback_fires_when_pool_exhausted(self):
+        """429 with exhausted pool should trigger eager fallback."""
+        agent = self._make_agent(has_pool=True, pool_has_creds=False, has_fallback=True)
+
+        is_rate_limited = True
+        if is_rate_limited and agent._fallback_index < len(agent._fallback_chain):
+            pool = agent._credential_pool
+            pool_may_recover = pool is not None and pool.has_available()
+            if not pool_may_recover:
+                agent._try_activate_fallback()
+
+        agent._try_activate_fallback.assert_called_once()
+
+
+# ---------------------------------------------------------------------------
+# 6. Full 429 rotation cycle via _recover_with_credential_pool
+# ---------------------------------------------------------------------------
+
+class TestPoolRotationCycle:
+    """Verify the retry-same → rotate → exhaust flow in _recover_with_credential_pool."""
+
+    def _make_agent_with_pool(self, pool_entries=3):
+        from run_agent import AIAgent
+
+        with patch.object(AIAgent, "__init__", lambda self, **kw: None):
+            agent = AIAgent()
+
+        entries = []
+        for i in range(pool_entries):
+            e = MagicMock(name=f"entry_{i}")
+            e.id = f"cred-{i}"
+            entries.append(e)
+
+        pool = MagicMock()
+        pool.has_credentials.return_value = True
+
+        # mark_exhausted_and_rotate returns next entry until exhausted
+        self._rotation_index = 0
+
+        def rotate(status_code=None):
+            self._rotation_index += 1
+            if self._rotation_index < pool_entries:
+                return entries[self._rotation_index]
+            pool.has_credentials.return_value = False
+            return None
+
+        pool.mark_exhausted_and_rotate = MagicMock(side_effect=rotate)
+        agent._credential_pool = pool
+        agent._swap_credential = MagicMock()
+        agent.log_prefix = ""
+
+        return agent, pool, entries
+
+    def test_first_429_sets_retry_flag_no_rotation(self):
+        """First 429 should just set has_retried_429=True, no rotation."""
+        agent, pool, _ = self._make_agent_with_pool(3)
+        recovered, has_retried = agent._recover_with_credential_pool(
+            status_code=429, has_retried_429=False
+        )
+        assert recovered is False
+        assert has_retried is True
+        pool.mark_exhausted_and_rotate.assert_not_called()
+
+    def test_second_429_rotates_to_next(self):
+        """Second consecutive 429 should rotate to next credential."""
+        agent, pool, entries = self._make_agent_with_pool(3)
+        recovered, has_retried = agent._recover_with_credential_pool(
+            status_code=429, has_retried_429=True
+        )
+        assert recovered is True
+        assert has_retried is False  # reset after rotation
+        pool.mark_exhausted_and_rotate.assert_called_once_with(status_code=429)
+        agent._swap_credential.assert_called_once_with(entries[1])
+
+    def test_pool_exhaustion_returns_false(self):
+        """When all credentials exhausted, recovery should return False."""
+        agent, pool, _ = self._make_agent_with_pool(1)
+        # First 429 sets flag
+        _, has_retried = agent._recover_with_credential_pool(
+            status_code=429, has_retried_429=False
+        )
+        assert has_retried is True
+
+        # Second 429 tries to rotate but pool is exhausted (only 1 entry)
+        recovered, _ = agent._recover_with_credential_pool(
+            status_code=429, has_retried_429=True
+        )
+        assert recovered is False
+
+    def test_402_immediate_rotation(self):
+        """402 (billing) should immediately rotate, no retry-first."""
+        agent, pool, entries = self._make_agent_with_pool(3)
+        recovered, has_retried = agent._recover_with_credential_pool(
+            status_code=402, has_retried_429=False
+        )
+        assert recovered is True
+        assert has_retried is False
+        pool.mark_exhausted_and_rotate.assert_called_once_with(status_code=402)
+
+    def test_no_pool_returns_false(self):
+        """No pool should return (False, unchanged)."""
+        from run_agent import AIAgent
+
+        with patch.object(AIAgent, "__init__", lambda self, **kw: None):
+            agent = AIAgent()
+        agent._credential_pool = None
+
+        recovered, has_retried = agent._recover_with_credential_pool(
+            status_code=429, has_retried_429=False
+        )
+        assert recovered is False
+        assert has_retried is False
--- a/tests/test_display.py
+++ b/tests/test_display.py
@ -1,7 +1,17 @@
-"""Tests for agent/display.py — build_tool_preview()."""
+"""Tests for agent/display.py — build_tool_preview() and inline diff previews."""

+import os
 import pytest
-from agent.display import build_tool_preview
+from unittest.mock import MagicMock, patch
+
+from agent.display import (
+    build_tool_preview,
+    capture_local_edit_snapshot,
+    extract_edit_diff,
+    _render_inline_unified_diff,
+    _summarize_rendered_diff_sections,
+    render_edit_diff_with_delta,
+)


 class TestBuildToolPreview:
@ -83,3 +93,110 @@ class TestBuildToolPreview:
        assert build_tool_preview("terminal", 0) is None
        assert build_tool_preview("terminal", "") is None
        assert build_tool_preview("terminal", []) is None
+
+
+class TestEditDiffPreview:
+    def test_extract_edit_diff_for_patch(self):
+        diff = extract_edit_diff("patch", '{"success": true, "diff": "--- a/x\\n+++ b/x\\n"}')
+        assert diff is not None
+        assert "+++ b/x" in diff
+
+    def test_render_inline_unified_diff_colors_added_and_removed_lines(self):
+        rendered = _render_inline_unified_diff(
+            "--- a/cli.py\n"
+            "+++ b/cli.py\n"
+            "@@ -1,2 +1,2 @@\n"
+            "-old line\n"
+            "+new line\n"
+            " context\n"
+        )
+
+        assert "a/cli.py" in rendered[0]
+        assert "b/cli.py" in rendered[0]
+        assert any("old line" in line for line in rendered)
+        assert any("new line" in line for line in rendered)
+        assert any("48;2;" in line for line in rendered)
+
+    def test_extract_edit_diff_ignores_non_edit_tools(self):
+        assert extract_edit_diff("web_search", '{"diff": "--- a\\n+++ b\\n"}') is None
+
+    def test_extract_edit_diff_uses_local_snapshot_for_write_file(self, tmp_path):
+        target = tmp_path / "note.txt"
+        target.write_text("old\n", encoding="utf-8")
+
+        snapshot = capture_local_edit_snapshot("write_file", {"path": str(target)})
+
+        target.write_text("new\n", encoding="utf-8")
+
+        diff = extract_edit_diff(
+            "write_file",
+            '{"bytes_written": 4}',
+            function_args={"path": str(target)},
+            snapshot=snapshot,
+        )
+
+        assert diff is not None
+        assert "--- a/" in diff
+        assert "+++ b/" in diff
+        assert "-old" in diff
+        assert "+new" in diff
+
+    def test_render_edit_diff_with_delta_invokes_printer(self):
+        printer = MagicMock()
+
+        rendered = render_edit_diff_with_delta(
+            "patch",
+            '{"diff": "--- a/x\\n+++ b/x\\n@@ -1 +1 @@\\n-old\\n+new\\n"}',
+            print_fn=printer,
+        )
+
+        assert rendered is True
+        assert printer.call_count >= 2
+        calls = [call.args[0] for call in printer.call_args_list]
+        assert any("a/x" in line and "b/x" in line for line in calls)
+        assert any("old" in line for line in calls)
+        assert any("new" in line for line in calls)
+
+    def test_render_edit_diff_with_delta_skips_without_diff(self):
+        rendered = render_edit_diff_with_delta(
+            "patch",
+            '{"success": true}',
+        )
+
+        assert rendered is False
+
+    def test_render_edit_diff_with_delta_handles_renderer_errors(self, monkeypatch):
+        printer = MagicMock()
+
+        monkeypatch.setattr("agent.display._summarize_rendered_diff_sections", MagicMock(side_effect=RuntimeError("boom")))
+
+        rendered = render_edit_diff_with_delta(
+            "patch",
+            '{"diff": "--- a/x\\n+++ b/x\\n"}',
+            print_fn=printer,
+        )
+
+        assert rendered is False
+        assert printer.call_count == 0
+
+    def test_summarize_rendered_diff_sections_truncates_large_diff(self):
+        diff = "--- a/x.py\n+++ b/x.py\n" + "".join(f"+line{i}\n" for i in range(120))
+
+        rendered = _summarize_rendered_diff_sections(diff, max_lines=20)
+
+        assert len(rendered) == 21
+        assert "omitted" in rendered[-1]
+
+    def test_summarize_rendered_diff_sections_limits_file_count(self):
+        diff = "".join(
+            f"--- a/file{i}.py\n+++ b/file{i}.py\n+line{i}\n"
+            for i in range(8)
+        )
+
+        rendered = _summarize_rendered_diff_sections(diff, max_files=3, max_lines=50)
+
+        assert any("a/file0.py" in line for line in rendered)
+        assert any("a/file1.py" in line for line in rendered)
+        assert any("a/file2.py" in line for line in rendered)
+        assert not any("a/file7.py" in line for line in rendered)
+        assert "additional file" in rendered[-1]
--- a/tests/test_packaging_metadata.py
+++ b/tests/test_packaging_metadata.py
@ -0,0 +1,22 @@
+from pathlib import Path
+import tomllib
+
+
+REPO_ROOT = Path(__file__).resolve().parents[1]
+
+
+def test_faster_whisper_is_not_a_base_dependency():
+    data = tomllib.loads((REPO_ROOT / "pyproject.toml").read_text(encoding="utf-8"))
+    deps = data["project"]["dependencies"]
+
+    assert not any(dep.startswith("faster-whisper") for dep in deps)
+
+    voice_extra = data["project"]["optional-dependencies"]["voice"]
+    assert any(dep.startswith("faster-whisper") for dep in voice_extra)
+
+
+def test_manifest_includes_bundled_skills():
+    manifest = (REPO_ROOT / "MANIFEST.in").read_text(encoding="utf-8")
+
+    assert "graft skills" in manifest
+    assert "graft optional-skills" in manifest
--- a/tests/test_provider_parity.py
+++ b/tests/test_provider_parity.py
@ -137,6 +137,76 @@ class TestBuildApiKwargsOpenRouter:
        assert "codex_reasoning_items" in messages[1]


+class TestDeveloperRoleSwap:
+    """GPT-5 and Codex models should get 'developer' instead of 'system' role."""
+
+    @pytest.mark.parametrize("model", [
+        "openai/gpt-5",
+        "openai/gpt-5-turbo",
+        "openai/gpt-5.4",
+        "gpt-5-mini",
+        "openai/codex-mini",
+        "codex-mini-latest",
+        "openai/codex-pro",
+    ])
+    def test_gpt5_codex_get_developer_role(self, monkeypatch, model):
+        agent = _make_agent(monkeypatch, "openrouter")
+        agent.model = model
+        messages = [
+            {"role": "system", "content": "You are helpful."},
+            {"role": "user", "content": "hi"},
+        ]
+        kwargs = agent._build_api_kwargs(messages)
+        assert kwargs["messages"][0]["role"] == "developer"
+        assert kwargs["messages"][0]["content"] == "You are helpful."
+        assert kwargs["messages"][1]["role"] == "user"
+
+    @pytest.mark.parametrize("model", [
+        "anthropic/claude-opus-4.6",
+        "openai/gpt-4o",
+        "google/gemini-2.5-pro",
+        "deepseek/deepseek-chat",
+        "openai/o3-mini",
+    ])
+    def test_non_matching_models_keep_system_role(self, monkeypatch, model):
+        agent = _make_agent(monkeypatch, "openrouter")
+        agent.model = model
+        messages = [
+            {"role": "system", "content": "You are helpful."},
+            {"role": "user", "content": "hi"},
+        ]
+        kwargs = agent._build_api_kwargs(messages)
+        assert kwargs["messages"][0]["role"] == "system"
+
+    def test_no_system_message_no_crash(self, monkeypatch):
+        agent = _make_agent(monkeypatch, "openrouter")
+        agent.model = "openai/gpt-5"
+        messages = [{"role": "user", "content": "hi"}]
+        kwargs = agent._build_api_kwargs(messages)
+        assert kwargs["messages"][0]["role"] == "user"
+
+    def test_original_messages_not_mutated(self, monkeypatch):
+        agent = _make_agent(monkeypatch, "openrouter")
+        agent.model = "openai/gpt-5"
+        messages = [
+            {"role": "system", "content": "You are helpful."},
+            {"role": "user", "content": "hi"},
+        ]
+        agent._build_api_kwargs(messages)
+        # Original messages must be untouched (internal representation stays "system")
+        assert messages[0]["role"] == "system"
+
+    def test_developer_role_via_nous_portal(self, monkeypatch):
+        agent = _make_agent(monkeypatch, "nous", base_url="https://inference-api.nousresearch.com/v1")
+        agent.model = "gpt-5"
+        messages = [
+            {"role": "system", "content": "You are helpful."},
+            {"role": "user", "content": "hi"},
+        ]
+        kwargs = agent._build_api_kwargs(messages)
+        assert kwargs["messages"][0]["role"] == "developer"
+
+
 class TestBuildApiKwargsAIGateway:
    def test_uses_chat_completions_format(self, monkeypatch):
        agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1")
@ -559,11 +629,18 @@ class TestAuxiliaryClientProviderPriority:
        assert model == "google/gemini-3-flash-preview"

    def test_custom_endpoint_when_no_nous(self, monkeypatch):
+        """Custom endpoint is used when no OpenRouter/Nous keys are available.
+
+        Since the March 2026 config refactor, OPENAI_BASE_URL env var is no
+        longer consulted — base_url comes from config.yaml via
+        resolve_runtime_provider.  Mock _resolve_custom_runtime directly.
+        """
        monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
-        monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1")
        monkeypatch.setenv("OPENAI_API_KEY", "local-key")
        from agent.auxiliary_client import get_text_auxiliary_client
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
+             patch("agent.auxiliary_client._resolve_custom_runtime",
+                   return_value=("http://localhost:1234/v1", "local-key")), \
             patch("agent.auxiliary_client.OpenAI") as mock:
            client, model = get_text_auxiliary_client()
        assert mock.call_args.kwargs["base_url"] == "http://localhost:1234/v1"
--- a/tests/test_run_agent.py
+++ b/tests/test_run_agent.py
@ -230,6 +230,27 @@ class TestStripThinkBlocks:
        assert "line1" not in result
        assert "visible" in result

+    def test_orphaned_closing_think_tag(self, agent):
+        result = agent._strip_think_blocks("some reasoning</think>actual answer")
+        assert "</think>" not in result
+        assert "actual answer" in result
+
+    def test_orphaned_closing_thinking_tag(self, agent):
+        result = agent._strip_think_blocks("reasoning</thinking>answer")
+        assert "</thinking>" not in result
+        assert "answer" in result
+
+    def test_orphaned_opening_think_tag(self, agent):
+        result = agent._strip_think_blocks("<think>orphaned reasoning without close")
+        assert "<think>" not in result
+
+    def test_mixed_orphaned_and_paired_tags(self, agent):
+        text = "stray</think><think>paired reasoning</think> visible"
+        result = agent._strip_think_blocks(text)
+        assert "</think>" not in result
+        assert "<think>" not in result
+        assert "visible" in result
+

 class TestExtractReasoning:
    def test_reasoning_field(self, agent):
@ -1223,6 +1244,42 @@ class TestConcurrentToolExecution:
            )
            assert result == "result"

+    def test_sequential_tool_callbacks_fire_in_order(self, agent):
+        tool_call = _mock_tool_call(name="web_search", arguments='{"query":"hello"}', call_id="c1")
+        mock_msg = _mock_assistant_msg(content="", tool_calls=[tool_call])
+        messages = []
+        starts = []
+        completes = []
+        agent.tool_start_callback = lambda tool_call_id, function_name, function_args: starts.append((tool_call_id, function_name, function_args))
+        agent.tool_complete_callback = lambda tool_call_id, function_name, function_args, function_result: completes.append((tool_call_id, function_name, function_args, function_result))
+
+        with patch("run_agent.handle_function_call", return_value='{"success": true}'):
+            agent._execute_tool_calls_sequential(mock_msg, messages, "task-1")
+
+        assert starts == [("c1", "web_search", {"query": "hello"})]
+        assert completes == [("c1", "web_search", {"query": "hello"}, '{"success": true}')]
+
+    def test_concurrent_tool_callbacks_fire_for_each_tool(self, agent):
+        tc1 = _mock_tool_call(name="web_search", arguments='{"query":"one"}', call_id="c1")
+        tc2 = _mock_tool_call(name="web_search", arguments='{"query":"two"}', call_id="c2")
+        mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
+        messages = []
+        starts = []
+        completes = []
+        agent.tool_start_callback = lambda tool_call_id, function_name, function_args: starts.append((tool_call_id, function_name, function_args))
+        agent.tool_complete_callback = lambda tool_call_id, function_name, function_args, function_result: completes.append((tool_call_id, function_name, function_args, function_result))
+
+        with patch("run_agent.handle_function_call", side_effect=['{"id":1}', '{"id":2}']):
+            agent._execute_tool_calls_concurrent(mock_msg, messages, "task-1")
+
+        assert starts == [
+            ("c1", "web_search", {"query": "one"}),
+            ("c2", "web_search", {"query": "two"}),
+        ]
+        assert len(completes) == 2
+        assert {entry[0] for entry in completes} == {"c1", "c2"}
+        assert {entry[3] for entry in completes} == {'{"id":1}', '{"id":2}'}
+
    def test_invoke_tool_handles_agent_level_tools(self, agent):
        """_invoke_tool should handle todo tool directly."""
        with patch("tools.todo_tool.todo_tool", return_value='{"ok":true}') as mock_todo:
@ -1264,6 +1321,38 @@ class TestPathsOverlap:
        assert not _paths_overlap(Path("src/a.py"), Path(""))


+class TestParallelScopePathNormalization:
+    def test_extract_parallel_scope_path_normalizes_relative_to_cwd(self, tmp_path, monkeypatch):
+        from run_agent import _extract_parallel_scope_path
+
+        monkeypatch.chdir(tmp_path)
+
+        scoped = _extract_parallel_scope_path("write_file", {"path": "./notes.txt"})
+
+        assert scoped == tmp_path / "notes.txt"
+
+    def test_extract_parallel_scope_path_treats_relative_and_absolute_same_file_as_same_scope(self, tmp_path, monkeypatch):
+        from run_agent import _extract_parallel_scope_path, _paths_overlap
+
+        monkeypatch.chdir(tmp_path)
+        abs_path = tmp_path / "notes.txt"
+
+        rel_scoped = _extract_parallel_scope_path("write_file", {"path": "notes.txt"})
+        abs_scoped = _extract_parallel_scope_path("write_file", {"path": str(abs_path)})
+
+        assert rel_scoped == abs_scoped
+        assert _paths_overlap(rel_scoped, abs_scoped)
+
+    def test_should_parallelize_tool_batch_rejects_same_file_with_mixed_path_spellings(self, tmp_path, monkeypatch):
+        from run_agent import _should_parallelize_tool_batch
+
+        monkeypatch.chdir(tmp_path)
+        tc1 = _mock_tool_call(name="write_file", arguments='{"path":"notes.txt","content":"one"}', call_id="c1")
+        tc2 = _mock_tool_call(name="write_file", arguments=f'{{"path":"{tmp_path / "notes.txt"}","content":"two"}}', call_id="c2")
+
+        assert not _should_parallelize_tool_batch([tc1, tc2])
+
+
 class TestHandleMaxIterations:
    def test_returns_summary(self, agent):
        resp = _mock_response(content="Here is a summary of what I did.")
@ -1776,6 +1865,127 @@ class TestNousCredentialRefresh:
        assert isinstance(agent.client, _RebuiltClient)


+class TestCredentialPoolRecovery:
+    def test_recover_with_pool_rotates_on_402(self, agent):
+        current = SimpleNamespace(label="primary")
+        next_entry = SimpleNamespace(label="secondary")
+
+        class _Pool:
+            def current(self):
+                return current
+
+            def mark_exhausted_and_rotate(self, *, status_code):
+                assert status_code == 402
+                return next_entry
+
+        agent._credential_pool = _Pool()
+        agent._swap_credential = MagicMock()
+
+        recovered, retry_same = agent._recover_with_credential_pool(
+            status_code=402,
+            has_retried_429=False,
+        )
+
+        assert recovered is True
+        assert retry_same is False
+        agent._swap_credential.assert_called_once_with(next_entry)
+
+    def test_recover_with_pool_retries_first_429_then_rotates(self, agent):
+        next_entry = SimpleNamespace(label="secondary")
+
+        class _Pool:
+            def current(self):
+                return SimpleNamespace(label="primary")
+
+            def mark_exhausted_and_rotate(self, *, status_code):
+                assert status_code == 429
+                return next_entry
+
+        agent._credential_pool = _Pool()
+        agent._swap_credential = MagicMock()
+
+        recovered, retry_same = agent._recover_with_credential_pool(
+            status_code=429,
+            has_retried_429=False,
+        )
+        assert recovered is False
+        assert retry_same is True
+        agent._swap_credential.assert_not_called()
+
+        recovered, retry_same = agent._recover_with_credential_pool(
+            status_code=429,
+            has_retried_429=True,
+        )
+        assert recovered is True
+        assert retry_same is False
+        agent._swap_credential.assert_called_once_with(next_entry)
+
+
+    def test_recover_with_pool_refreshes_on_401(self, agent):
+        """401 with successful refresh should swap to refreshed credential."""
+        refreshed_entry = SimpleNamespace(label="refreshed-primary", id="abc")
+
+        class _Pool:
+            def try_refresh_current(self):
+                return refreshed_entry
+
+        agent._credential_pool = _Pool()
+        agent._swap_credential = MagicMock()
+
+        recovered, retry_same = agent._recover_with_credential_pool(
+            status_code=401,
+            has_retried_429=False,
+        )
+
+        assert recovered is True
+        agent._swap_credential.assert_called_once_with(refreshed_entry)
+
+    def test_recover_with_pool_rotates_on_401_when_refresh_fails(self, agent):
+        """401 with failed refresh should rotate to next credential."""
+        next_entry = SimpleNamespace(label="secondary", id="def")
+
+        class _Pool:
+            def try_refresh_current(self):
+                return None  # refresh failed
+
+            def mark_exhausted_and_rotate(self, *, status_code):
+                assert status_code == 401
+                return next_entry
+
+        agent._credential_pool = _Pool()
+        agent._swap_credential = MagicMock()
+
+        recovered, retry_same = agent._recover_with_credential_pool(
+            status_code=401,
+            has_retried_429=False,
+        )
+
+        assert recovered is True
+        assert retry_same is False
+        agent._swap_credential.assert_called_once_with(next_entry)
+
+    def test_recover_with_pool_401_refresh_fails_no_more_credentials(self, agent):
+        """401 with failed refresh and no other credentials returns not recovered."""
+
+        class _Pool:
+            def try_refresh_current(self):
+                return None
+
+            def mark_exhausted_and_rotate(self, *, status_code):
+                return None  # no more credentials
+
+        agent._credential_pool = _Pool()
+        agent._swap_credential = MagicMock()
+
+        recovered, retry_same = agent._recover_with_credential_pool(
+            status_code=401,
+            has_retried_429=False,
+        )
+
+        assert recovered is False
+        agent._swap_credential.assert_not_called()
+
+
 class TestMaxTokensParam:
    """Verify _max_tokens_param returns the correct key for each provider."""

@ -2604,6 +2814,46 @@ def test_is_openai_client_closed_honors_custom_client_flag():
    assert AIAgent._is_openai_client_closed(SimpleNamespace(is_closed=False)) is False


+def test_is_openai_client_closed_handles_method_form():
+    """Fix for issue #4377: is_closed as method (openai SDK) vs property (httpx).
+
+    The openai SDK's is_closed is a method, not a property. Prior to this fix,
+    getattr(client, "is_closed", False) returned the bound method object, which
+    is always truthy, causing the function to incorrectly report all clients as
+    closed and triggering unnecessary client recreation on every API call.
+    """
+
+    class MethodFormClient:
+        """Mimics openai.OpenAI where is_closed() is a method."""
+
+        def __init__(self, closed: bool):
+            self._closed = closed
+
+        def is_closed(self) -> bool:
+            return self._closed
+
+    # Method returning False - client is open
+    open_client = MethodFormClient(closed=False)
+    assert AIAgent._is_openai_client_closed(open_client) is False
+
+    # Method returning True - client is closed
+    closed_client = MethodFormClient(closed=True)
+    assert AIAgent._is_openai_client_closed(closed_client) is True
+
+
+def test_is_openai_client_closed_falls_back_to_http_client():
+    """Verify fallback to _client.is_closed when top-level is_closed is None."""
+
+    class ClientWithHttpClient:
+        is_closed = None  # No top-level is_closed
+
+        def __init__(self, http_closed: bool):
+            self._client = SimpleNamespace(is_closed=http_closed)
+
+    assert AIAgent._is_openai_client_closed(ClientWithHttpClient(http_closed=False)) is False
+    assert AIAgent._is_openai_client_closed(ClientWithHttpClient(http_closed=True)) is True
+
+
 class TestAnthropicBaseUrlPassthrough:
    """Bug fix: base_url was filtered with 'anthropic in base_url', blocking proxies."""

--- a/tests/test_runtime_provider_resolution.py
+++ b/tests/test_runtime_provider_resolution.py
@ -1,6 +1,123 @@
 from hermes_cli import runtime_provider as rp


+def test_resolve_runtime_provider_uses_credential_pool(monkeypatch):
+    class _Entry:
+        access_token = "pool-token"
+        source = "manual"
+        base_url = "https://chatgpt.com/backend-api/codex"
+
+    class _Pool:
+        def has_credentials(self):
+            return True
+
+        def select(self):
+            return _Entry()
+
+    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openai-codex")
+    monkeypatch.setattr(rp, "load_pool", lambda provider: _Pool())
+
+    resolved = rp.resolve_runtime_provider(requested="openai-codex")
+
+    assert resolved["provider"] == "openai-codex"
+    assert resolved["api_key"] == "pool-token"
+    assert resolved["credential_pool"] is not None
+    assert resolved["source"] == "manual"
+
+
+def test_resolve_runtime_provider_anthropic_pool_respects_config_base_url(monkeypatch):
+    class _Entry:
+        access_token = "pool-token"
+        source = "manual"
+        base_url = "https://api.anthropic.com"
+
+    class _Pool:
+        def has_credentials(self):
+            return True
+
+        def select(self):
+            return _Entry()
+
+    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "anthropic")
+    monkeypatch.setattr(
+        rp,
+        "_get_model_config",
+        lambda: {
+            "provider": "anthropic",
+            "base_url": "https://proxy.example.com/anthropic",
+        },
+    )
+    monkeypatch.setattr(rp, "load_pool", lambda provider: _Pool())
+
+    resolved = rp.resolve_runtime_provider(requested="anthropic")
+
+    assert resolved["provider"] == "anthropic"
+    assert resolved["api_mode"] == "anthropic_messages"
+    assert resolved["api_key"] == "pool-token"
+    assert resolved["base_url"] == "https://proxy.example.com/anthropic"
+
+
+def test_resolve_runtime_provider_anthropic_explicit_override_skips_pool(monkeypatch):
+    def _unexpected_pool(provider):
+        raise AssertionError(f"load_pool should not be called for {provider}")
+
+    def _unexpected_anthropic_token():
+        raise AssertionError("resolve_anthropic_token should not be called")
+
+    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "anthropic")
+    monkeypatch.setattr(
+        rp,
+        "_get_model_config",
+        lambda: {
+            "provider": "anthropic",
+            "base_url": "https://config.example.com/anthropic",
+        },
+    )
+    monkeypatch.setattr(rp, "load_pool", _unexpected_pool)
+    monkeypatch.setattr(
+        "agent.anthropic_adapter.resolve_anthropic_token",
+        _unexpected_anthropic_token,
+    )
+
+    resolved = rp.resolve_runtime_provider(
+        requested="anthropic",
+        explicit_api_key="anthropic-explicit-token",
+        explicit_base_url="https://proxy.example.com/anthropic/",
+    )
+
+    assert resolved["provider"] == "anthropic"
+    assert resolved["api_mode"] == "anthropic_messages"
+    assert resolved["api_key"] == "anthropic-explicit-token"
+    assert resolved["base_url"] == "https://proxy.example.com/anthropic"
+    assert resolved["source"] == "explicit"
+    assert resolved.get("credential_pool") is None
+
+
+def test_resolve_runtime_provider_falls_back_when_pool_empty(monkeypatch):
+    class _Pool:
+        def has_credentials(self):
+            return False
+
+    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openai-codex")
+    monkeypatch.setattr(rp, "load_pool", lambda provider: _Pool())
+    monkeypatch.setattr(
+        rp,
+        "resolve_codex_runtime_credentials",
+        lambda: {
+            "provider": "openai-codex",
+            "base_url": "https://chatgpt.com/backend-api/codex",
+            "api_key": "codex-token",
+            "source": "hermes-auth-store",
+            "last_refresh": "2026-02-26T00:00:00Z",
+        },
+    )
+
+    resolved = rp.resolve_runtime_provider(requested="openai-codex")
+
+    assert resolved["api_key"] == "codex-token"
+    assert resolved.get("credential_pool") is None
+
+
 def test_resolve_runtime_provider_codex(monkeypatch):
    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openai-codex")
    monkeypatch.setattr(
@ -40,6 +157,36 @@ def test_resolve_runtime_provider_ai_gateway(monkeypatch):
    assert resolved["requested_provider"] == "ai-gateway"


+def test_resolve_runtime_provider_ai_gateway_explicit_override_skips_pool(monkeypatch):
+    def _unexpected_pool(provider):
+        raise AssertionError(f"load_pool should not be called for {provider}")
+
+    def _unexpected_provider_resolution(provider):
+        raise AssertionError(f"resolve_api_key_provider_credentials should not be called for {provider}")
+
+    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "ai-gateway")
+    monkeypatch.setattr(rp, "_get_model_config", lambda: {})
+    monkeypatch.setattr(rp, "load_pool", _unexpected_pool)
+    monkeypatch.setattr(
+        rp,
+        "resolve_api_key_provider_credentials",
+        _unexpected_provider_resolution,
+    )
+
+    resolved = rp.resolve_runtime_provider(
+        requested="ai-gateway",
+        explicit_api_key="ai-gateway-explicit-token",
+        explicit_base_url="https://proxy.example.com/v1/",
+    )
+
+    assert resolved["provider"] == "ai-gateway"
+    assert resolved["api_mode"] == "chat_completions"
+    assert resolved["api_key"] == "ai-gateway-explicit-token"
+    assert resolved["base_url"] == "https://proxy.example.com/v1"
+    assert resolved["source"] == "explicit"
+    assert resolved.get("credential_pool") is None
+
+
 def test_resolve_runtime_provider_openrouter_explicit(monkeypatch):
    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter")
    monkeypatch.setattr(rp, "_get_model_config", lambda: {})
@ -61,6 +208,69 @@ def test_resolve_runtime_provider_openrouter_explicit(monkeypatch):
    assert resolved["source"] == "explicit"


+def test_resolve_runtime_provider_auto_uses_openrouter_pool(monkeypatch):
+    class _Entry:
+        access_token = "pool-key"
+        source = "manual"
+        base_url = "https://openrouter.ai/api/v1"
+
+    class _Pool:
+        def has_credentials(self):
+            return True
+
+        def select(self):
+            return _Entry()
+
+    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter")
+    monkeypatch.setattr(rp, "_get_model_config", lambda: {})
+    monkeypatch.setattr(rp, "load_pool", lambda provider: _Pool())
+    monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
+    monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False)
+    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
+    monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
+
+    resolved = rp.resolve_runtime_provider(requested="auto")
+
+    assert resolved["provider"] == "openrouter"
+    assert resolved["api_key"] == "pool-key"
+    assert resolved["base_url"] == "https://openrouter.ai/api/v1"
+    assert resolved["source"] == "manual"
+    assert resolved.get("credential_pool") is not None
+
+
+def test_resolve_runtime_provider_openrouter_explicit_api_key_skips_pool(monkeypatch):
+    class _Entry:
+        access_token = "pool-key"
+        source = "manual"
+        base_url = "https://openrouter.ai/api/v1"
+
+    class _Pool:
+        def has_credentials(self):
+            return True
+
+        def select(self):
+            return _Entry()
+
+    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter")
+    monkeypatch.setattr(rp, "_get_model_config", lambda: {})
+    monkeypatch.setattr(rp, "load_pool", lambda provider: _Pool())
+    monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
+    monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False)
+    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
+    monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
+
+    resolved = rp.resolve_runtime_provider(
+        requested="openrouter",
+        explicit_api_key="explicit-key",
+    )
+
+    assert resolved["provider"] == "openrouter"
+    assert resolved["api_key"] == "explicit-key"
+    assert resolved["base_url"] == rp.OPENROUTER_BASE_URL
+    assert resolved["source"] == "explicit"
+    assert resolved.get("credential_pool") is None
+
+
 def test_resolve_runtime_provider_openrouter_ignores_codex_config_base_url(monkeypatch):
    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter")
    monkeypatch.setattr(
@ -136,16 +346,19 @@ def test_openai_key_used_when_no_openrouter_key(monkeypatch):


 def test_custom_endpoint_prefers_openai_key(monkeypatch):
-    """Custom endpoint should use OPENAI_API_KEY, not OPENROUTER_API_KEY.
+    """Custom endpoint should use config api_key over OPENROUTER_API_KEY.

-    Regression test for #560: when base_url is a non-OpenRouter endpoint,
-    OPENROUTER_API_KEY was being sent as the auth header instead of OPENAI_API_KEY.
+    Updated for #4165: config.yaml is now the source of truth for endpoint URLs,
+    OPENAI_BASE_URL env var is no longer consulted.
    """
    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter")
-    monkeypatch.setattr(rp, "_get_model_config", lambda: {})
-    monkeypatch.setenv("OPENAI_BASE_URL", "https://api.z.ai/api/coding/paas/v4")
+    monkeypatch.setattr(rp, "_get_model_config", lambda: {
+        "provider": "custom",
+        "base_url": "https://api.z.ai/api/coding/paas/v4",
+        "api_key": "zai-key",
+    })
+    monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
    monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False)
-    monkeypatch.setenv("OPENAI_API_KEY", "zai-key")
    monkeypatch.setenv("OPENROUTER_API_KEY", "openrouter-key")

    resolved = rp.resolve_runtime_provider(requested="custom")
@ -221,19 +434,22 @@ def test_custom_endpoint_uses_config_api_field_when_no_api_key(monkeypatch):
    assert resolved["api_key"] == "config-api-field"


-def test_custom_endpoint_auto_provider_prefers_openai_key(monkeypatch):
-    """Auto provider with non-OpenRouter base_url should prefer OPENAI_API_KEY.
+def test_custom_endpoint_explicit_custom_prefers_config_key(monkeypatch):
+    """Explicit 'custom' provider with config base_url+api_key should use them.

-    Same as #560 but via 'hermes model' flow which sets provider to 'auto'.
+    Updated for #4165: config.yaml is the source of truth, not OPENAI_BASE_URL.
    """
    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter")
-    monkeypatch.setattr(rp, "_get_model_config", lambda: {})
-    monkeypatch.setenv("OPENAI_BASE_URL", "https://my-vllm-server.example.com/v1")
+    monkeypatch.setattr(rp, "_get_model_config", lambda: {
+        "provider": "custom",
+        "base_url": "https://my-vllm-server.example.com/v1",
+        "api_key": "sk-vllm-key",
+    })
+    monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
    monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False)
-    monkeypatch.setenv("OPENAI_API_KEY", "sk-vllm-key")
    monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-...leak")

-    resolved = rp.resolve_runtime_provider(requested="auto")
+    resolved = rp.resolve_runtime_provider(requested="custom")

    assert resolved["base_url"] == "https://my-vllm-server.example.com/v1"
    assert resolved["api_key"] == "sk-vllm-key"
@ -359,6 +575,36 @@ def test_explicit_openrouter_skips_openai_base_url(monkeypatch):
    assert resolved["api_key"] == "or-test-key"


+def test_explicit_openrouter_honors_openrouter_base_url_over_pool(monkeypatch):
+    class _Entry:
+        access_token = "pool-key"
+        source = "manual"
+        base_url = "https://openrouter.ai/api/v1"
+
+    class _Pool:
+        def has_credentials(self):
+            return True
+
+        def select(self):
+            return _Entry()
+
+    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter")
+    monkeypatch.setattr(rp, "_get_model_config", lambda: {})
+    monkeypatch.setattr(rp, "load_pool", lambda provider: _Pool())
+    monkeypatch.setenv("OPENROUTER_BASE_URL", "https://mirror.example.com/v1")
+    monkeypatch.setenv("OPENROUTER_API_KEY", "mirror-key")
+    monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
+    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
+
+    resolved = rp.resolve_runtime_provider(requested="openrouter")
+
+    assert resolved["provider"] == "openrouter"
+    assert resolved["base_url"] == "https://mirror.example.com/v1"
+    assert resolved["api_key"] == "mirror-key"
+    assert resolved["source"] == "env/config"
+    assert resolved.get("credential_pool") is None
+
+
 def test_resolve_requested_provider_precedence(monkeypatch):
    monkeypatch.setenv("HERMES_INFERENCE_PROVIDER", "nous")
    monkeypatch.setattr(rp, "_get_model_config", lambda: {"provider": "openai-codex"})
@ -545,7 +791,7 @@ def test_alibaba_default_coding_intl_endpoint_uses_chat_completions(monkeypatch)

    assert resolved["provider"] == "alibaba"
    assert resolved["api_mode"] == "chat_completions"
-    assert resolved["base_url"] == "https://coding-intl.dashscope.aliyuncs.com/v1"
+    assert resolved["base_url"] == "https://dashscope-intl.aliyuncs.com/compatible-mode/v1"


 def test_alibaba_anthropic_endpoint_override_uses_anthropic_messages(monkeypatch):
--- a/tests/test_setup_model_selection.py
+++ b/tests/test_setup_model_selection.py
@ -32,8 +32,8 @@ class TestSetupProviderModelSelection:
    @pytest.mark.parametrize("provider_id,expected_defaults", [
        ("zai", ["glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"]),
        ("kimi-coding", ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"]),
-        ("minimax", ["MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"]),
-        ("minimax-cn", ["MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"]),
+        ("minimax", ["MiniMax-M2.7", "MiniMax-M2.7-highspeed", "MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"]),
+        ("minimax-cn", ["MiniMax-M2.7", "MiniMax-M2.7-highspeed", "MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"]),
    ])
    @patch("hermes_cli.models.fetch_api_models", return_value=[])
    @patch("hermes_cli.config.get_env_value", return_value="fake-key")
--- a/tests/test_streaming.py
+++ b/tests/test_streaming.py
@ -782,3 +782,35 @@ class TestCodexStreamCallbacks:

        response = agent._run_codex_stream({}, client=mock_client)
        assert "Hello from Codex!" in deltas
+
+    def test_codex_remote_protocol_error_falls_back_to_create_stream(self):
+        from run_agent import AIAgent
+        import httpx
+
+        fallback_response = SimpleNamespace(
+            output=[SimpleNamespace(
+                type="message",
+                content=[SimpleNamespace(type="output_text", text="fallback from create stream")],
+            )],
+            status="completed",
+        )
+
+        mock_client = MagicMock()
+        mock_client.responses.stream.side_effect = httpx.RemoteProtocolError(
+            "peer closed connection without sending complete message body"
+        )
+
+        agent = AIAgent(
+            model="test/model",
+            quiet_mode=True,
+            skip_context_files=True,
+            skip_memory=True,
+        )
+        agent.api_mode = "codex_responses"
+        agent._interrupt_requested = False
+
+        with patch.object(agent, "_run_codex_create_stream_fallback", return_value=fallback_response) as mock_fallback:
+            response = agent._run_codex_stream({}, client=mock_client)
+
+        assert response is fallback_response
+        mock_fallback.assert_called_once_with({}, client=mock_client)
--- a/Show more
+++ b/Show more