diff --git a/.github/workflows/contributor-check.yml b/.github/workflows/contributor-check.yml
index 939215ed449..de38fcaae9a 100644
--- a/.github/workflows/contributor-check.yml
+++ b/.github/workflows/contributor-check.yml
@@ -3,11 +3,9 @@ name: Contributor Attribution Check
 on:
   pull_request:
     branches: [main]
-    paths:
-      # Only run when code files change (not docs-only PRs)
-      - '*.py'
-      - '**/*.py'
-      - '.github/workflows/contributor-check.yml'
+  # No paths filter — the job must always run so the required check
+  # reports a status (path-gated workflows leave checks "pending" forever
+  # when no matching files change, which blocks merge).
 
 permissions:
   contents: read
@@ -20,7 +18,21 @@ jobs:
         with:
           fetch-depth: 0  # Full history needed for git log
 
+      - name: Check if relevant files changed
+        id: filter
+        run: |
+          BASE="${{ github.event.pull_request.base.sha }}"
+          HEAD="${{ github.event.pull_request.head.sha }}"
+          CHANGED=$(git diff --name-only "$BASE"..."$HEAD" -- '*.py' '**/*.py' '.github/workflows/contributor-check.yml' || true)
+          if [ -n "$CHANGED" ]; then
+            echo "run=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "run=false" >> "$GITHUB_OUTPUT"
+            echo "No Python files changed, skipping attribution check."
+          fi
+
       - name: Check for unmapped contributor emails
+        if: steps.filter.outputs.run == 'true'
         run: |
           # Get the merge base between this PR and main
           MERGE_BASE=$(git merge-base origin/main HEAD)
diff --git a/.github/workflows/deploy-site.yml b/.github/workflows/deploy-site.yml
index 823496157a9..82acaa6667d 100644
--- a/.github/workflows/deploy-site.yml
+++ b/.github/workflows/deploy-site.yml
@@ -22,7 +22,12 @@ concurrency:
 
 jobs:
   deploy-vercel:
-    if: github.event_name == 'release'
+    # Triggered automatically on release publish (production cuts) and
+    # manually via `gh workflow run deploy-site.yml` when an out-of-band
+    # main commit needs to ship live before the next release tag — e.g.
+    # a skills-index PR that doesn't touch website/** paths and so
+    # doesn't auto-deploy via the deploy-docs path.
+    if: github.event_name == 'release' || github.event_name == 'workflow_dispatch'
     runs-on: ubuntu-latest
     steps:
       - name: Trigger Vercel Deploy
diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml
index bdbea5c9c05..7dd0c799f0e 100644
--- a/.github/workflows/docker-publish.yml
+++ b/.github/workflows/docker-publish.yml
@@ -71,6 +71,8 @@ jobs:
           load: true
           platforms: linux/amd64
           tags: ${{ env.IMAGE_NAME }}:test
+          build-args: |
+            HERMES_GIT_SHA=${{ github.sha }}
           cache-from: type=gha,scope=docker-amd64
           cache-to: type=gha,mode=max,scope=docker-amd64
 
@@ -149,6 +151,8 @@ jobs:
           platforms: linux/amd64
           labels: |
             org.opencontainers.image.revision=${{ github.sha }}
+          build-args: |
+            HERMES_GIT_SHA=${{ github.sha }}
           outputs: type=image,name=${{ env.IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=true
           cache-from: type=gha,scope=docker-amd64
           cache-to: type=gha,mode=max,scope=docker-amd64
@@ -192,10 +196,12 @@ jobs:
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f  # v3
 
-      # Build once, load into the local daemon for smoke testing.  Cached
-      # to gha with a per-arch scope; the push step below reuses every
-      # layer from this build.
-      - name: Build image (arm64, smoke test)
+      # Build once, load into the local daemon for smoke testing. PR arm64
+      # builds deliberately avoid the gha cache: cold-cache arm64 builds can
+      # outlive GitHub's short-lived Azure cache SAS token, then fail while
+      # reading or writing cache blobs before the smoke test can run.
+      - name: Build image (arm64, smoke test, uncached PR)
+        if: github.event_name == 'pull_request'
         uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f  # v7.1.0
         with:
           context: .
@@ -203,6 +209,22 @@ jobs:
           load: true
           platforms: linux/arm64
           tags: ${{ env.IMAGE_NAME }}:test
+          build-args: |
+            HERMES_GIT_SHA=${{ github.sha }}
+
+      # Main/release builds still use the per-arch gha cache so the digest
+      # push below can reuse layers from this smoke-test build.
+      - name: Build image (arm64, smoke test, cached publish)
+        if: github.event_name != 'pull_request'
+        uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f  # v7.1.0
+        with:
+          context: .
+          file: Dockerfile
+          load: true
+          platforms: linux/arm64
+          tags: ${{ env.IMAGE_NAME }}:test
+          build-args: |
+            HERMES_GIT_SHA=${{ github.sha }}
           cache-from: type=gha,scope=docker-arm64
           cache-to: type=gha,mode=max,scope=docker-arm64
 
@@ -228,6 +250,8 @@ jobs:
           platforms: linux/arm64
           labels: |
             org.opencontainers.image.revision=${{ github.sha }}
+          build-args: |
+            HERMES_GIT_SHA=${{ github.sha }}
           outputs: type=image,name=${{ env.IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=true
           cache-from: type=gha,scope=docker-arm64
           cache-to: type=gha,mode=max,scope=docker-arm64
diff --git a/.github/workflows/supply-chain-audit.yml b/.github/workflows/supply-chain-audit.yml
index 2f727e8d254..3309de78dae 100644
--- a/.github/workflows/supply-chain-audit.yml
+++ b/.github/workflows/supply-chain-audit.yml
@@ -3,15 +3,9 @@ name: Supply Chain Audit
 on:
   pull_request:
     types: [opened, synchronize, reopened]
-    paths:
-      - '**/*.py'
-      - '**/*.pth'
-      - '**/setup.py'
-      - '**/setup.cfg'
-      - '**/sitecustomize.py'
-      - '**/usercustomize.py'
-      - '**/__init__.pth'
-      - 'pyproject.toml'
+  # No paths filter — the jobs must always run so required checks
+  # report a status (path-gated workflows leave checks "pending" forever
+  # when no matching files change, which blocks merge).
 
 permissions:
   pull-requests: write
@@ -27,8 +21,44 @@ permissions:
 # advisory-only workflow instead.
 
 jobs:
+  # ── Path filter (shared by both scan and dep-bounds) ───────────────
+  changes:
+    runs-on: ubuntu-latest
+    outputs:
+      # True when any file the scanner cares about changed in this PR
+      scan: ${{ steps.filter.outputs.scan }}
+      # True when pyproject.toml changed in this PR
+      deps: ${{ steps.filter.outputs.deps }}
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          fetch-depth: 0
+      - name: Check for relevant file changes
+        id: filter
+        run: |
+          BASE="${{ github.event.pull_request.base.sha }}"
+          HEAD="${{ github.event.pull_request.head.sha }}"
+          SCAN_FILES=$(git diff --name-only "$BASE"..."$HEAD" -- \
+            '*.py' '**/*.py' '*.pth' '**/*.pth' \
+            'setup.py' 'setup.cfg' \
+            'sitecustomize.py' 'usercustomize.py' '__init__.pth' \
+            'pyproject.toml' || true)
+          if [ -n "$SCAN_FILES" ]; then
+            echo "scan=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "scan=false" >> "$GITHUB_OUTPUT"
+          fi
+          DEPS_FILES=$(git diff --name-only "$BASE"..."$HEAD" -- 'pyproject.toml' || true)
+          if [ -n "$DEPS_FILES" ]; then
+            echo "deps=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "deps=false" >> "$GITHUB_OUTPUT"
+          fi
+
   scan:
     name: Scan PR for critical supply chain risks
+    needs: changes
+    if: needs.changes.outputs.scan == 'true'
     runs-on: ubuntu-latest
     steps:
       - name: Checkout
@@ -147,10 +177,24 @@ jobs:
           echo "::error::CRITICAL supply chain risk patterns detected in this PR. See the PR comment for details."
           exit 1
 
+  # Gate: reports success when scan was skipped (no relevant files changed).
+  # This ensures the required check always gets a status.
+  scan-gate:
+    name: Scan PR for critical supply chain risks
+    needs: changes
+    # always() so the gate still reports SUCCESS even if `changes` fails/is
+    # skipped — without it, a failed dependency would leave the required
+    # check unreported (i.e. "pending"), the exact failure mode this fixes.
+    if: always() && needs.changes.outputs.scan != 'true'
+    runs-on: ubuntu-latest
+    steps:
+      - run: echo "No supply-chain-relevant files changed, skipping scan."
+
   dep-bounds:
     name: Check PyPI dependency upper bounds
+    needs: changes
+    if: needs.changes.outputs.deps == 'true'
     runs-on: ubuntu-latest
-    if: contains(github.event.pull_request.changed_files_url, 'pyproject.toml') || true
     steps:
       - name: Checkout
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
@@ -211,3 +255,16 @@ jobs:
         run: |
           echo "::error::PyPI dependencies without upper bounds detected. Add <next_major ceiling per CONTRIBUTING.md policy."
           exit 1
+
+  # Gate: reports success when dep-bounds was skipped (no pyproject.toml changed).
+  # This ensures the required check always gets a status.
+  dep-bounds-gate:
+    name: Check PyPI dependency upper bounds
+    needs: changes
+    # always() so the gate still reports SUCCESS even if `changes` fails/is
+    # skipped — without it, a failed dependency would leave the required
+    # check unreported (i.e. "pending"), the exact failure mode this fixes.
+    if: always() && needs.changes.outputs.deps != 'true'
+    runs-on: ubuntu-latest
+    steps:
+      - run: echo "No pyproject.toml changes, skipping dependency bounds check."
diff --git a/.gitignore b/.gitignore
index 6550af851c6..ee1cb15f449 100644
--- a/.gitignore
+++ b/.gitignore
@@ -82,6 +82,12 @@ mini-swe-agent/
 .nix-stamps/
 result
 website/static/api/skills-index.json
+# skills.json + skills-meta.json are build artifacts emitted by
+# website/scripts/extract-skills.py during prebuild — keep them out of
+# git for the same reason as skills-index.json (large, generated, change
+# every build).
+website/static/api/skills.json
+website/static/api/skills-meta.json
 models-dev-upstream/
 
 # Local editor / agent tooling (machine-specific; keep in global config, not the repo)
@@ -100,3 +106,7 @@ docs/superpowers/*
 # also created in-repo when an agent operates in this checkout). Plans, audit
 # logs, and per-session caches are never artifacts of the codebase.
 .hermes/
+
+# Tool Search live-test harness output — non-deterministic model transcripts,
+# regenerated by scripts/tool_search_livetest.py. Never an artifact of the repo.
+scripts/out/
diff --git a/AGENTS.md b/AGENTS.md
index 7b20cabdf82..6c0036efd5a 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -274,7 +274,7 @@ npm test          # vitest
 
 The dashboard embeds the real `hermes --tui` — **not** a rewrite.  See `hermes_cli/pty_bridge.py` + the `@app.websocket("/api/pty")` endpoint in `hermes_cli/web_server.py`.
 
-- Browser loads `apps/dashboard/src/pages/ChatPage.tsx`, which mounts xterm.js's `Terminal` with the WebGL renderer, `@xterm/addon-fit` for container-driven resize, and `@xterm/addon-unicode11` for modern wide-character widths.
+- Browser loads `web/src/pages/ChatPage.tsx`, which mounts xterm.js's `Terminal` with the WebGL renderer, `@xterm/addon-fit` for container-driven resize, and `@xterm/addon-unicode11` for modern wide-character widths.
 - `/api/pty?token=…` upgrades to a WebSocket; auth uses the same ephemeral `_SESSION_TOKEN` as REST, via query param (browsers can't set `Authorization` on WS upgrade).
 - The server spawns whatever `hermes --tui` would spawn, through `ptyprocess` (POSIX PTY — WSL works, native Windows does not).
 - Frames: raw PTY bytes each direction; resize via `\x1b[RESIZE:<cols>;<rows>]` intercepted on the server and applied with `TIOCSWINSZ`.
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 5b1ae34aa07..10f1563b945 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -43,7 +43,7 @@ Bundled skills (in `skills/`) ship with every Hermes install. They should be **b
 - Document handling, web research, common dev workflows, system administration
 - Used regularly by a wide range of people
 
-If your skill is official and useful but not universally needed (e.g., a paid service integration, a heavyweight dependency), put it in **`optional-skills/`** — it ships with the repo but isn't activated by default. Users can discover it via `hermes skills browse` (labeled "official") and install it with `hermes skills install` (no third-party warning, builtin trust).
+If your skill is official and useful but not universally needed (e.g., a paid service integration, a heavyweight dependency), put it in **`optional-skills/`** — it ships with the repo but isn't activated by default. Users can discover it via `hermes skills browse` (labeled "official") and install it with `hermes skills install` (no third-party warning, built-in trust).
 
 If your skill is specialized, community-contributed, or niche, it's better suited for a **Skills Hub** — upload it to a skills registry and share it in the [Nous Research Discord](https://discord.gg/NousResearch). Users can install it with `hermes skills install`.
 
diff --git a/Dockerfile b/Dockerfile
index 0a1ed56b47f..f1e04a3b641 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -25,7 +25,7 @@ ENV PLAYWRIGHT_BROWSERS_PATH=/opt/hermes/.playwright
 # hermes process, the dashboard, and per-profile gateways.
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
-    ca-certificates curl python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli xz-utils && \
+    ca-certificates curl iputils-ping python3 python-is-python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli xz-utils && \
     rm -rf /var/lib/apt/lists/*
 
 # ---------- s6-overlay install ----------
@@ -187,6 +187,29 @@ RUN chmod -R a+rX /opt/hermes && \
 # this a fast (~1s) egg-link creation with no resolution or downloads.
 RUN uv pip install --no-cache-dir --no-deps -e "."
 
+# ---------- Bake build-time git revision ----------
+# .dockerignore excludes .git, so `git rev-parse HEAD` from inside the
+# container always returns nothing — meaning `hermes dump` reports
+# "(unknown)" and the startup banner drops its `· upstream <sha>` suffix.
+# That makes support triage from container bug reports impossible:
+# we can't tell which commit the user is actually running.
+#
+# Fix: write the commit SHA passed via the HERMES_GIT_SHA build-arg to
+# /opt/hermes/.hermes_build_sha at build time, and have
+# hermes_cli/build_info.py read it at runtime.  Both `hermes dump` and
+# banner.get_git_banner_state() try the baked SHA first, then fall back
+# to live `git rev-parse` for source installs (unchanged behaviour).
+#
+# The arg is optional — local `docker build` without --build-arg simply
+# omits the file, and the runtime falls back to live-git lookup.  CI
+# (.github/workflows/docker-publish.yml) passes ${{ github.sha }} so
+# every published image has it.
+ARG HERMES_GIT_SHA=
+RUN if [ -n "${HERMES_GIT_SHA}" ]; then \
+        printf '%s\n' "${HERMES_GIT_SHA}" > /opt/hermes/.hermes_build_sha && \
+        chown hermes:hermes /opt/hermes/.hermes_build_sha; \
+    fi
+
 # ---------- s6-overlay service wiring ----------
 # Static services declared at build time: main-hermes + dashboard.
 # Per-profile gateway services are registered dynamically at runtime by
@@ -213,13 +236,32 @@ COPY --chmod=0755 docker/cont-init.d/02-reconcile-profiles /etc/cont-init.d/02-r
 # ---------- Runtime ----------
 ENV HERMES_WEB_DIST=/opt/hermes/hermes_cli/web_dist
 ENV HERMES_HOME=/opt/data
+
+# `docker exec` privilege-drop shim. When operators run
+# `docker exec <c> hermes ...` they default to root, and any file the
+# command writes under $HERMES_HOME (auth.json, .env, config.yaml) ends
+# up root-owned and unreadable to the supervised gateway (UID 10000).
+# The shim lives at /opt/hermes/bin/hermes, sits earliest on PATH, and
+# transparently re-exec's the real venv binary via `s6-setuidgid hermes`
+# when invoked as root. Non-root callers (supervised processes,
+# `--user hermes`, etc.) hit the short-circuit path with no overhead.
+# Recursion is impossible because the shim exec's the venv binary by
+# absolute path (/opt/hermes/.venv/bin/hermes). See the shim source for
+# the opt-out env var (HERMES_DOCKER_EXEC_AS_ROOT=1).
+COPY --chmod=0755 docker/hermes-exec-shim.sh /opt/hermes/bin/hermes
+
 # Pre-s6 entrypoint.sh did `source .venv/bin/activate` which exported
 # the venv bin onto PATH; Architecture B's main-wrapper.sh does the
 # same for the container's main process, but `docker exec` and our
 # cont-init.d scripts don't pass through the wrapper. Expose the venv
 # bin globally so `docker exec <container> hermes ...` and any
 # subprocess that doesn't activate the venv first still find hermes.
-ENV PATH="/opt/hermes/.venv/bin:/opt/data/.local/bin:${PATH}"
+#
+# /opt/hermes/bin is prepended ahead of the venv so the privilege-drop
+# shim wins PATH resolution. The shim's last act is to exec the venv
+# binary by absolute path, so this PATH ordering is transparent to
+# every other consumer.
+ENV PATH="/opt/hermes/bin:/opt/hermes/.venv/bin:/opt/data/.local/bin:${PATH}"
 RUN mkdir -p /opt/data
 VOLUME [ "/opt/data" ]
 
diff --git a/MANIFEST.in b/MANIFEST.in
index 876aeeb7d1f..a0296c377c3 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,4 +1,9 @@
 graft skills
 graft optional-skills
+# Bundled plugin manifests (plugin.yaml / plugin.yml). Without these the
+# PluginManager scan (hermes_cli/plugins.py) finds zero plugins on installs
+# built from the sdist (e.g. Homebrew, downstream packagers). package-data
+# below covers the wheel; this covers the sdist. See #34034 / #28149.
+recursive-include plugins plugin.yaml plugin.yml
 global-exclude __pycache__
 global-exclude *.py[cod]
diff --git a/RELEASE_v0.15.0.md b/RELEASE_v0.15.0.md
new file mode 100644
index 00000000000..5230b17f9f6
--- /dev/null
+++ b/RELEASE_v0.15.0.md
@@ -0,0 +1,651 @@
+# Hermes Agent v0.15.0 (v2026.5.28)
+
+**Release Date:** May 28, 2026
+**Since v0.14.0:** 1,302 commits · 747 merged PRs · 1,746 files changed · 282,712 insertions · 36,699 deletions · 560+ issues closed (15 P0, 65 P1, 19 security-tagged) · 321 community contributors (including co-authors)
+
+> **The Velocity Release.** Hermes gets dramatically faster — to start, to run, to ship work, and to grow. The 16,083-line `run_agent.py` collapses to 3,821 (-76%) across 14 cohesive `agent/*` modules. Kanban grew into a real multi-agent platform across 104 PRs — orchestrator auto-decomposition, swarm topology, scheduled tasks, worktree-per-task, per-task model overrides. The cold-start perf wave keeps going: another second shaved off launch, 47% fewer per-conversation function calls, `hermes --version` flipping the head-to-head benchmark against Codex CLI. `session_search` is 4,500× faster and free now. Promptware defense lands against Brainworm-class attacks. Bitwarden Secrets Manager replaces N per-provider API keys with one bootstrap token. Skill bundles let one slash command load a whole workflow. The Ink TUI gets a multi-session orchestrator. Two new image_gen providers (Krea 2 Medium + Large, FAL ported to plugin), the Nous-approved MCP catalog with an interactive picker, an OpenHands orchestration skill, ntfy as the 23rd messaging platform, and a deep xAI integration round (Web Search plugin, xai-oauth `hermes proxy` upstream, retired-May-15 model detection + `hermes migrate xai`, natural TTS speech-tag pauses, base_url leak guard, OpenAI-style execution guidance for Grok). 15 P0 + 65 P1 closures alongside.
+
+---
+
+## ✨ Highlights
+
+- **The Big Refactor — `run_agent.py` is no longer 16,000 lines** — The file at the heart of Hermes — the agent conversation loop — has been reduced from 16,083 lines to 3,821 (-76%), with the extracted code redistributed across 14 cohesive modules under `agent/`. Behavior is unchanged: every extraction keeps a thin forwarder on `AIAgent`, every test patch path still works, every external caller is compatible. The reason you care: future Hermes development moves faster, plugin authors can finally grep the codebase, and the file that took 90 seconds to load in your editor opens in a blink. ([#27248](https://github.com/NousResearch/hermes-agent/pull/27248))
+
+- **Kanban grew into a real multi-agent platform — 104 PRs end to end** — Triage auto-decomposes one task into a tree of sub-tasks. `hermes kanban swarm` creates a full Swarm v1 graph in one command — root, parallel workers, gated verifier, gated synthesizer, shared blackboard. Tasks support per-task model overrides (cheap models for boilerplate, expensive ones for hard sub-tasks), board-level default workdirs, per-task worktree paths and branches, scheduled start times, configurable claim TTL, retry fingerprinting, stale-task detection, respawn guards, and a drag-to-delete trash zone. Workers report through `/workers/active`, `/runs/{id}`, and `/inspect` endpoints. ([#27572](https://github.com/NousResearch/hermes-agent/pull/27572), [#28443](https://github.com/NousResearch/hermes-agent/pull/28443), [#28364](https://github.com/NousResearch/hermes-agent/pull/28364), [#28394](https://github.com/NousResearch/hermes-agent/pull/28394), [#28462](https://github.com/NousResearch/hermes-agent/pull/28462), [#28384](https://github.com/NousResearch/hermes-agent/pull/28384), [#28467](https://github.com/NousResearch/hermes-agent/pull/28467), [#28455](https://github.com/NousResearch/hermes-agent/pull/28455), [#28452](https://github.com/NousResearch/hermes-agent/pull/28452), [#28432](https://github.com/NousResearch/hermes-agent/pull/28432), [#28468](https://github.com/NousResearch/hermes-agent/pull/28468), [#28420](https://github.com/NousResearch/hermes-agent/pull/28420))
+
+- **Cold-start perf wave keeps going — another second saved, 47% fewer per-turn function calls** — Three new optimization rounds: defer `openai._base_client` import (-240ms / -17MB on every CLI invocation), hot-path optimizations cut 47% of per-conversation function calls (399k → 213k for 31-turn chat), defer compression-feasibility check (-170 to -290ms on every agent construction), adaptive subprocess polling (-195ms per tool call, 1+ second per turn). Termux cold start drops from 2.9s to 0.8s. `hermes --version` cold drops 63% (701ms → 258ms), flipping the head-to-head benchmark against Codex CLI from 5/11 wins to 6/11. ([#28864](https://github.com/NousResearch/hermes-agent/pull/28864), [#28866](https://github.com/NousResearch/hermes-agent/pull/28866), [#28957](https://github.com/NousResearch/hermes-agent/pull/28957), [#29006](https://github.com/NousResearch/hermes-agent/pull/29006), [#29419](https://github.com/NousResearch/hermes-agent/pull/29419), [#30121](https://github.com/NousResearch/hermes-agent/pull/30121), [#30609](https://github.com/NousResearch/hermes-agent/pull/30609), [#31968](https://github.com/NousResearch/hermes-agent/pull/31968))
+
+- **`session_search` rebuilt — no LLM, no cost, 4,500× faster** — The old `session_search` was an aux-LLM-powered tool that cost ~$0.30/call and took ~30 seconds to summarize three sessions, sometimes confabulating when the right session wasn't even in the FTS5 hit list. The new shape is one tool with three modes (discovery, scroll, browse) inferred from which args are set — no `mode` parameter, no aux-LLM, no config knob, no companion skill. Discovery is ~20ms instead of ~90s; scroll is ~1ms. Searching your past sessions for context is now free and instant. ([#27590](https://github.com/NousResearch/hermes-agent/pull/27590))
+
+- **Promptware defense — Brainworm-class attacks blocked at three chokepoints** — Inspired by recent Brainworm / Promptware Kill Chain research (Origin HQ, arxiv 2601.09625), Hermes now defends the context window against prompt-injection attacks that try to hijack the agent via tool output, recalled memory, or stored skills. Single source of truth (`tools/threat_patterns.py`) with ~15 new Brainworm/C2 patterns; recalled memory is scanned at load time; tool results get delimiter markers so a malicious file or remote service can't impersonate Hermes' own system content. Paired with a new `security-guidance` plugin that pattern-matches dangerous code writes. ([#32269](https://github.com/NousResearch/hermes-agent/pull/32269), [#33131](https://github.com/NousResearch/hermes-agent/pull/33131), [#9151](https://github.com/NousResearch/hermes-agent/pull/9151))
+
+- **Bitwarden Secrets Manager — one bootstrap token replaces every per-provider API key** — Stop keeping plaintext API keys in `~/.hermes/.env`. Install Bitwarden Secrets Manager (`bws` auto-installs lazily on first use), point Hermes at it with one bootstrap token (`BWS_ACCESS_TOKEN`), and every credential you need comes from Bitwarden at startup. Rotate a key in the Bitwarden web app and the rotation actually takes effect — Bitwarden defaults to source-of-truth so its values overwrite matching env vars on startup. Flip `secrets.bitwarden.override_existing: false` to invert. EU Cloud and self-hosted Bitwarden server URLs supported. Detected credentials are now labeled with their source so you can see at a glance which keys came from Bitwarden vs. the local env. ([#30035](https://github.com/NousResearch/hermes-agent/pull/30035), [#31378](https://github.com/NousResearch/hermes-agent/pull/31378), [#30364](https://github.com/NousResearch/hermes-agent/pull/30364))
+
+- **ntfy as the 23rd messaging platform — push notifications without an account** — ntfy is the self-hostable push-notification service with no signup, no API key, just a topic URL. Hermes now adapts to it as a platform plugin (zero edits to core), so your agent can send you push notifications from any cron job, kanban task completion, or chat `send_message` — to your phone, your watch, your desktop, your homelab. (salvages [#30625](https://github.com/NousResearch/hermes-agent/pull/30625) → originally [#4043](https://github.com/NousResearch/hermes-agent/pull/4043)) ([#30867](https://github.com/NousResearch/hermes-agent/pull/30867))
+
+- **Skill bundles — `/<name>` loads multiple skills at once** — A skill bundle is a named group of skills that loads them all together with one slash command. Set up your "writing day" bundle (humanizer + ideation + obsidian + youtube-content) and `/writing-day` activates all four for the session. Skills Hub now has health checks, a freshness badge, and a watchdog cron. Three new optional skills land: `code-wiki` (Karpathy's LLM-Wiki, persistent indexed dev wiki), `openhands` (delegate to OpenHands for parallel coding agents), and `web-pentest` (OWASP-style web pentest recipes). ([#28373](https://github.com/NousResearch/hermes-agent/pull/28373), [#32345](https://github.com/NousResearch/hermes-agent/pull/32345), [#32240](https://github.com/NousResearch/hermes-agent/pull/32240), [#32261](https://github.com/NousResearch/hermes-agent/pull/32261), [#32265](https://github.com/NousResearch/hermes-agent/pull/32265))
+
+- **TUI session orchestrator — multiple live sessions in one TUI window** — The Ink TUI gained an active-session switcher overlay. List, switch between, refresh, and close multiple live process-local sessions without leaving the TUI; dispatch a new session with a session-scoped model picker. Plus a wave of TUI polish — mouse-tracking DEC mode presets, scrollback preservation across branches and termux, slash-dropdown fixes, x.com link rendering, and CJK / IME input rendering improvements. (salvages [#27642](https://github.com/NousResearch/hermes-agent/pull/27642)) ([#32980](https://github.com/NousResearch/hermes-agent/pull/32980), [#30084](https://github.com/NousResearch/hermes-agent/pull/30084))
+
+- **Two new image_gen providers — Krea 2 Medium + Large, FAL ported to plugin** — Krea joins the image_gen lineup as a built-in plugin: `Krea 2 Medium` ($0.03) and `Krea 2 Large` ($0.06), auto-discovered, selectable via `hermes tools` → Image Generation → Krea. Available through both the native Krea plugin and the FAL.ai catalog. The FAL.ai backend got pulled out of the monolithic image-generation tool into `plugins/image_gen/fal/`, completing the four-way architectural parity already established by web, browser, and video_gen — new image providers are now one file, not a fork. ([#33236](https://github.com/NousResearch/hermes-agent/pull/33236), [#30380](https://github.com/NousResearch/hermes-agent/pull/30380), [#33506](https://github.com/NousResearch/hermes-agent/pull/33506))
+
+- **Nous-approved MCP catalog with interactive picker** — A curated catalog of Nous-vetted MCP servers, mirroring the optional-skills shape. Run `hermes mcp` and you get an interactive picker; install with one keystroke, credentials prompted at install time and written to `~/.hermes/.env`. Ships with the n8n manifest first. Closes the discovery gap that left users hunting GitHub for trusted MCP servers. ([#30870](https://github.com/NousResearch/hermes-agent/pull/30870))
+
+- **OpenHands orchestration skill** — A new optional skill under `optional-skills/autonomous-ai-agents/openhands/` lets the agent delegate coding tasks to the OpenHands CLI alongside `claude-code`, `codex`, and `opencode`. OpenHands is the model-agnostic member of that family — any LiteLLM-supported provider works (OpenAI, Anthropic, OpenRouter, your own), so you can route a sub-task to the cheapest model that can finish it. Drop-in worker for kanban swarms and `/delegate` flows. (closes [#477](https://github.com/NousResearch/hermes-agent/issues/477)) ([#32261](https://github.com/NousResearch/hermes-agent/pull/32261))
+
+- **Deep xAI integration round — Web Search plugin, OAuth proxy upstream, May 15 retirement detection, natural TTS, security hardening** — Six interlocking xAI improvements:
+    - **xAI Web Search** lands as a `plugins/web/xai/` provider, slots alongside Brave / Tavily / Exa / SearXNG / DDGS / Firecrawl — reuses your existing Grok OAuth or `XAI_API_KEY` credentials, no new env vars. ([#29042](https://github.com/NousResearch/hermes-agent/pull/29042))
+    - **`hermes proxy` gains an xAI upstream** — your local OpenAI-compatible endpoint can now be backed by SuperGrok OAuth, no PKCE-refresh code to write in your client. ([#28356](https://github.com/NousResearch/hermes-agent/pull/28356))
+    - **May 15 model retirement detection** — `grok-4`, `grok-4-fast{,-reasoning,-non-reasoning}`, `grok-3`, `grok-code-fast-1`, `grok-imagine-image-pro` etc. are detected in doctor and chat startup, with `hermes migrate xai` to one-shot config migration to the supported model. No more silent 404s after the retirement date. ([#29277](https://github.com/NousResearch/hermes-agent/pull/29277))
+    - **Opt-in `auto_speech_tags`** for xAI TTS — inserts light `[pause]` tags between paragraphs and sentences for more natural-sounding voice replies. Default OFF. ([#29376](https://github.com/NousResearch/hermes-agent/pull/29376))
+    - **`xai-oauth` `base_url` pinned to `x.ai` origin** — closes a silent credential-leak vector where `XAI_BASE_URL` could repoint OAuth-authenticated inference to an attacker-controlled host. ([#28952](https://github.com/NousResearch/hermes-agent/pull/28952))
+    - **OpenAI-style execution guidance applied to Grok models** — Grok and xai-oauth now get the same family-specific execution discipline block GPT/Codex have, so the model stops claiming completion without tool calls and stops suggesting workarounds instead of using existing tools. ([#27797](https://github.com/NousResearch/hermes-agent/pull/27797))
+    - Plus `x_search` degraded-results surfacing, tier-gated 403 with API-key fallback, PKCE `code_challenge` round-trip fix, dead-token quarantine on terminal refresh failure, MiniMax-style short-token refresh on per-request, and `WKE=unauthenticated` honor at both classifier sites. ([#29484](https://github.com/NousResearch/hermes-agent/pull/29484), [#28351](https://github.com/NousResearch/hermes-agent/pull/28351), [#27560](https://github.com/NousResearch/hermes-agent/pull/27560), [#28116](https://github.com/NousResearch/hermes-agent/pull/28116), [#30619](https://github.com/NousResearch/hermes-agent/pull/30619), [#30872](https://github.com/NousResearch/hermes-agent/pull/30872))
+
+---
+
+## 🏗️ Core Agent & Architecture
+
+### The Big Refactor — `run_agent.py` 16k → 3.8k
+
+- `run_agent.py` from 16,083 → 3,821 lines (-76%), extracted into 14 cohesive `agent/*` modules. `run_conversation` alone was 3,877 lines before the refactor. Every extraction keeps a thin forwarder on `AIAgent`, every test-patch path is preserved, every external caller stays compatible. ([#27248](https://github.com/NousResearch/hermes-agent/pull/27248))
+
+### Agent loop & conversation
+
+- Auxiliary task layered fallback (primary → chain → main agent → graceful fail) on capacity errors (402/429/connection). (salvages [#26811](https://github.com/NousResearch/hermes-agent/pull/26811) + [#26998](https://github.com/NousResearch/hermes-agent/pull/26998)) ([#27625](https://github.com/NousResearch/hermes-agent/pull/27625))
+- Buffer retry/fallback status; surface only on terminal failure (no more noisy "retrying..." spam in mid-run output). ([#33816](https://github.com/NousResearch/hermes-agent/pull/33816))
+- Host contract for external context engines — condenses 5 prior PRs into one extension surface. ([#33750](https://github.com/NousResearch/hermes-agent/pull/33750))
+- Fallback immediately on provider content-policy blocks. ([#33883](https://github.com/NousResearch/hermes-agent/pull/33883))
+- Re-pad `reasoning_content` on cross-provider fallback to require-side providers. (salvage [#33784](https://github.com/NousResearch/hermes-agent/pull/33784)) ([#33795](https://github.com/NousResearch/hermes-agent/pull/33795))
+- Per-turn tool-outcome verifier — patch tool gets indent preservation, CRLF preservation, per-file failure escalation. ([#32273](https://github.com/NousResearch/hermes-agent/pull/32273))
+- Single-knob native vision for custom-provider models. ([#29679](https://github.com/NousResearch/hermes-agent/pull/29679))
+- Background review fork isolated from external memory plugins. ([#27190](https://github.com/NousResearch/hermes-agent/pull/27190))
+- Background review inherits parent toolset config for `tools[]` cache parity. ([#29704](https://github.com/NousResearch/hermes-agent/pull/29704))
+- Recover from providers returning list-type tool content. ([#30259](https://github.com/NousResearch/hermes-agent/pull/30259))
+- Treat partial-stream stub responses as length truncation rather than clean stop. ([#30998](https://github.com/NousResearch/hermes-agent/pull/30998))
+- OpenAI execution guidance applied to xAI Grok / xai-oauth. ([#27797](https://github.com/NousResearch/hermes-agent/pull/27797))
+- ContextVars propagate to concurrent tool worker threads.
+- Preload `jiter` native parser. ([#33692](https://github.com/NousResearch/hermes-agent/pull/33692))
+- Expose context engine tools with saved toolsets. (salvage of [#31194](https://github.com/NousResearch/hermes-agent/pull/31194)) ([#33719](https://github.com/NousResearch/hermes-agent/pull/33719))
+
+### Sessions & memory
+
+- `session_search` rebuilt — single-shape (discovery + scroll + browse), no aux-LLM, ~20ms vs. ~90s. ([#27590](https://github.com/NousResearch/hermes-agent/pull/27590))
+- Salvage [#29182](https://github.com/NousResearch/hermes-agent/pull/29182) — opt-in JSON snapshot writer for sessions. ([#29278](https://github.com/NousResearch/hermes-agent/pull/29278))
+- Persist `platform_message_id` for recall across gateway restarts. ([#29449](https://github.com/NousResearch/hermes-agent/pull/29449))
+- Inline memory-context mentions stay visible in conversation. ([#28132](https://github.com/NousResearch/hermes-agent/pull/28132))
+- Recalled memory labeled informational, not authoritative. ([#28583](https://github.com/NousResearch/hermes-agent/pull/28583))
+- Memory + context-engine tool injection gated on `enabled_toolsets`. ([#30177](https://github.com/NousResearch/hermes-agent/pull/30177))
+- Guard against external drift in `MEMORY.md` / `USER.md`. ([#30877](https://github.com/NousResearch/hermes-agent/pull/30877))
+- Honcho runtime peer mapping — correctness follow-ups + setup wizard + docs. ([#30077](https://github.com/NousResearch/hermes-agent/pull/30077))
+- Periodic memory logging for leak detection. (salvage of [#17667](https://github.com/NousResearch/hermes-agent/pull/17667)) ([#27102](https://github.com/NousResearch/hermes-agent/pull/27102))
+
+### Codex / Responses-API maturation
+
+- TTFB watchdog for stalled Codex Responses streams. ([#32042](https://github.com/NousResearch/hermes-agent/pull/32042))
+- Actionable hint when stale-call detector fires on known silent-reject pattern. ([#32016](https://github.com/NousResearch/hermes-agent/pull/32016), [#33133](https://github.com/NousResearch/hermes-agent/pull/33133))
+- Drop SDK `responses.stream()` helper; consume events directly. ([#33042](https://github.com/NousResearch/hermes-agent/pull/33042))
+- Gracefully recover from `invalid_encrypted_content`. (salvage of [#10144](https://github.com/NousResearch/hermes-agent/pull/10144)) ([#33035](https://github.com/NousResearch/hermes-agent/pull/33035))
+- Recover Codex Responses streams with null output. ([#32963](https://github.com/NousResearch/hermes-agent/pull/32963), [#33390](https://github.com/NousResearch/hermes-agent/pull/33390))
+- Drop foreign-issuer reasoning and transient `rs_tmp` reasoning replay state. ([#33156](https://github.com/NousResearch/hermes-agent/pull/33156), [#33146](https://github.com/NousResearch/hermes-agent/pull/33146))
+- Codex 429 quota classified as rate-limit, not missing credentials. ([#33168](https://github.com/NousResearch/hermes-agent/pull/33168))
+- Codex chat path falls back to credential_pool when singleton is empty. ([#33189](https://github.com/NousResearch/hermes-agent/pull/33189))
+- Codex re-auth syncs credential_pool. ([#33164](https://github.com/NousResearch/hermes-agent/pull/33164))
+- Omit `tools` key when no tools registered. ([#33409](https://github.com/NousResearch/hermes-agent/pull/33409))
+- Parse Codex image-generation SSE directly. ([#32933](https://github.com/NousResearch/hermes-agent/pull/32933))
+
+---
+
+## 🎛️ Kanban — Multi-Agent Maturation Wave
+
+### Orchestration & dispatch
+
+- Orchestrator-driven auto-decomposition on triage. ([#27572](https://github.com/NousResearch/hermes-agent/pull/27572))
+- Kanban swarm topology helper — `hermes kanban swarm` creates a Swarm v1 graph (root + parallel workers + gated verifier + gated synthesizer + shared blackboard). (salvages [#26791](https://github.com/NousResearch/hermes-agent/pull/26791) by @Niraven) ([#28443](https://github.com/NousResearch/hermes-agent/pull/28443))
+- Dispatcher wires review agents from the review column. ([#28449](https://github.com/NousResearch/hermes-agent/pull/28449))
+- Stale-detection for running tasks in dispatcher. ([#28452](https://github.com/NousResearch/hermes-agent/pull/28452))
+- Respawn guard blocks repeat worker storms. ([#28455](https://github.com/NousResearch/hermes-agent/pull/28455))
+- Respawn guard defers `blocker_auth` instead of auto-blocking. ([#28683](https://github.com/NousResearch/hermes-agent/pull/28683))
+- Cross-profile cron jobs surface in dashboard. ([#28457](https://github.com/NousResearch/hermes-agent/pull/28457))
+- Worker visibility endpoints: `/workers/active`, `/runs/{id}`, `/inspect`. (salvages [#23761](https://github.com/NousResearch/hermes-agent/pull/23761) by @Interstellar-code) ([#28432](https://github.com/NousResearch/hermes-agent/pull/28432))
+
+### Task configuration & scheduling
+
+- Per-task model override. ([#28364](https://github.com/NousResearch/hermes-agent/pull/28364))
+- Board-level default workdir. ([#28394](https://github.com/NousResearch/hermes-agent/pull/28394))
+- Configurable worktree paths and branches. ([#28462](https://github.com/NousResearch/hermes-agent/pull/28462))
+- Scheduled task start times. ([#28384](https://github.com/NousResearch/hermes-agent/pull/28384))
+- Scheduled status for delayed follow-ups. ([#28467](https://github.com/NousResearch/hermes-agent/pull/28467))
+- Trimmed task comments. ([#28399](https://github.com/NousResearch/hermes-agent/pull/28399))
+- Initial-status for human-ops cards. ([#28414](https://github.com/NousResearch/hermes-agent/pull/28414))
+- `max_in_progress` config to cap concurrent running tasks. ([#28420](https://github.com/NousResearch/hermes-agent/pull/28420))
+- Filter tasks by workflow fields. ([#28454](https://github.com/NousResearch/hermes-agent/pull/28454))
+- `--sort` for `hermes kanban list`. ([#28427](https://github.com/NousResearch/hermes-agent/pull/28427))
+- Optional `board` parameter on all MCP tools. ([#28444](https://github.com/NousResearch/hermes-agent/pull/28444))
+- Stamp originating ACP session_id on tasks. ([#28447](https://github.com/NousResearch/hermes-agent/pull/28447))
+- `auto_promote_children` config toggle. ([#28344](https://github.com/NousResearch/hermes-agent/pull/28344))
+- `archive --rm` to hard-delete archived tasks. ([#28355](https://github.com/NousResearch/hermes-agent/pull/28355))
+- Promote dependents when parent is archived. ([#28372](https://github.com/NousResearch/hermes-agent/pull/28372))
+- Promote blocked tasks when parent dependencies complete. ([#28377](https://github.com/NousResearch/hermes-agent/pull/28377))
+- Demote ready children when parent is reopened. ([#28382](https://github.com/NousResearch/hermes-agent/pull/28382))
+- `promote` verb for manual `todo→ready` recovery + bulk `--ids`. (salvage [#29464](https://github.com/NousResearch/hermes-agent/pull/29464)) ([#31334](https://github.com/NousResearch/hermes-agent/pull/31334))
+
+### Dashboard
+
+- Drag-to-delete trash zone + bulk delete. ([#28468](https://github.com/NousResearch/hermes-agent/pull/28468))
+- Surface per-task `model_override` in show + tool output. ([#28442](https://github.com/NousResearch/hermes-agent/pull/28442))
+- Cross-profile notification delivery via `kanban.notification_sources`. ([#28395](https://github.com/NousResearch/hermes-agent/pull/28395))
+- Scratch-workspace deletion warning for users. ([#30949](https://github.com/NousResearch/hermes-agent/pull/30949))
+- Mobile dashboard UX polish. ([#28127](https://github.com/NousResearch/hermes-agent/pull/28127))
+
+### Reliability
+
+- Worker log retention configurable. ([#27867](https://github.com/NousResearch/hermes-agent/pull/27867))
+- Configurable claim TTL. ([#28392](https://github.com/NousResearch/hermes-agent/pull/28392))
+- Fingerprint crash errors to prevent fleet-wide retry exhaustion. ([#28380](https://github.com/NousResearch/hermes-agent/pull/28380))
+- Reset failure counters on `unblock_task`. ([#28379](https://github.com/NousResearch/hermes-agent/pull/28379))
+- Detect cycles in `decompose_triage_task` sibling-link pre-validation. ([#28088](https://github.com/NousResearch/hermes-agent/pull/28088))
+- Surface unusable triage auxiliary model (auto-decompose aware). ([#27871](https://github.com/NousResearch/hermes-agent/pull/27871))
+- Align failure diagnostics with retry limit. ([#27868](https://github.com/NousResearch/hermes-agent/pull/27868))
+- Align worker terminal timeout with task runtime. ([#27864](https://github.com/NousResearch/hermes-agent/pull/27864))
+- Auto-install bundled skills (kanban-worker) on init. ([#28368](https://github.com/NousResearch/hermes-agent/pull/28368))
+- Make legacy task migration idempotent. ([#28397](https://github.com/NousResearch/hermes-agent/pull/28397))
+- Serialize DB initialization. ([#28383](https://github.com/NousResearch/hermes-agent/pull/28383))
+- Persist worker session metadata on completion. ([#28387](https://github.com/NousResearch/hermes-agent/pull/28387))
+- Pass `accept-hooks` to worker chat subprocess. ([#28393](https://github.com/NousResearch/hermes-agent/pull/28393))
+- Preserve worker tools with restricted toolsets. ([#28396](https://github.com/NousResearch/hermes-agent/pull/28396))
+- Avoid unsafe Windows worker Hermes shim resolution. ([#28398](https://github.com/NousResearch/hermes-agent/pull/28398))
+- Sync slash subcommands with live parser. ([#28376](https://github.com/NousResearch/hermes-agent/pull/28376))
+- Show scheduled kanban tasks in dashboard. ([#28400](https://github.com/NousResearch/hermes-agent/pull/28400))
+- Assign single-task kanban decompositions. ([#28401](https://github.com/NousResearch/hermes-agent/pull/28401))
+- Configurable `max_tokens` for kanban specify. ([#28374](https://github.com/NousResearch/hermes-agent/pull/28374))
+- Per-job profile support for cron. ([#28124](https://github.com/NousResearch/hermes-agent/pull/28124))
+- Codex app-server: include every Kanban-pinned path in `writable_roots`. ([#28435](https://github.com/NousResearch/hermes-agent/pull/28435))
+- Cache kanban worker guidance at session init for prompt-cache reuse. ([#28425](https://github.com/NousResearch/hermes-agent/pull/28425))
+
+---
+
+## ⚡ Performance
+
+- `openai._base_client` import deferred — 240ms / 17MB off every CLI cold start. ([#28864](https://github.com/NousResearch/hermes-agent/pull/28864))
+- Agent-loop hot-path optimizations — 47% fewer per-conversation function calls (399k → 213k for 31-turn chat). ([#28866](https://github.com/NousResearch/hermes-agent/pull/28866))
+- Compression-feasibility check deferred — 170-290ms off every agent construction. ([#28957](https://github.com/NousResearch/hermes-agent/pull/28957))
+- Adaptive subprocess poll — ~195ms off every tool call, 1+ second per turn. ([#29006](https://github.com/NousResearch/hermes-agent/pull/29006))
+- Termux TUI cold start speedup. ([#29419](https://github.com/NousResearch/hermes-agent/pull/29419))
+- Termux non-TUI cold start speedup. (salvage [#29438](https://github.com/NousResearch/hermes-agent/pull/29438)) ([#30121](https://github.com/NousResearch/hermes-agent/pull/30121))
+- Termux fast-path version + deferred bare-prompt agent startup. ([#30609](https://github.com/NousResearch/hermes-agent/pull/30609))
+- Cut hermes `--version` wall time 63% — flips head-to-head vs Codex CLI. ([#31968](https://github.com/NousResearch/hermes-agent/pull/31968))
+- Date-only timestamp + loud gateway-DB roundtrip logging — improves prompt-cache hit rate. ([#27675](https://github.com/NousResearch/hermes-agent/pull/27675))
+- Cache kanban worker guidance at session init for prompt-cache reuse. ([#28425](https://github.com/NousResearch/hermes-agent/pull/28425))
+
+---
+
+## 🔧 Tool System
+
+### Tool surface
+
+- `patch`: indent preservation, CRLF preservation, per-file failure escalation. ([#32273](https://github.com/NousResearch/hermes-agent/pull/32273))
+- `terminal`: warn at call time when `background=true` runs silently. ([#31289](https://github.com/NousResearch/hermes-agent/pull/31289))
+- `terminal`: nudge homebrewed CI pollers at the tool surface. ([#33142](https://github.com/NousResearch/hermes-agent/pull/33142))
+- `x_search`: surface degraded results + validate dates. ([#29484](https://github.com/NousResearch/hermes-agent/pull/29484))
+- `x_search`: auto-enable toolset when xAI credentials are configured. ([#27376](https://github.com/NousResearch/hermes-agent/pull/27376))
+- `computer_use`: route SOM/vision captures via auxiliary.vision. ([#30126](https://github.com/NousResearch/hermes-agent/pull/30126))
+- `transcription`: reject symlinked audio inputs. ([#10082](https://github.com/NousResearch/hermes-agent/pull/10082))
+- TTS: prevent double `[pause]` in xAI auto speech tags. ([#32237](https://github.com/NousResearch/hermes-agent/pull/32237))
+- TTS: preserve native audio outside Telegram voice delivery. ([#28512](https://github.com/NousResearch/hermes-agent/pull/28512))
+- TTS: opt-in xAI `auto_speech_tags` speech-tag pauses for natural voice replies. ([#29376](https://github.com/NousResearch/hermes-agent/pull/29376))
+- Voice: chunk oversized CLI recordings. ([#30044](https://github.com/NousResearch/hermes-agent/pull/30044))
+- Voice: honor `PULSE_SERVER` / `PIPEWIRE_REMOTE` inside Docker. ([#22534](https://github.com/NousResearch/hermes-agent/pull/22534))
+
+### Browser
+
+- All cloud browser providers (Browserbase, Anchor, Camofox, Hyperbrowser, etc.) migrated to image_gen-style plugins. (salvages [#25580](https://github.com/NousResearch/hermes-agent/pull/25580)) ([#27403](https://github.com/NousResearch/hermes-agent/pull/27403))
+- Auto-launch Chromium-family browser for CDP. ([#29106](https://github.com/NousResearch/hermes-agent/pull/29106))
+- Docker: discover agent-browser Chromium binary at boot. ([#33184](https://github.com/NousResearch/hermes-agent/pull/33184))
+
+### Image generation
+
+- **Krea** provider plugin (Krea 2 Medium + Large). ([#33236](https://github.com/NousResearch/hermes-agent/pull/33236))
+- FAL backend ported to `plugins/image_gen/fal`. (salvage [#27966](https://github.com/NousResearch/hermes-agent/pull/27966)) ([#30380](https://github.com/NousResearch/hermes-agent/pull/30380))
+- Cache xAI ephemeral URL responses to disk. ([#31759](https://github.com/NousResearch/hermes-agent/pull/31759))
+
+### Web search
+
+- **xAI Web Search** as a provider plugin. ([#29042](https://github.com/NousResearch/hermes-agent/pull/29042))
+
+### MCP
+
+- **Nous-approved MCP catalog** with interactive picker. ([#30870](https://github.com/NousResearch/hermes-agent/pull/30870))
+- **TLS client certificate (mTLS) support** for HTTP and SSE MCP servers. ([#33721](https://github.com/NousResearch/hermes-agent/pull/33721))
+- Stdin paste-back fallback for headless OAuth flow. ([#32053](https://github.com/NousResearch/hermes-agent/pull/32053))
+- `skip` at paste prompt bypasses auth without disabling server. ([#32069](https://github.com/NousResearch/hermes-agent/pull/32069))
+- Registry-aware `mcp_` prefix on both ends of round-trip. ([#31700](https://github.com/NousResearch/hermes-agent/pull/31700))
+
+---
+
+## 🧩 Skills Ecosystem
+
+### Skills system
+
+- **Skill bundles** — `/<name>` loads multiple skills. ([#28373](https://github.com/NousResearch/hermes-agent/pull/28373))
+- Skills Hub: health checks, freshness badge, and a watchdog cron. ([#32345](https://github.com/NousResearch/hermes-agent/pull/32345))
+- Opt-in AST deep diagnostics on skill writes. (salvage of [#30918](https://github.com/NousResearch/hermes-agent/pull/30918)) ([#31198](https://github.com/NousResearch/hermes-agent/pull/31198))
+- Bundled/pinned skill protection in background-review prompts. ([#28338](https://github.com/NousResearch/hermes-agent/pull/28338))
+- Show user-modified skill names in bundled skill sync summary. ([#28671](https://github.com/NousResearch/hermes-agent/pull/28671))
+- Load symlinked skill slash commands. ([#27759](https://github.com/NousResearch/hermes-agent/pull/27759))
+- Deduplicate Skills Hub search results by identifier, not name. ([#29490](https://github.com/NousResearch/hermes-agent/pull/29490))
+
+### New skills
+
+- `openhands` — delegate-to-OpenHands orchestration skill (closes [#477](https://github.com/NousResearch/hermes-agent/issues/477)) ([#32261](https://github.com/NousResearch/hermes-agent/pull/32261))
+- `code-wiki` — persistent indexed dev wiki (closes [#486](https://github.com/NousResearch/hermes-agent/issues/486)) ([#32240](https://github.com/NousResearch/hermes-agent/pull/32240))
+- `web-pentest` — OWASP recipes (closes [#400](https://github.com/NousResearch/hermes-agent/issues/400)) ([#32265](https://github.com/NousResearch/hermes-agent/pull/32265))
+- `baoyu-article-illustrator` ([#28287](https://github.com/NousResearch/hermes-agent/pull/28287))
+
+---
+
+## ☁️ Providers
+
+### xAI deep integration
+
+- **xAI Web Search** as a `plugins/web/xai/` provider plugin. ([#29042](https://github.com/NousResearch/hermes-agent/pull/29042))
+- **`hermes proxy` xAI upstream** — OpenAI-compatible local proxy backed by xai-oauth. ([#28356](https://github.com/NousResearch/hermes-agent/pull/28356))
+- **May 15 model retirement detection + `hermes migrate xai`** for grok-4 / grok-3 / grok-code-fast-1 / grok-imagine-image-pro. ([#29277](https://github.com/NousResearch/hermes-agent/pull/29277))
+- **Opt-in `auto_speech_tags`** for natural xAI TTS voice replies. ([#29376](https://github.com/NousResearch/hermes-agent/pull/29376))
+- **xai-oauth base_url pinned to x.ai origin** — closes silent credential-leak vector. ([#28952](https://github.com/NousResearch/hermes-agent/pull/28952))
+- **OpenAI-style execution guidance** applied to Grok / xai-oauth models. ([#27797](https://github.com/NousResearch/hermes-agent/pull/27797))
+- xAI: detect retired May 15 models in doctor/chat startup. ([#29277](https://github.com/NousResearch/hermes-agent/pull/29277))
+- xAI: resolve Grok Build context for OAuth. ([#30579](https://github.com/NousResearch/hermes-agent/pull/30579))
+- xAI OAuth: tier-gated 403 with API-key fallback. ([#28351](https://github.com/NousResearch/hermes-agent/pull/28351))
+- xAI OAuth: PKCE `code_challenge` echo. ([#27560](https://github.com/NousResearch/hermes-agent/pull/27560))
+- xAI OAuth: quarantine dead tokens on terminal refresh failure. ([#28116](https://github.com/NousResearch/hermes-agent/pull/28116))
+- xAI OAuth: honor `WKE=unauthenticated` disambiguator at both classifier sites. ([#30872](https://github.com/NousResearch/hermes-agent/pull/30872))
+- xAI OAuth: accept bare-code manual paste (state=None). (closes [#26923](https://github.com/NousResearch/hermes-agent/issues/26923)) ([#33880](https://github.com/NousResearch/hermes-agent/pull/33880))
+- xAI OAuth: fall back to manual paste on loopback timeout. ([#33231](https://github.com/NousResearch/hermes-agent/pull/33231))
+- xAI proxy: handle 429 rate-limit responses in proxy retry path. ([#33743](https://github.com/NousResearch/hermes-agent/pull/33743))
+
+### Other providers
+
+- **OpenAI API as a first-class provider** (distinct from Codex runtime). ([#31898](https://github.com/NousResearch/hermes-agent/pull/31898))
+- **Microsoft Entra ID** auth for Azure Foundry (with 1M Anthropic-Messages beta preserved on Bearer). (salvages [#27509](https://github.com/NousResearch/hermes-agent/pull/27509), [#27022](https://github.com/NousResearch/hermes-agent/pull/27022)) ([#28101](https://github.com/NousResearch/hermes-agent/pull/28101), [#28084](https://github.com/NousResearch/hermes-agent/pull/28084))
+- **OpenRouter** sticky routing — `session_id` passed via `extra_body` so a long-running session keeps landing on the same upstream provider. (@Cybourgeoisie) ([#33939](https://github.com/NousResearch/hermes-agent/pull/33939))
+- Nous: JWT token for inference; stop replaying invalid Nous refresh tokens. (@rewbs) ([#27663](https://github.com/NousResearch/hermes-agent/pull/27663))
+- Nous Portal: one-shot setup, status CLI, and Nous-included markers. ([#30860](https://github.com/NousResearch/hermes-agent/pull/30860))
+- Anthropic adapter: extract 7 helpers from `convert_messages_to_anthropic`. (salvage [#27784](https://github.com/NousResearch/hermes-agent/pull/27784)) ([#30386](https://github.com/NousResearch/hermes-agent/pull/30386))
+- Catalog: add `qwen3.7-max` to Alibaba + Alibaba-Coding-Plan model lists. ([#33129](https://github.com/NousResearch/hermes-agent/pull/33129))
+- opencode-go: route `qwen3.7-max` via `anthropic_messages`. (@beardthelion) ([#32780](https://github.com/NousResearch/hermes-agent/pull/32780))
+- opencode-go: expose Kimi K2 + DeepSeek reasoning controls. ([#30845](https://github.com/NousResearch/hermes-agent/pull/30845))
+- Remove Vercel AI Gateway and Vercel Sandbox.
+- MiniMax OAuth: refresh short-lived access tokens per request. ([#30619](https://github.com/NousResearch/hermes-agent/pull/30619))
+- Codex OAuth: quarantine terminal refresh errors. ([#28118](https://github.com/NousResearch/hermes-agent/pull/28118))
+- Codex: drop dead model slugs that HTTP 400 on ChatGPT Pro. ([#33424](https://github.com/NousResearch/hermes-agent/pull/33424))
+- Codex: sync `manual:device_code` pool entries on re-auth. ([#33744](https://github.com/NousResearch/hermes-agent/pull/33744))
+- MiniMax OAuth: quarantine terminal refresh errors. ([#28119](https://github.com/NousResearch/hermes-agent/pull/28119))
+
+---
+
+## 🔑 Secrets
+
+- **Bitwarden Secrets Manager** integration with lazy `bws` install. ([#30035](https://github.com/NousResearch/hermes-agent/pull/30035))
+- Bitwarden: EU Cloud + self-hosted server URL support. ([#31378](https://github.com/NousResearch/hermes-agent/pull/31378))
+- Label detected credentials with their source (Bitwarden). ([#30364](https://github.com/NousResearch/hermes-agent/pull/30364))
+
+---
+
+## 📱 Messaging Platforms (Gateway)
+
+### Gateway core
+
+- **Deliverable mode** — agents ship artifacts as native uploads from any platform (Slack/Discord/Telegram/Teams/Email). ([#27813](https://github.com/NousResearch/hermes-agent/pull/27813))
+- `hermes send` — pipe any script's output to any messaging platform. (salvage of [#19631](https://github.com/NousResearch/hermes-agent/pull/19631)) ([#27188](https://github.com/NousResearch/hermes-agent/pull/27188))
+- Debounce queued text follow-ups during active sessions. (salvage of [#31235](https://github.com/NousResearch/hermes-agent/pull/31235)) ([#31341](https://github.com/NousResearch/hermes-agent/pull/31341))
+- Plugin-transformed final_response delivered through streaming gate. ([#31433](https://github.com/NousResearch/hermes-agent/pull/31433))
+- Refresh cached agent tools on `/reload-mcp`. ([#32815](https://github.com/NousResearch/hermes-agent/pull/32815))
+- Harden kanban + provider cleanup races on long-running workloads. ([#29479](https://github.com/NousResearch/hermes-agent/pull/29479))
+
+### New / reorganized adapters
+
+- **ntfy** — 23rd platform, push notifications, plugin shape, zero core edits. (salvages [#30625](https://github.com/NousResearch/hermes-agent/pull/30625) → [#4043](https://github.com/NousResearch/hermes-agent/pull/4043)) ([#30867](https://github.com/NousResearch/hermes-agent/pull/30867))
+- **Discord** adapter migrated to bundled plugin. (salvage of [#24356](https://github.com/NousResearch/hermes-agent/pull/24356)) ([#30591](https://github.com/NousResearch/hermes-agent/pull/30591))
+- **Mattermost** adapter migrated to bundled plugin. (salvage of [#30916](https://github.com/NousResearch/hermes-agent/pull/30916)) ([#31748](https://github.com/NousResearch/hermes-agent/pull/31748))
+
+### Telegram
+
+- Edit status messages in place instead of appending. (based on [#30141](https://github.com/NousResearch/hermes-agent/pull/30141) by @qike-ms) ([#30864](https://github.com/NousResearch/hermes-agent/pull/30864))
+- Skip-STT audio path + 2GB cap via local Bot API server. ([#28541](https://github.com/NousResearch/hermes-agent/pull/28541))
+- Route image documents (.png/.jpg/.webp/.gif) through vision pipeline. ([#28519](https://github.com/NousResearch/hermes-agent/pull/28519))
+- Route audio file attachments away from STT pipeline. ([#28478](https://github.com/NousResearch/hermes-agent/pull/28478))
+- `disable_topic_auto_rename` gateway flag. ([#28523](https://github.com/NousResearch/hermes-agent/pull/28523))
+- `ignore_root_dm` config to drop messages without thread_id. ([#28536](https://github.com/NousResearch/hermes-agent/pull/28536))
+- Chat-scoped auth without sender user_id. ([#28525](https://github.com/NousResearch/hermes-agent/pull/28525))
+- Fail-closed auth fallback when `TELEGRAM_ALLOWED_USERS` is empty. ([#28494](https://github.com/NousResearch/hermes-agent/pull/28494))
+- Roll over tool progress bubbles + scope audio_file_paths. ([#28482](https://github.com/NousResearch/hermes-agent/pull/28482))
+- Avoid duplicate text after auto-TTS voice replies. ([#28509](https://github.com/NousResearch/hermes-agent/pull/28509))
+- Mark final voice reply notify-worthy so Telegram delivers it audibly. ([#28504](https://github.com/NousResearch/hermes-agent/pull/28504))
+
+### Discord
+
+- Recover Windows voice opus decoding. ([#33182](https://github.com/NousResearch/hermes-agent/pull/33182))
+- `allow_any_attachment` config to accept arbitrary file types. ([#27245](https://github.com/NousResearch/hermes-agent/pull/27245))
+- Transcribe native voice notes. ([#28993](https://github.com/NousResearch/hermes-agent/pull/28993))
+- Define UI view classes after lazy install. ([#28817](https://github.com/NousResearch/hermes-agent/pull/28817))
+
+### Signal / Matrix / Feishu / Slack / WeCom
+
+- Signal: `require_mention` filter for group chats. ([#28574](https://github.com/NousResearch/hermes-agent/pull/28574))
+- Matrix: warn on clock-skew silent message drops. ([#27330](https://github.com/NousResearch/hermes-agent/pull/27330))
+- Matrix E2EE installs full dep set; plugins respect `is_connected`. ([#31688](https://github.com/NousResearch/hermes-agent/pull/31688))
+- Feishu: require webhook auth secret + honor config extras. ([#30746](https://github.com/NousResearch/hermes-agent/pull/30746))
+- Feishu: enforce auth and chat binding for approval buttons. ([#30744](https://github.com/NousResearch/hermes-agent/pull/30744))
+- Slack: socket recovery + Windows restart dedupe. ([#28873](https://github.com/NousResearch/hermes-agent/pull/28873))
+- WeCom: safe-parse untrusted XML. ([#32442](https://github.com/NousResearch/hermes-agent/pull/32442))
+
+### DingTalk / Webhooks / Microsoft Graph
+
+- DingTalk: transcribe native voice notes. ([#28993](https://github.com/NousResearch/hermes-agent/pull/28993))
+- Webhook: enforce `INSECURE_NO_AUTH` safety rail on dynamic route reloads. ([#30863](https://github.com/NousResearch/hermes-agent/pull/30863))
+- Webhook: restrict default toolset capabilities. ([#30745](https://github.com/NousResearch/hermes-agent/pull/30745))
+- Microsoft Graph: harden webhook auth requirements. ([#30169](https://github.com/NousResearch/hermes-agent/pull/30169))
+
+---
+
+## 🖥️ CLI & TUI
+
+### CLI
+
+- `/update` slash command in CLI and TUI. ([#23854](https://github.com/NousResearch/hermes-agent/pull/23854))
+- Update auto-rollback when post-pull syntax check fails. ([#28669](https://github.com/NousResearch/hermes-agent/pull/28669))
+- `--branch` flag for `hermes update`. (@jquesnelle) ([#29591](https://github.com/NousResearch/hermes-agent/pull/29591))
+- `/exit --delete` flag to remove session on quit. (salvage of [#17665](https://github.com/NousResearch/hermes-agent/pull/17665)) ([#27101](https://github.com/NousResearch/hermes-agent/pull/27101))
+- `▶ N` indicator in status bar for running `/background` tasks. ([#27175](https://github.com/NousResearch/hermes-agent/pull/27175))
+- Live background terminal-process count in status bar. ([#32061](https://github.com/NousResearch/hermes-agent/pull/32061))
+- Append session recap to `/status` output. (salvage of [#18587](https://github.com/NousResearch/hermes-agent/pull/18587)) ([#27176](https://github.com/NousResearch/hermes-agent/pull/27176))
+- Configurable paste-collapse thresholds (TUI + CLI). (salvage [#29723](https://github.com/NousResearch/hermes-agent/pull/29723)) ([#32087](https://github.com/NousResearch/hermes-agent/pull/32087))
+- `/resume` accepts position numbers. ([#31709](https://github.com/NousResearch/hermes-agent/pull/31709))
+- Bring tool-call display back — verbose mode, specific failure reasons, todo progress. ([#31293](https://github.com/NousResearch/hermes-agent/pull/31293))
+- Validate runtime token refresh in Qwen auth status. ([#31196](https://github.com/NousResearch/hermes-agent/pull/31196))
+
+### TUI
+
+- **TUI session orchestrator** — multiple live sessions in one TUI window. (salvages [#27642](https://github.com/NousResearch/hermes-agent/pull/27642)) ([#32980](https://github.com/NousResearch/hermes-agent/pull/32980))
+- `mouse_tracking` DEC mode presets. (salvage of [#26681](https://github.com/NousResearch/hermes-agent/pull/26681) by @OutThisLife) ([#30084](https://github.com/NousResearch/hermes-agent/pull/30084))
+- Termux scrollback preservation + touch-friendly defaults. ([#28910](https://github.com/NousResearch/hermes-agent/pull/28910))
+- Full assistant text in scrollback (no history truncation). ([#28829](https://github.com/NousResearch/hermes-agent/pull/28829))
+- Preserve scrollback when branching sessions. ([#30162](https://github.com/NousResearch/hermes-agent/pull/30162))
+- Preserve Python dunder identifiers in markdown. ([#28582](https://github.com/NousResearch/hermes-agent/pull/28582))
+- Active profile shown in TUI prompt. ([#28581](https://github.com/NousResearch/hermes-agent/pull/28581))
+- Improve Charizard completion menu contrast. ([#28346](https://github.com/NousResearch/hermes-agent/pull/28346))
+- Stop slash dropdown chopping last char of `/goal`. ([#31311](https://github.com/NousResearch/hermes-agent/pull/31311))
+- Clipboard copy on linux/wayland. ([#29342](https://github.com/NousResearch/hermes-agent/pull/29342))
+- Anchor `splitReasoning` unclosed-tag regex; stop eating last paragraph. ([#29426](https://github.com/NousResearch/hermes-agent/pull/29426))
+- Surface verbose tool details. ([#30225](https://github.com/NousResearch/hermes-agent/pull/30225))
+- Load Linux skills on Termux + salvage @adybag14-cyber's Termux gates. ([#30166](https://github.com/NousResearch/hermes-agent/pull/30166))
+- Handle images with codex app-server. ([#31220](https://github.com/NousResearch/hermes-agent/pull/31220))
+- Refresh virtual transcript on viewport resize. ([#31077](https://github.com/NousResearch/hermes-agent/pull/31077))
+- Ignore late thinking deltas after completion. ([#31055](https://github.com/NousResearch/hermes-agent/pull/31055))
+- Commit composer input bursts immediately. ([#31053](https://github.com/NousResearch/hermes-agent/pull/31053))
+- Log parent gateway lifecycle exits. ([#31051](https://github.com/NousResearch/hermes-agent/pull/31051))
+- Clear TTS env var on voice off + TTS indicator in status bar. ([#30987](https://github.com/NousResearch/hermes-agent/pull/30987))
+- Pass `--expose-gc` as node argv instead of NODE_OPTIONS. ([#29998](https://github.com/NousResearch/hermes-agent/pull/29998))
+- Align composer cursorLayout with wrap-ansi to kill multiline cursor drift. ([#27489](https://github.com/NousResearch/hermes-agent/pull/27489))
+- Harden Terminal.app rendering and color paths. ([#27251](https://github.com/NousResearch/hermes-agent/pull/27251))
+- Keep `/goal` verdict out of compact status row. ([#27971](https://github.com/NousResearch/hermes-agent/pull/27971))
+- Clamp curses color 8 for 8-color terminals (Docker). ([#30260](https://github.com/NousResearch/hermes-agent/pull/30260))
+
+---
+
+## 🔒 Security & Reliability
+
+### Promptware & memory hardening
+
+- **Promptware defense** — shared threat patterns + memory load-time scan + tool-result delimiters. ([#32269](https://github.com/NousResearch/hermes-agent/pull/32269))
+- Expand memory content scanning patterns to parity with skills guard. ([#9151](https://github.com/NousResearch/hermes-agent/pull/9151))
+- Harden Skills Guard multi-word prompt patterns. (@YLChen-007) ([#26852](https://github.com/NousResearch/hermes-agent/pull/26852))
+- Split cron scanner so skill prose stops false-positiving exfil patterns. ([#32339](https://github.com/NousResearch/hermes-agent/pull/32339))
+
+### File safety
+
+- Protect Hermes control-plane files from prompt injection (`auth.json`, `config.yaml`, `webhook_subscriptions.json`, `mcp-tokens/`). (salvages @PratikRai0101's [#14157](https://github.com/NousResearch/hermes-agent/pull/14157)) ([#30397](https://github.com/NousResearch/hermes-agent/pull/30397))
+- Write-deny `<root>/.env` when running under a profile. ([#29687](https://github.com/NousResearch/hermes-agent/pull/29687))
+- Defense-in-depth read-deny on credential stores. (salvages [#17659](https://github.com/NousResearch/hermes-agent/pull/17659) + [#8055](https://github.com/NousResearch/hermes-agent/pull/8055)) ([#30721](https://github.com/NousResearch/hermes-agent/pull/30721))
+- TTS `output_path` traversal + update ZIP symlink reject. (salvage [#6693](https://github.com/NousResearch/hermes-agent/pull/6693) + [#15881](https://github.com/NousResearch/hermes-agent/pull/15881)) ([#32056](https://github.com/NousResearch/hermes-agent/pull/32056))
+- Reject symlinked audio inputs. ([#10082](https://github.com/NousResearch/hermes-agent/pull/10082))
+
+### Credential safety
+
+- Avoid persisting borrowed credential secrets — runtime env-sourced keys no longer leak into `auth.json`. ([#31416](https://github.com/NousResearch/hermes-agent/pull/31416))
+- Validate Nous Portal `inference_base_url` against host allowlist. (salvages [#27612](https://github.com/NousResearch/hermes-agent/pull/27612)) ([#30611](https://github.com/NousResearch/hermes-agent/pull/30611))
+- Harden API server key placeholder handling. ([#30738](https://github.com/NousResearch/hermes-agent/pull/30738))
+- Harden Google Chat OAuth credential persistence. (@Zyrixtrex) ([#24788](https://github.com/NousResearch/hermes-agent/pull/24788))
+- xAI OAuth: pin inference `base_url` to x.ai origin. ([#28952](https://github.com/NousResearch/hermes-agent/pull/28952))
+- Quarantine dead OAuth tokens on terminal refresh failure (xAI, Codex, MiniMax). ([#28116](https://github.com/NousResearch/hermes-agent/pull/28116), [#28118](https://github.com/NousResearch/hermes-agent/pull/28118), [#28119](https://github.com/NousResearch/hermes-agent/pull/28119))
+
+### Supply-chain
+
+- **On-demand supply-chain audit via OSV.dev** — `hermes audit`. ([#31460](https://github.com/NousResearch/hermes-agent/pull/31460))
+- `hermes update` syntax-validates critical files post-pull, auto-rollback on failure. ([#28669](https://github.com/NousResearch/hermes-agent/pull/28669))
+- Quarantine `hermes.exe` vs concurrent Windows instance. ([#26677](https://github.com/NousResearch/hermes-agent/pull/26677))
+
+### Other hardening
+
+- Restrict default webhook toolset capabilities. ([#30745](https://github.com/NousResearch/hermes-agent/pull/30745))
+- Harden Microsoft Graph webhook auth requirements. ([#30169](https://github.com/NousResearch/hermes-agent/pull/30169))
+- Require source CIDR allowlisting for public msgraph webhook binds. ([#33722](https://github.com/NousResearch/hermes-agent/pull/33722))
+- Require `API_SERVER_KEY` before dispatching API server work. ([#33232](https://github.com/NousResearch/hermes-agent/pull/33232))
+- env_passthrough: apply GHSA-rhgp-j443-p4rf filter to config.yaml path. (@roadhero) ([#27794](https://github.com/NousResearch/hermes-agent/pull/27794))
+- Dashboard + WeCom: restrict markdown link schemes; safe-parse untrusted XML. ([#32442](https://github.com/NousResearch/hermes-agent/pull/32442))
+- Salvage project-plugin RCE bypass fix from PR [#29311](https://github.com/NousResearch/hermes-agent/pull/29311) (GHSA-5qr3-c538-wm9j). ([#30837](https://github.com/NousResearch/hermes-agent/pull/30837))
+- Cross-profile soft guard on file-write tools + system-prompt hint. ([#31290](https://github.com/NousResearch/hermes-agent/pull/31290))
+- Reject unsafe tar members in Android psutil compatibility installer. ([#33742](https://github.com/NousResearch/hermes-agent/pull/33742))
+- Reject non-regular tar members during tirith auto-install. ([#33786](https://github.com/NousResearch/hermes-agent/pull/33786))
+
+---
+
+## 🪟 Native Windows (Beta Continued)
+
+- Complete Windows bootstrap — `dep_ensure` + `install.ps1` + detection. (@alt-glitch) ([#27845](https://github.com/NousResearch/hermes-agent/pull/27845))
+- `install.ps1`: strip BOM, `-Commit`/`-Tag` pin params, harden git ops. (@jquesnelle) ([#28169](https://github.com/NousResearch/hermes-agent/pull/28169))
+- Consolidate ACP browser bootstrap into `install.{sh,ps1}`. (@alt-glitch) ([#27851](https://github.com/NousResearch/hermes-agent/pull/27851))
+- `hermes update` quarantines live `hermes.exe`. ([#26677](https://github.com/NousResearch/hermes-agent/pull/26677))
+- Discord voice opus decoding on Windows. ([#33182](https://github.com/NousResearch/hermes-agent/pull/33182))
+- Windows Docker Desktop compatible compose file. (@Sunil123135) ([#31031](https://github.com/NousResearch/hermes-agent/pull/31031))
+
+---
+
+## 🖥️ Web Dashboard
+
+- Hardened Slack socket recovery + Windows restart dedupe. ([#28873](https://github.com/NousResearch/hermes-agent/pull/28873))
+- Web dashboard: migrate checkboxes to `@nous-research/ui` + design-system polish. (@austinpickett) ([#28814](https://github.com/NousResearch/hermes-agent/pull/28814))
+- Web dashboard: collapsible sidebar. (@austinpickett) ([#33421](https://github.com/NousResearch/hermes-agent/pull/33421))
+- Dashboard typography & contrast pass. (salvage of [#28832](https://github.com/NousResearch/hermes-agent/pull/28832)) ([#30714](https://github.com/NousResearch/hermes-agent/pull/30714))
+- Skills page: lazy-fetch catalog instead of bundling 34MB into JS. ([#33809](https://github.com/NousResearch/hermes-agent/pull/33809))
+
+---
+
+## 🐳 Docker
+
+- **s6-overlay container supervision** — abstract `ServiceManager` protocol (systemd/launchd/Windows/s6 backends), per-profile gateway supervision in-container, container-restart reconciliation, hadolint/shellcheck CI. (salvage of [#30136](https://github.com/NousResearch/hermes-agent/pull/30136), @benbarclay) ([#31760](https://github.com/NousResearch/hermes-agent/pull/31760))
+- Auto-redirect `gateway run` to supervised mode inside the s6 image. (@benbarclay) ([#33583](https://github.com/NousResearch/hermes-agent/pull/33583))
+- Tee supervised gateway stdout to docker logs. (@benbarclay) ([#33621](https://github.com/NousResearch/hermes-agent/pull/33621))
+- Drop `docker exec` to hermes uid before invoking the CLI. (@benbarclay) ([#33628](https://github.com/NousResearch/hermes-agent/pull/33628))
+- Align HOME for dashboard and s6 gateway services. (@Dusk1e) ([#33481](https://github.com/NousResearch/hermes-agent/pull/33481))
+- Bake build-time git SHA into image so `hermes dump` reports it. (@benbarclay) ([#33655](https://github.com/NousResearch/hermes-agent/pull/33655))
+- `hermes update` prints `docker pull` guidance instead of bogus git error. (@benbarclay) ([#33659](https://github.com/NousResearch/hermes-agent/pull/33659))
+- Upgrade Node to 22 LTS via multi-stage from `node:22-bookworm-slim`. (@benbarclay) ([#33060](https://github.com/NousResearch/hermes-agent/pull/33060))
+- Drop `build-essential` from apt install. (@benbarclay) ([#33028](https://github.com/NousResearch/hermes-agent/pull/33028))
+- Propagate env through s6 to cont-init and main CMD. ([#32412](https://github.com/NousResearch/hermes-agent/pull/32412))
+- Targeted chown to preserve host file ownership in `HERMES_HOME`. ([#33033](https://github.com/NousResearch/hermes-agent/pull/33033))
+- `mkdir HERMES_HOME` as root in stage2 before chown / privilege drop. ([#33078](https://github.com/NousResearch/hermes-agent/pull/33078))
+- chown `ui-tui` and `node_modules` on UID remap so TUI esbuild works. ([#33045](https://github.com/NousResearch/hermes-agent/pull/33045))
+- Include `anthropic`, `bedrock`, `azure-identity` extras in image. ([#30504](https://github.com/NousResearch/hermes-agent/pull/30504))
+- Stop pushing per-commit SHA tags to Docker Hub. ([#29387](https://github.com/NousResearch/hermes-agent/pull/29387))
+- Simplify Docker tagging — push both `:main` and `:latest` on main push. ([#33225](https://github.com/NousResearch/hermes-agent/pull/33225))
+- Test slicing across GH actions jobs. (@ethernet8023) ([#30575](https://github.com/NousResearch/hermes-agent/pull/30575))
+- Discover agent-browser Chromium binary at boot. ([#33184](https://github.com/NousResearch/hermes-agent/pull/33184))
+
+---
+
+## 🌐 API Server
+
+- **Session control API** — `/api/sessions/*` (list/create/read/patch/delete/fork) + SSE-streaming chat. (salvages [#29302](https://github.com/NousResearch/hermes-agent/pull/29302) by @Codename-11 + multimodal followup by @Schwartz10) ([#33134](https://github.com/NousResearch/hermes-agent/pull/33134))
+- `GET /v1/skills` and `/v1/toolsets`. ([#33016](https://github.com/NousResearch/hermes-agent/pull/33016))
+- Coerce stringified booleans in stream/store/approval payloads. (salvage [#26639](https://github.com/NousResearch/hermes-agent/pull/26639)) ([#27293](https://github.com/NousResearch/hermes-agent/pull/27293))
+- Honor `key_env` in auth-failure fallback resolution. ([#30840](https://github.com/NousResearch/hermes-agent/pull/30840))
+
+---
+
+## 🎟️ ACP (VS Code / Zed / JetBrains)
+
+- Session edit auto-approval modes. (salvage of [#27034](https://github.com/NousResearch/hermes-agent/pull/27034)) ([#27862](https://github.com/NousResearch/hermes-agent/pull/27862))
+- Enrich Zed permission cards — command in title + `reject_always`. ([#28148](https://github.com/NousResearch/hermes-agent/pull/28148))
+- Replay session history before responding to `session/load`. ([#26957](https://github.com/NousResearch/hermes-agent/pull/26957), [#26943](https://github.com/NousResearch/hermes-agent/pull/26943))
+- Plugin-transformed final_response delivered through streaming gate. ([#31433](https://github.com/NousResearch/hermes-agent/pull/31433))
+
+---
+
+## 🔌 Plugin Surface
+
+- `register_tts_provider()` plugin hook. (salvage of [#30420](https://github.com/NousResearch/hermes-agent/pull/30420)) ([#31745](https://github.com/NousResearch/hermes-agent/pull/31745))
+- `register_transcription_provider()` hook + `stt.providers` command-provider registry. (salvage of [#30493](https://github.com/NousResearch/hermes-agent/pull/30493)) ([#31907](https://github.com/NousResearch/hermes-agent/pull/31907))
+- `register_auxiliary_task()` in PluginContext API. (salvage [#29817](https://github.com/NousResearch/hermes-agent/pull/29817)) ([#31177](https://github.com/NousResearch/hermes-agent/pull/31177))
+- Bundled `security-guidance` plugin. ([#33131](https://github.com/NousResearch/hermes-agent/pull/33131))
+- Discord and Mattermost migrated to bundled plugins. ([#30591](https://github.com/NousResearch/hermes-agent/pull/30591), [#31748](https://github.com/NousResearch/hermes-agent/pull/31748))
+- ntfy as platform plugin. ([#30867](https://github.com/NousResearch/hermes-agent/pull/30867))
+- Surface category-namespaced plugins in `hermes plugins list`. ([#27187](https://github.com/NousResearch/hermes-agent/pull/27187))
+- Plugin discovery failures raised to WARNING level. ([#28318](https://github.com/NousResearch/hermes-agent/pull/28318))
+- `hermes_plugins` included in gateway.log component filter. ([#28313](https://github.com/NousResearch/hermes-agent/pull/28313))
+- Seed plugin extras before `is_connected` gate. ([#31703](https://github.com/NousResearch/hermes-agent/pull/31703))
+- Dashboard: allowlist plugin assets + denylist subprocess-influencing env vars. ([#32277](https://github.com/NousResearch/hermes-agent/pull/32277))
+
+---
+
+## 📦 Distribution & Install
+
+- Install-method stamping + Docker detection. (@alt-glitch) ([#27843](https://github.com/NousResearch/hermes-agent/pull/27843))
+- Nix `#messaging` and `#full` package variants. (@alt-glitch) ([#33108](https://github.com/NousResearch/hermes-agent/pull/33108))
+- Pre-load messaging gateway deps via `--extra messaging`. (salvage [#26394](https://github.com/NousResearch/hermes-agent/pull/26394)) ([#27558](https://github.com/NousResearch/hermes-agent/pull/27558))
+- Avoid piping installer directly into `iex` (Windows). ([#28347](https://github.com/NousResearch/hermes-agent/pull/28347))
+- Ship bundled skills in wheel. ([#28421](https://github.com/NousResearch/hermes-agent/pull/28421))
+- Ship dashboard plugin assets in wheel. ([#28406](https://github.com/NousResearch/hermes-agent/pull/28406))
+- Make Camofox lazy-installed instead of eager. ([#27055](https://github.com/NousResearch/hermes-agent/pull/27055))
+- Wire STT lazy-install into transcription_tools.py. ([#30256](https://github.com/NousResearch/hermes-agent/pull/30256))
+
+---
+
+## 🐛 Notable Bug Fixes (highlights only)
+
+- Match bare custom provider by active base URL in `hermes model`. ([#28908](https://github.com/NousResearch/hermes-agent/pull/28908))
+- Route `auxiliary.vision.provider=openai` to api.openai.com, skip text-only main. ([#31452](https://github.com/NousResearch/hermes-agent/pull/31452))
+- Lint: skip per-file shell linter when LSP will handle the file. ([#29054](https://github.com/NousResearch/hermes-agent/pull/29054))
+- Treat empty credential pool entries as unauthenticated in `/model` picker. ([#28312](https://github.com/NousResearch/hermes-agent/pull/28312))
+- Reverted within window: Firecrawl integration tag, send_message @username auto-mentions, Telegram quick-command-only menus, Telegram pin-on-turn.
+
+---
+
+## 🧪 Testing
+
+- Disarm lazy-install probe so `_HAS_FASTER_WHISPER` patches work. ([#30334](https://github.com/NousResearch/hermes-agent/pull/30334))
+- Cover default board dashboard pin. ([#28361](https://github.com/NousResearch/hermes-agent/pull/28361))
+- Cover `_task_dict` `task_age` fallback. ([#28365](https://github.com/NousResearch/hermes-agent/pull/28365))
+- Allowlist `tmp_path` for `kanban_notify` artifact delivery tests. ([#30851](https://github.com/NousResearch/hermes-agent/pull/30851), [#30852](https://github.com/NousResearch/hermes-agent/pull/30852))
+- Cover null output stream terminal events in Codex. ([#33137](https://github.com/NousResearch/hermes-agent/pull/33137))
+
+---
+
+## 📚 Documentation
+
+- **30-day docs overhaul** — full correctness audit, every PR in the window covered, Nous Portal weave, sidebar reorg. ([#33782](https://github.com/NousResearch/hermes-agent/pull/33782))
+- Dedicated Nous Portal integration page and setup guide. ([#31296](https://github.com/NousResearch/hermes-agent/pull/31296))
+- Providers: move Nous Portal first, Google Gemini OAuth last. ([#31287](https://github.com/NousResearch/hermes-agent/pull/31287))
+- `session_search` rewrite for single-shape tool. ([#27840](https://github.com/NousResearch/hermes-agent/pull/27840))
+- Kanban: document failure_limit, max_retries, inline create shortcuts, goals & kanban settings. ([#28357](https://github.com/NousResearch/hermes-agent/pull/28357), [#28358](https://github.com/NousResearch/hermes-agent/pull/28358), [#28359](https://github.com/NousResearch/hermes-agent/pull/28359), [#28360](https://github.com/NousResearch/hermes-agent/pull/28360), [#28362](https://github.com/NousResearch/hermes-agent/pull/28362))
+- Kanban Codex lane skill. ([#28430](https://github.com/NousResearch/hermes-agent/pull/28430))
+- xAI OAuth: note X Premium+ also unlocks Grok OAuth. ([#29055](https://github.com/NousResearch/hermes-agent/pull/29055))
+- Docs site: Docker audio bridge notes, "Installing more tools in the container", xurl auth HOME in Docker.
+- Email: clarify gateway vs Himalaya setup. (@helix4u) ([#33634](https://github.com/NousResearch/hermes-agent/pull/33634))
+- Auth docs: replace stale `hermes login` references with `hermes auth add`. ([#32859](https://github.com/NousResearch/hermes-agent/pull/32859))
+
+---
+
+## 👥 Contributors
+
+### Core
+- @teknium1 (lead)
+
+### Notable salvages & cherry-picks
+
+- **@benbarclay** — s6-overlay container supervision (29 commits salvaged), Node 22 LTS upgrade, build-essential cleanup, `gateway run` auto-redirect in s6, tee supervised stdout to docker logs, `hermes update` Docker guidance, build-time SHA stamping
+- **@OutThisLife** — `mouse_tracking` DEC mode presets
+- **@jquesnelle** — Windows installer hardening, `--branch` flag for `hermes update`, install.ps1 BOM strip / commit-pin
+- **@alt-glitch** — Windows `dep_ensure` bootstrap, Nix package variants (`.#messaging`, `.#full`), install-method stamping, ACP browser bootstrap consolidation
+- **@austinpickett** — `/update` slash command, dashboard checkboxes → `@nous-research/ui`, mobile dashboard polish, collapsible sidebar
+- **@ethernet8023** — CI test slicing across GH Actions jobs, TUI clipboard copy fix
+- **@kshitijk4poor** — doctor section banner + fail-and-issue helpers extraction, post-tag salvage cluster (curator-fallout, kanban SQLite hardening, install world-readable uv dirs, xAI bare-code paste)
+- **@rewbs** — Nous JWT inference switch + refresh-token replay fix
+- **@Codename-11** + **@Schwartz10** — session control API (REST + SSE + multimodal followup)
+- **@Niraven** — kanban swarm topology helper
+- **@Interstellar-code** — kanban worker visibility endpoints
+- **@adybag14-cyber** — termux cold-start optimizations (multiple PRs)
+- **@qike-ms** — Telegram in-place status edits design
+- **@sprmn24** — ntfy adapter
+- **@Jaaneek** — xAI Web Search provider plugin
+- **@yannsunn** — xAI upstream adapter for `hermes proxy`
+- **@Cybourgeoisie** — OpenRouter sticky routing via session_id
+- **@memosr** — Nous Portal base_url allowlist validation
+- **@Sunil123135** — Windows Docker Desktop compose file
+- **@Dusk1e** — Docker HOME alignment for dashboard + s6 gateway services
+- **@beardthelion** — opencode-go anthropic_messages routing
+- **@YLChen-007** — Skills Guard multi-word prompt patterns
+- **@roadhero** — env_passthrough GHSA-rhgp-j443-p4rf filter
+- **@Zyrixtrex** — Google Chat OAuth credential persistence hardening
+- **@briandevans**, **@tomqiaozc** — defense-in-depth read-deny on credential stores
+- **@PratikRai0101** — control-plane file write protection
+- **@helix4u**, **@Bartok9**, **@zccyman** — auxiliary fallback ladder components
+- **@ms-alan**, **@ticketclosed-wontfix**, **@donovan-yohan** — TUI session orchestrator + follow-ups
+- **@daimon-nous[bot]** — cron per-job profile support
+- **@bisko** — re-pad `reasoning_content` on cross-provider fallback
+
+### All Contributors
+
+@02356abc, @0xchainer, @0xDevNinja, @0xjackyang, @0xsir0000, @0z1-ghb, @8bit64k, @aaronlab, @AceWattGit,
+@ACR27, @adam91holt, @AdamPlatin123, @Ade5954, @AdityaRajeshGadgil, @adybag14-cyber, @AhmetArif0, @ai-hana-ai,
+@alaamohanad169-ship-it, @alber70g, @albert748, @alt-glitch, @aqilaziz, @argabor, @asdlem, @austinpickett,
+@avifenesh, @awizemann, @B0Tch1, @Bartok9, @BaxBit, @Beandon13, @beardthelion, @benbarclay, @bensargotest-sys,
+@binhnt92, @bird, @bisko, @BlackishGreen33, @booker1207, @bradhallett, @briandevans, @Brixyy, @brndnsvr,
+@BROCCOLO1D, @btorresgil, @burjorjee, @carltonawong, @Carry00, @chaconne67, @chdlc, @chromalinx, @ChyuWei,
+@CipherFrame, @cmullins70, @CNSeniorious000, @codeblackhole1024, @Codename-11, @colin-chang, @counterposition,
+@cresslank, @CryptoByz, @cyb0rgk1tty, @Cybourgeoisie, @daizhonggeng, @darvsum, @davidcampbelldc, @deas,
+@dgians, @dillweed, @DoGMaTiiC, @donovan-yohan, @draplater, @Drexuxux, @dskwe, @dsr-restyn, @Dusk1e,
+@dusterbloom, @duyua9, @egilewski, @el-analista, @eliteworkstation94-ai, @eloklam, @EloquentBrush0x, @emonty,
+@emozilla, @erhnysr, @erikengervall, @Erosika, @ether-btc, @ethernet8023, @EvilHumphrey, @fabiosiqueira,
+@falasi, @falconexe, @fardoche6, @felix-windsor, @Fewmanism, @ffr31mr, @flamiinngo, @flanny7, @flooryyyy,
+@fonhal, @francip, @fujinice, @gianfrancopiana, @glennc, @Glucksberg, @godlin-gh, @Grogger, @guillaumemeyer,
+@Gutslabs, @H-Ali13381, @hanzckernel, @haran2001, @hawknewton, @hayka-pacha, @hehehe0803, @helix4u, @HenkDz,
+@Hermes, @hermesagent26, @Hinotoi-agent, @hongchen1993, @honor2030, @houenyang-momo, @ht1072, @hueilau,
+@iamfoz, @ilonagaja509-glitch, @InB4DevOps, @indigokarasu, @Interstellar-code, @iqdoctor, @iRonin, @Jaaneek,
+@JabberELF, @jacevys, @jackey8616, @jackjin1997, @jdelmerico, @jfuenmayor, @Jiahui-Gu, @JimLiu, @joe102084,
+@JohnC1009, @jonpol01, @Jpalmer95, @Julientalbot, @justemu, @justincc, @jvinals, @karthikeyann, @kasunvinod,
+@kchuang1015, @kenyonxu, @khungate, @kiranvk-2011, @kjames2001, @konsisumer, @kpadilha, @kriscolab,
+@krislidimo, @kronexoi, @kshitijk4poor, @kunci115, @Kylejeong2, @kylekahraman, @LaPhilosophie, @leeseoki0,
+@lemassykoi, @Lempkey, @LeonJS, @LeonSGP43, @lidge-jun, @LifeJiggy, @liuhao1024, @LizerAIDev, @loicnico96,
+@loongfay, @m0n3r0, @malaiwah, @matthewlai, @mavrickdeveloper, @maxmilian, @McClean-Edison, @memosr,
+@Mind-Dragon, @momowind, @MoonJuhan, @MoonRay305, @moortekweb-art, @MorAlekss, @ms-alan, @Nami4D,
+@nehaaprasaad, @nekwo, @nftpoetrist, @NickLarcombe, @nidhi-singh02, @Niraven, @nnnet, @noctilust, @novax635,
+@nthrow, @nv-kasikritc, @nycomar, @OCWC22, @oemtalks, @OmX, @ooovenenoso, @orcool, @oseftg, @outsourc-e,
+@OutThisLife, @Paperclip, @PaTTeeL, @pepelax, @phoenixshen, @Pluviobyte, @pnascimento9596, @pochi-gio, @pr7426,
+@PratikRai0101, @Prithvi1994, @psionic73, @ptichalouf, @Que0x, @QuenVix, @quocanh261997, @qWaitCrypto, @Qwinty,
+@r266-tech, @rak135, @rdasilva1016-ui, @rewbs, @roadhero, @rodrigoeqnit, @RonHillDev, @roycepersonalassistant,
+@rudi193-cmd, @RyanRana, @sadiksaifi, @samahn0601, @samggggflynn, @SamuelZ12, @sanghyuk-seo-nexcube,
+@Saurav0989, @savanne-kham, @Schrotti77, @Schwartz10, @SerenityTn, @sgtworkman, @sharziki, @shaun0927,
+@shellybotmoyer, @shunsuke-hikiyama, @SimbaKingjoe, @SimoKiihamaki, @sir-ad, @Slimydog21, @slowtokki0409,
+@Soju06, @someaka, @soynchux, @sprmn24, @Stark-X, @steezkelly, @stepanov1975, @stephenschoettler,
+@stevehq26-bot, @steveonjava, @Strontvod, @subtract0, @Sunil123135, @superearn-fisher, @Sylw3ster, @tchanee,
+@that-ambuj, @thedavidmurray, @TheOnlyMika, @therahul-yo, @thewillhuang, @ticketclosed-wontfix, @Timur00Kh,
+@tomqiaozc, @Tosko4, @Tranquil-Flow, @tw2818, @uzunkuyruk, @vaddisrinivas, @vanthinh6886, @vgocoder,
+@victorGPT, @vynxevainglory-ai, @waefrebeorn, @walli, @wangpuv, @wanwan2qq, @wesleysimplicio, @worlldz,
+@wpengpeng168, @WuKongAI-CMU, @wuli666, @Wysie, @wysie, @xxxigm, @yannsunn, @YanzhongSu, @YarrowQiao, @ygd58,
+@YLChen-007, @yoniebans, @yu-xin-c, @YuanHanzhong, @zapabob, @zccyman, @ziliangpeng, @zwolniony, @Zyrixtrex
+
+---
+
+**Full Changelog**: [v2026.5.16...v2026.5.28](https://github.com/NousResearch/hermes-agent/compare/v2026.5.16...v2026.5.28)
diff --git a/RELEASE_v0.15.1.md b/RELEASE_v0.15.1.md
new file mode 100644
index 00000000000..9ed67b6920f
--- /dev/null
+++ b/RELEASE_v0.15.1.md
@@ -0,0 +1,110 @@
+# Hermes Agent v0.15.1 (v2026.5.29)
+
+**Release Date:** May 29, 2026
+**Since v0.15.0:** 28 commits · 21 merged PRs · hotfix release · 9 contributors
+
+> **The Patch Release.** A same-day hotfix for v0.15.0. Headline fix: the dashboard infinite-reload loop that hit anyone running v0.15.0 in loopback mode (Docker, hosted Hermes, fresh installs). A handful of other v0.15.0 follow-ups go along for the ride — kanban worker SIGTERM, `/model` picker unification, `/yolo` session bypass, the full 19,932-entry skills.sh catalog, `.md` media delivery restoration, gateway probe-stepdown safety, web-URL redaction passthrough, kanban worker vision on referenced images, hindsight observation-default. Docker users get an explicit `--insecure` opt-in env var (no more bind-host inference), MCP server bare-command PATH resolution, and arm64 PR-build cache fixes.
+
+---
+
+## ✨ Highlights
+
+- **Dashboard 401 reload loop fixed** — In loopback mode the dashboard's identity probe (`/api/auth/me`) returns 401 by design, but v0.15.0's stale-token reload guard treated every 401 as a rotated session token and full-page-reloaded to pick up a fresh one. Every successful sibling call cleared the one-shot reload guard, so the page reload-looped forever (Firefox: "Navigated to /sessions" storm; Chrome: React re-render storm). Fix adds an `allowUnauthorized` opt-out to `fetchJSON` that skips only the loopback stale-token reload — 401 still throws so `AuthWidget` swallows it, gated-mode `login_url` redirects are unaffected. Closes [#34206](https://github.com/NousResearch/hermes-agent/issues/34206), [#34202](https://github.com/NousResearch/hermes-agent/issues/34202). ([#30698](https://github.com/NousResearch/hermes-agent/pull/30698) — @austinpickett)
+
+- **Docker dashboard `--insecure` is now an explicit env opt-in, never derived from bind host** — Previously the Docker entrypoint inferred `--insecure` when the dashboard bound to a non-loopback host. That conflated "I want LAN access" with "I want to disable the same-origin guard." The fix splits them: bind host is bind host, and disabling the dashboard's loopback auth requires an explicit `HERMES_DASHBOARD_INSECURE=1`. Existing setups that genuinely wanted insecure binding must now set the env var. ([#34188](https://github.com/NousResearch/hermes-agent/pull/34188), [#34204](https://github.com/NousResearch/hermes-agent/pull/34204) — @benbarclay)
+
+- **MCP bare command resolution under Docker** — MCP servers configured with bare commands (`npx`, `npm`, `node`) now resolve against `/usr/local/bin` so they actually launch inside the Docker image where those binaries live. v0.15.0 left these failing silently in containers when the agent's effective PATH didn't include the Node toolchain location. ([#34186](https://github.com/NousResearch/hermes-agent/pull/34186) — @benbarclay)
+
+- **Skills page sidebar / source pills restored** — A stale `useMemo` dependency in the new dashboard skills page collapsed the source pills and category sidebar to "All" only. Fixed; both surfaces now reflect the live catalog state. ([#34194](https://github.com/NousResearch/hermes-agent/pull/34194))
+
+- **Kanban worker can be killed again** — `SIGTERM` on a kanban worker was being absorbed by an intermediate process and the worker stayed running. Closes [#28181](https://github.com/NousResearch/hermes-agent/issues/28181). ([#34045](https://github.com/NousResearch/hermes-agent/pull/34045))
+
+- **Full skills.sh catalog (858 → 19,932 entries)** — The skills hub page was pulling a partial paginated catalog. The fetch now walks the sitemap, so all 19,932 skills.sh entries surface in the picker instead of just the first 858. ([#34025](https://github.com/NousResearch/hermes-agent/pull/34025))
+
+---
+
+## 🐛 Bug Fixes
+
+### Dashboard / Web
+
+- **`/api/auth/me` 401 no longer triggers reload loop** in loopback mode — ([#30698](https://github.com/NousResearch/hermes-agent/pull/30698) — @austinpickett)
+- **Skills page source pills + category sidebar restored** — stale `useMemo` dep ([#34194](https://github.com/NousResearch/hermes-agent/pull/34194))
+
+### Docker
+
+- **`--insecure` is now explicit opt-in via env var**, not derived from bind host ([#34188](https://github.com/NousResearch/hermes-agent/pull/34188) — @benbarclay)
+- **Dashboard test suite repaired** to match the insecure-opt-in fix ([#34204](https://github.com/NousResearch/hermes-agent/pull/34204) — @benbarclay)
+- **arm64 PR builds skip the GHA cache** to avoid cache-thrash on cross-arch builders ([#33704](https://github.com/NousResearch/hermes-agent/pull/33704) — @BROCCOLO1D)
+
+### MCP
+
+- **Bare `npx`/`npm`/`node` resolve against `/usr/local/bin`** for Docker compatibility ([#34186](https://github.com/NousResearch/hermes-agent/pull/34186) — @benbarclay)
+
+### Kanban
+
+- **Worker SIGTERM actually terminates the process** ([#34045](https://github.com/NousResearch/hermes-agent/pull/34045))
+- **Workers receive images referenced in task bodies** for vision-capable models ([#34210](https://github.com/NousResearch/hermes-agent/pull/34210))
+
+### Gateway
+
+- **`.md` files deliver again** — media-delivery validation defaults to denylist-only instead of an overly-narrow allowlist ([#34022](https://github.com/NousResearch/hermes-agent/pull/34022))
+- **Probe stepdown safety** — on a context-overflow without an explicit provider context limit, the agent no longer steps down to a smaller model based on an unknown ceiling (salvage of [#33673](https://github.com/NousResearch/hermes-agent/pull/33673)) ([#33826](https://github.com/NousResearch/hermes-agent/pull/33826))
+
+### CLI
+
+- **`/yolo` mid-session enables the per-session bypass** instead of just toggling the env var (which the running agent had already snapshotted) ([#33931](https://github.com/NousResearch/hermes-agent/pull/33931) — @kshitijk4poor)
+- **`/model` and `hermes model` show the same list**, plus disk cache for picker startup ([#33867](https://github.com/NousResearch/hermes-agent/pull/33867))
+
+### Skills
+
+- **Full skills.sh catalog via sitemap** — 858 → 19,932 entries ([#34025](https://github.com/NousResearch/hermes-agent/pull/34025))
+
+### Redaction
+
+- **Web URLs pass through unchanged** — the redactor was eating query parameters that looked credential-shaped ([#34029](https://github.com/NousResearch/hermes-agent/pull/34029))
+
+---
+
+## ✨ Small Features
+
+- **Hindsight default narrowed to observation-only** for `recall_types` — tool path is also narrowed ([#34079](https://github.com/NousResearch/hermes-agent/pull/34079) — @nicoloboschi, follow-up [#34091](https://github.com/NousResearch/hermes-agent/pull/4df62d239e38bf8c212a595721c9c01e176f6c3a) — @kshitijk4poor)
+- **Memory providers receive completed-turn message context** — salvage of [#28065](https://github.com/NousResearch/hermes-agent/pull/28065) ([#34097](https://github.com/NousResearch/hermes-agent/pull/34097) — @kshitijk4poor, credit to @devwdave)
+
+---
+
+## 📚 Documentation
+
+- **`--no-supervise` / `HERMES_GATEWAY_NO_SUPERVISE` documented** in the reference docs (follow-up to [#33583](https://github.com/NousResearch/hermes-agent/pull/33583)) ([#33751](https://github.com/NousResearch/hermes-agent/pull/33751) — @r266-tech)
+
+---
+
+## 🛠️ Infrastructure
+
+- **Vercel deploy workflow accepts `workflow_dispatch`** so docs deploys can be manually triggered ([#34081](https://github.com/NousResearch/hermes-agent/pull/34081))
+- **`@nous-research/ui` bumped to 0.18.2** (Nix `npmDepsHash` also updated to match) ([#34193](https://github.com/NousResearch/hermes-agent/pull/34193) follow-ups — @austinpickett)
+
+---
+
+## 👥 Contributors
+
+### Core
+- @teknium1
+
+### Community
+- @austinpickett — dashboard 401 reload-loop fix (the headline), `@nous-research/ui` bump, Nix `npmDepsHash` updates
+- @benbarclay — Docker `--insecure` opt-in, MCP bare-command resolution, dashboard test repair
+- @kshitijk4poor — `/yolo` session bypass, completed-turn memory context salvage, hindsight follow-up docs
+- @nicoloboschi — hindsight `recall_types` observation default
+- @BROCCOLO1D — arm64 PR build cache fix
+- @r266-tech — `--no-supervise` reference docs
+- @yangguangjin — probe stepdown safety (salvage of @yanghd's #33673)
+- @devwdave — completed-turn memory context (credited via salvage)
+- @andrewhosf — co-author
+
+### Issue Reporters (the 401 loop)
+- @routesmith ([#34206](https://github.com/NousResearch/hermes-agent/issues/34206))
+- @beeaton ([#34202](https://github.com/NousResearch/hermes-agent/issues/34202))
+
+---
+
+**Full Changelog**: [v2026.5.28...v2026.5.29](https://github.com/NousResearch/hermes-agent/compare/v2026.5.28...v2026.5.29)
diff --git a/acp_adapter/tools.py b/acp_adapter/tools.py
index be4e49d013c..b913e1043af 100644
--- a/acp_adapter/tools.py
+++ b/acp_adapter/tools.py
@@ -907,72 +907,6 @@ def _build_polished_completion_content(
     return [_text(text)]
 
 
-def _build_patch_mode_content(patch_text: str) -> List[Any]:
-    """Parse V4A patch mode input into ACP diff blocks when possible."""
-    if not patch_text:
-        return [acp.tool_content(acp.text_block(""))]
-
-    try:
-        from tools.patch_parser import OperationType, parse_v4a_patch
-
-        operations, error = parse_v4a_patch(patch_text)
-        if error or not operations:
-            return [acp.tool_content(acp.text_block(patch_text))]
-
-        content: List[Any] = []
-        for op in operations:
-            if op.operation == OperationType.UPDATE:
-                old_chunks: list[str] = []
-                new_chunks: list[str] = []
-                for hunk in op.hunks:
-                    old_lines = [line.content for line in hunk.lines if line.prefix in {" ", "-"}]
-                    new_lines = [line.content for line in hunk.lines if line.prefix in {" ", "+"}]
-                    if old_lines or new_lines:
-                        old_chunks.append("\n".join(old_lines))
-                        new_chunks.append("\n".join(new_lines))
-
-                old_text = "\n...\n".join(chunk for chunk in old_chunks if chunk)
-                new_text = "\n...\n".join(chunk for chunk in new_chunks if chunk)
-                if old_text or new_text:
-                    content.append(
-                        acp.tool_diff_content(
-                            path=op.file_path,
-                            old_text=old_text or None,
-                            new_text=new_text or "",
-                        )
-                    )
-                continue
-
-            if op.operation == OperationType.ADD:
-                added_lines = [line.content for hunk in op.hunks for line in hunk.lines if line.prefix == "+"]
-                content.append(
-                    acp.tool_diff_content(
-                        path=op.file_path,
-                        new_text="\n".join(added_lines),
-                    )
-                )
-                continue
-
-            if op.operation == OperationType.DELETE:
-                content.append(
-                    acp.tool_diff_content(
-                        path=op.file_path,
-                        old_text=f"Delete file: {op.file_path}",
-                        new_text="",
-                    )
-                )
-                continue
-
-            if op.operation == OperationType.MOVE:
-                content.append(
-                    acp.tool_content(acp.text_block(f"Move file: {op.file_path} -> {op.new_path}"))
-                )
-
-        return content or [acp.tool_content(acp.text_block(patch_text))]
-    except Exception:
-        return [acp.tool_content(acp.text_block(patch_text))]
-
-
 def _strip_diff_prefix(path: str) -> str:
     raw = str(path or "").strip()
     if raw.startswith(("a/", "b/")):
diff --git a/acp_registry/agent.json b/acp_registry/agent.json
index b23d1642a94..966444ec912 100644
--- a/acp_registry/agent.json
+++ b/acp_registry/agent.json
@@ -1,7 +1,7 @@
 {
   "id": "hermes-agent",
   "name": "Hermes Agent",
-  "version": "0.14.0",
+  "version": "0.15.1",
   "description": "Self-improving open-source AI agent by Nous Research with ACP editor integration, persistent memory, skills, and rich tool support.",
   "repository": "https://github.com/NousResearch/hermes-agent",
   "website": "https://hermes-agent.nousresearch.com/docs/user-guide/features/acp",
@@ -9,7 +9,7 @@
   "license": "MIT",
   "distribution": {
     "uvx": {
-      "package": "hermes-agent[acp]==0.14.0",
+      "package": "hermes-agent[acp]==0.15.1",
       "args": ["hermes-acp"]
     }
   }
diff --git a/agent/__init__.py b/agent/__init__.py
index aaa2d74d14a..41136f9b639 100644
--- a/agent/__init__.py
+++ b/agent/__init__.py
@@ -4,3 +4,5 @@ These modules contain pure utility functions and self-contained classes
 that were previously embedded in the 3,600-line run_agent.py. Extracting
 them makes run_agent.py focused on the AIAgent orchestrator class.
 """
+
+from . import jiter_preload as _jiter_preload  # noqa: F401
diff --git a/agent/agent_init.py b/agent/agent_init.py
index bcad584e87c..675130a8840 100644
--- a/agent/agent_init.py
+++ b/agent/agent_init.py
@@ -27,7 +27,6 @@ import threading
 import time
 import uuid
 from datetime import datetime
-from pathlib import Path
 from typing import Any, Dict, List, Optional
 from urllib.parse import urlparse, parse_qs, urlunparse
 
@@ -37,7 +36,6 @@ from agent.memory_manager import StreamingContextScrubber
 from agent.model_metadata import (
     MINIMUM_CONTEXT_LENGTH,
     fetch_model_metadata,
-    get_model_context_length,
     is_local_endpoint,
     query_ollama_num_ctx,
 )
@@ -52,7 +50,6 @@ from agent.tool_guardrails import (
 from hermes_cli.config import cfg_get
 from hermes_cli.timeouts import get_provider_request_timeout
 from hermes_constants import get_hermes_home
-from model_tools import check_toolset_requirements, get_tool_definitions
 from utils import base_url_host_matches
 
 # Use the same logger name as run_agent so tests patching ``run_agent.logger``
@@ -1201,6 +1198,18 @@ def init_agent(
         _agent_section = {}
     agent._tool_use_enforcement = _agent_section.get("tool_use_enforcement", "auto")
 
+    # Universal task-completion guidance toggle.  Default True.  Surfaced
+    # as a separate flag from tool_use_enforcement because the guidance
+    # applies to ALL models, not just the model families enforcement
+    # targets.
+    agent._task_completion_guidance = bool(_agent_section.get("task_completion_guidance", True))
+
+    # Local Python toolchain probe toggle.  Default True.  When False,
+    # the probe is skipped entirely (no subprocess calls, no system-prompt
+    # line).  Useful for users on exotic setups where the probe heuristics
+    # are noisy.
+    agent._environment_probe = bool(_agent_section.get("environment_probe", True))
+
     # App-level API retry count (wraps each model API call).  Default 3,
     # overridable via agent.api_max_retries in config.yaml.  See #11616.
     try:
@@ -1462,7 +1471,6 @@ def init_agent(
 
     # Reject models whose context window is below the minimum required
     # for reliable tool-calling workflows (64K tokens).
-    from agent.model_metadata import MINIMUM_CONTEXT_LENGTH
     _ctx = getattr(agent.context_compressor, "context_length", 0)
     if _ctx and _ctx < MINIMUM_CONTEXT_LENGTH:
         raise ValueError(
@@ -1522,6 +1530,7 @@ def init_agent(
                 platform=agent.platform or "cli",
                 model=agent.model,
                 context_length=getattr(agent.context_compressor, "context_length", 0),
+                conversation_id=getattr(agent, "_gateway_session_key", None),
             )
         except Exception as _ce_err:
             _ra().logger.debug("Context engine on_session_start: %s", _ce_err)
diff --git a/agent/agent_runtime_helpers.py b/agent/agent_runtime_helpers.py
index 15deb327581..73f3cba435d 100644
--- a/agent/agent_runtime_helpers.py
+++ b/agent/agent_runtime_helpers.py
@@ -25,24 +25,17 @@ from __future__ import annotations
 import copy
 import json
 import logging
-import os
 import re
-import threading
 import time
-import uuid
 from datetime import datetime
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Dict, List, Optional
 
 from hermes_cli.timeouts import get_provider_request_timeout
-from agent.message_sanitization import (
-    _repair_tool_call_arguments,
-    _sanitize_surrogates,
-)
 from agent.tool_dispatch_helpers import _trajectory_normalize_msg, make_tool_result_message
 from agent.trajectory import convert_scratchpad_to_think
 from agent.credential_pool import STATUS_EXHAUSTED
-from agent.error_classifier import classify_api_error, FailoverReason
+from agent.error_classifier import FailoverReason
 from utils import base_url_host_matches, base_url_hostname, env_var_enabled, atomic_json_write
 
 logger = logging.getLogger(__name__)
@@ -1699,6 +1692,8 @@ def invoke_tool(agent, function_name: str, function_args: dict, effective_task_i
             session_id=agent.session_id or "",
             enabled_tools=list(agent.valid_tool_names) if agent.valid_tool_names else None,
             skip_pre_tool_call_hook=True,
+            enabled_toolsets=getattr(agent, "enabled_toolsets", None),
+            disabled_toolsets=getattr(agent, "disabled_toolsets", None),
         )
 
 
@@ -1994,6 +1989,36 @@ def copy_reasoning_content_for_api(agent, source_msg: dict, api_msg: dict) -> No
     api_msg.pop("reasoning_content", None)
 
 
+def reapply_reasoning_echo_for_provider(agent, api_messages: list) -> int:
+    """Re-pad assistant turns with reasoning_content for the active provider.
+
+    ``api_messages`` is built once, before the retry loop, while the *primary*
+    provider is active.  If a mid-conversation fallback then switches to a
+    require-side provider (DeepSeek / Kimi / MiMo thinking mode), assistant
+    turns that were built when the prior provider did NOT need the echo-back go
+    out without ``reasoning_content`` and the new provider rejects them with
+    HTTP 400 ("The reasoning_content in the thinking mode must be passed back").
+
+    Calling this immediately before building the request kwargs re-applies the
+    pad against the *current* provider.  It is idempotent and a no-op unless
+    ``_needs_thinking_reasoning_pad()`` is True for the active provider, so it
+    is safe to call every iteration and covers every fallback path.
+
+    Returns the number of assistant turns that gained reasoning_content.
+    """
+    if not agent._needs_thinking_reasoning_pad():
+        return 0
+    padded = 0
+    for api_msg in api_messages:
+        if api_msg.get("role") != "assistant":
+            continue
+        if api_msg.get("reasoning_content"):
+            continue
+        copy_reasoning_content_for_api(agent, api_msg, api_msg)
+        if api_msg.get("reasoning_content"):
+            padded += 1
+    return padded
+
 
 def _iter_pool_sockets(client: Any):
     """Yield raw sockets reachable from an OpenAI/httpx client pool.
diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py
index 898df7eb685..d9bbe2d8e3a 100644
--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@@ -77,16 +77,16 @@ ADAPTIVE_EFFORT_MAP = {
 # xhigh as a distinct level between high and max; older adaptive-thinking
 # models (4.6) reject it with a 400.  Keep this substring list in sync with
 # the Anthropic migration guide as new model families ship.
-_XHIGH_EFFORT_SUBSTRINGS = ("4-7", "4.7")
+_XHIGH_EFFORT_SUBSTRINGS = ("4-7", "4.7", "4-8", "4.8")
 
 # Models where extended thinking is deprecated/removed (4.6+ behavior: adaptive
 # is the only supported mode; 4.7 additionally forbids manual thinking entirely
 # and drops temperature/top_p/top_k).
-_ADAPTIVE_THINKING_SUBSTRINGS = ("4-6", "4.6", "4-7", "4.7")
+_ADAPTIVE_THINKING_SUBSTRINGS = ("4-6", "4.6", "4-7", "4.7", "4-8", "4.8")
 
 # Models where temperature/top_p/top_k return 400 if set to non-default values.
 # This is the Opus 4.7 contract; future 4.x+ models are expected to follow it.
-_NO_SAMPLING_PARAMS_SUBSTRINGS = ("4-7", "4.7")
+_NO_SAMPLING_PARAMS_SUBSTRINGS = ("4-7", "4.7", "4-8", "4.8")
 _FAST_MODE_SUPPORTED_SUBSTRINGS = ("opus-4-6", "opus-4.6")
 
 # ── Max output token limits per Anthropic model ───────────────────────
@@ -94,6 +94,8 @@ _FAST_MODE_SUPPORTED_SUBSTRINGS = ("opus-4-6", "opus-4.6")
 # max_tokens as a mandatory field.  Previously we hardcoded 16384, which
 # starves thinking-enabled models (thinking tokens count toward the limit).
 _ANTHROPIC_OUTPUT_LIMITS = {
+    # Claude 4.8
+    "claude-opus-4-8":   128_000,
     # Claude 4.7
     "claude-opus-4-7":   128_000,
     # Claude 4.6
@@ -892,20 +894,6 @@ def read_claude_code_credentials() -> Optional[Dict[str, Any]]:
     return None
 
 
-def read_claude_managed_key() -> Optional[str]:
-    """Read Claude's native managed key from ~/.claude.json for diagnostics only."""
-    claude_json = Path.home() / ".claude.json"
-    if claude_json.exists():
-        try:
-            data = json.loads(claude_json.read_text(encoding="utf-8"))
-            primary_key = data.get("primaryApiKey", "")
-            if isinstance(primary_key, str) and primary_key.strip():
-                return primary_key.strip()
-        except (json.JSONDecodeError, OSError, IOError) as e:
-            logger.debug("Failed to read ~/.claude.json: %s", e)
-    return None
-
-
 def is_claude_code_token_valid(creds: Dict[str, Any]) -> bool:
     """Check if Claude Code credentials have a non-expired access token."""
     import time
@@ -1254,10 +1242,16 @@ def run_hermes_oauth_login_pure() -> Optional[Dict[str, Any]]:
     print()
 
     try:
-        webbrowser.open(auth_url)
-        print("  (Browser opened automatically)")
+        from hermes_cli.auth import _can_open_graphical_browser as _can_open_gui
     except Exception:
-        pass
+        _can_open_gui = lambda: True  # noqa: E731 — degrade to prior behavior
+
+    if _can_open_gui():
+        try:
+            webbrowser.open(auth_url)
+            print("  (Browser opened automatically)")
+        except Exception:
+            pass
 
     print()
     print("After authorizing, you'll see a code. Paste it below.")
diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index 1e6abb779e8..c5fd9a20aee 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -700,12 +700,20 @@ class _CodexCompletionsAdapter:
             # xAI's Responses endpoint rejects ``pattern`` and ``format`` JSON Schema
             # keywords (HTTP 400). Strip them here to match the parity guarantee that
             # chat_completion_helpers.py provides for the main-agent xAI path.
+            #
+            # Deep-copy before sanitizing — ``list(tools)`` is only a shallow
+            # copy of the outer list, but the sanitizers mutate the inner
+            # parameter dicts in place.  Without a deep copy the caller's
+            # tool registry permanently loses its slash-containing enum
+            # constraints after the first auxiliary xAI call.  See #27907.
             try:
+                import copy as _copy
                 from tools.schema_sanitizer import (
                     strip_pattern_and_format,
                     strip_slash_enum,
                 )
-                tools, _ = strip_pattern_and_format(list(tools))
+                tools = _copy.deepcopy(list(tools))
+                tools, _ = strip_pattern_and_format(tools)
                 tools, _ = strip_slash_enum(tools)
             except Exception as exc:
                 logger.warning(
@@ -1235,8 +1243,23 @@ def _read_nous_auth() -> Optional[dict]:
 
 
 def _nous_api_key(provider: dict) -> str:
-    """Extract the Nous runtime credential from the compatibility field."""
-    return provider.get("agent_key") or provider.get("access_token", "")
+    """Extract a usable Nous inference JWT from stored auth state."""
+    from hermes_cli.auth import _nous_invoke_jwt_is_usable
+
+    for token_key, expiry_key in (
+        ("agent_key", "agent_key_expires_at"),
+        ("access_token", "expires_at"),
+    ):
+        token = provider.get(token_key)
+        if not isinstance(token, str) or not token.strip():
+            continue
+        if _nous_invoke_jwt_is_usable(
+            token,
+            scope=provider.get("scope"),
+            expires_at=provider.get(expiry_key),
+        ):
+            return token
+    return ""
 
 
 def _nous_base_url() -> str:
@@ -1248,25 +1271,16 @@ def _resolve_nous_runtime_api(*, force_refresh: bool = False) -> Optional[tuple[
     """Return fresh Nous runtime credentials when available.
 
     This mirrors the main agent's 401 recovery path and keeps auxiliary
-    clients aligned with the singleton auth store + JWT/mint flow instead of
+    clients aligned with the singleton auth store + JWT refresh flow instead of
     relying only on whatever raw tokens happen to be sitting in auth.json
     or the credential pool.
     """
     try:
-        from hermes_cli.auth import (
-            NOUS_INFERENCE_AUTH_MODE_AUTO,
-            NOUS_INFERENCE_AUTH_MODE_LEGACY,
-            resolve_nous_runtime_credentials,
-        )
+        from hermes_cli.auth import resolve_nous_runtime_credentials
 
         creds = resolve_nous_runtime_credentials(
-            min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
             timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
-            inference_auth_mode=(
-                NOUS_INFERENCE_AUTH_MODE_LEGACY
-                if force_refresh
-                else NOUS_INFERENCE_AUTH_MODE_AUTO
-            ),
+            force_refresh=force_refresh,
         )
     except Exception as exc:
         logger.debug("Auxiliary Nous runtime credential resolution failed: %s", exc)
@@ -1550,13 +1564,9 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
         _mark_provider_unhealthy("nous", ttl=60)
         return None, None
     if runtime is None and nous:
-        # Runtime credential mint failed but stored Nous auth is still present.
-        # Falls back to the raw stored token below; surface a debug line so
-        # operators investigating expired/invalid sessions have a breadcrumb,
-        # without blocking the fallback path the rest of this function relies on.
         logger.debug(
-            "Auxiliary Nous: runtime credential mint failed; falling back to "
-            "stored auth.json token."
+            "Auxiliary Nous: runtime JWT refresh failed; checking stored "
+            "auth.json token."
         )
     global auxiliary_is_nous
     auxiliary_is_nous = True
@@ -1594,6 +1604,13 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
         api_key, base_url = runtime
     else:
         api_key = _nous_api_key(nous or {})
+        if not api_key:
+            logger.warning(
+                "Auxiliary Nous client unavailable: no usable inference JWT found "
+                "(run: hermes auth add nous)."
+            )
+            _mark_provider_unhealthy("nous", ttl=60)
+            return None, None
         base_url = str((nous or {}).get("inference_base_url") or _nous_base_url()).rstrip("/")
     return (
         OpenAI(
@@ -2244,11 +2261,15 @@ def _is_payment_error(exc: Exception) -> bool:
     # but sometimes wrap them in 429 or other codes.
     # Daily quota exhaustion from Bedrock, Vertex AI, and similar providers
     # uses different language but is semantically identical to credit exhaustion.
-    if status in {402, 429, None}:
+    if status in {402, 404, 429, None}:
         if any(kw in err_lower for kw in (
             "credits", "insufficient funds",
             "can only afford", "billing",
             "payment required",
+            "out of funds", "run out of funds",
+            "balance_depleted", "no usable credits",
+            "model_not_supported_on_free_tier",
+            "not available on the free tier",
             # Daily / monthly / weekly quota exhaustion keywords
             "quota exceeded", "quota_exceeded",
             "too many tokens per day", "daily limit",
@@ -2260,6 +2281,18 @@ def _is_payment_error(exc: Exception) -> bool:
     return False
 
 
+def _nous_portal_account_has_fresh_paid_access() -> bool:
+    """Return True only when the fresh Nous account API says paid access is allowed."""
+    try:
+        from hermes_cli.nous_account import get_nous_portal_account_info
+
+        account_info = get_nous_portal_account_info(force_fresh=True)
+        return account_info.paid_service_access is True
+    except Exception as exc:
+        logger.debug("Auxiliary Nous paid-entitlement refresh check failed: %s", exc)
+        return False
+
+
 def _is_rate_limit_error(exc: Exception) -> bool:
     """Detect rate-limit errors that warrant provider fallback.
 
@@ -2288,6 +2321,10 @@ def _is_rate_limit_error(exc: Exception) -> bool:
         if not any(kw in err_lower for kw in (
             "credits", "insufficient funds", "billing",
             "payment required", "can only afford",
+            "out of funds", "run out of funds",
+            "balance_depleted", "no usable credits",
+            "model_not_supported_on_free_tier",
+            "not available on the free tier",
         )):
             return True
     return False
@@ -2337,7 +2374,16 @@ def _is_auth_error(exc: Exception) -> bool:
     if status == 401:
         return True
     err_lower = str(exc).lower()
-    return "error code: 401" in err_lower or "authenticationerror" in type(exc).__name__.lower()
+    if "error code: 401" in err_lower or "authenticationerror" in type(exc).__name__.lower():
+        return True
+    # xAI returns HTTP 403 with "unauthenticated:bad-credentials" when an OAuth2
+    # access token has expired or is invalid — semantically a 401 auth failure,
+    # even though the status code is 403 (PermissionDenied).
+    if status == 403 and "bad-credentials" in err_lower:
+        return True
+    if "unauthenticated" in err_lower and "bad-credentials" in err_lower:
+        return True
+    return False
 
 
 def _is_unsupported_parameter_error(exc: Exception, param: str) -> bool:
@@ -2490,6 +2536,8 @@ def _recoverable_pool_provider(
         return "copilot"
     if base_url_host_matches(base, "api.kimi.com"):
         return "kimi-coding"
+    if base_url_host_matches(base, "api.x.ai"):
+        return "xai-oauth"
     # For api_key providers not in the hardcoded list (e.g. opencode-go), match
     # the client base URL against all registered api_key providers so that
     # credential-pool rotation works for any provider the user configured.
@@ -2686,15 +2734,11 @@ def _refresh_provider_credentials(provider: str) -> bool:
             _evict_cached_clients(normalized)
             return True
         if normalized == "nous":
-            from hermes_cli.auth import (
-                NOUS_INFERENCE_AUTH_MODE_LEGACY,
-                resolve_nous_runtime_credentials,
-            )
+            from hermes_cli.auth import resolve_nous_runtime_credentials
 
             creds = resolve_nous_runtime_credentials(
-                min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
                 timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
-                inference_auth_mode=NOUS_INFERENCE_AUTH_MODE_LEGACY,
+                force_refresh=True,
             )
             if not str(creds.get("api_key", "") or "").strip():
                 return False
@@ -2711,6 +2755,24 @@ def _refresh_provider_credentials(provider: str) -> bool:
                 return False
             _evict_cached_clients(normalized)
             return True
+        if normalized == "xai-oauth":
+            # Preference: pool-level refresh (uses refresh_token from pool entry),
+            # then fall back to singleton auth-store resolver.
+            pool = load_pool(normalized)
+            if pool and pool.has_credentials():
+                # Ensure a current entry is selected before trying to refresh.
+                pool.select()
+                refreshed = pool.try_refresh_current()
+                if refreshed is not None and str(getattr(refreshed, "runtime_api_key", "") or "").strip():
+                    _evict_cached_clients(normalized)
+                    return True
+            from hermes_cli.auth import resolve_xai_oauth_runtime_credentials
+
+            creds = resolve_xai_oauth_runtime_credentials(force_refresh=True)
+            if not str(creds.get("api_key", "") or "").strip():
+                return False
+            _evict_cached_clients(normalized)
+            return True
     except Exception as exc:
         logger.debug("Auxiliary provider credential refresh failed for %s: %s", normalized, exc)
         return False
@@ -4663,24 +4725,23 @@ def _build_call_kwargs(
         kwargs["temperature"] = temperature
 
     if max_tokens is not None:
-        # Codex adapter handles max_tokens internally; OpenRouter/Nous use max_tokens.
-        # Direct OpenAI api.openai.com with newer models needs max_completion_tokens.
-        # ZAI vision models (glm-4v-flash, glm-4v-plus, etc.) reject max_tokens with
-        # error code 1210 ("API 调用参数有误") on multimodal requests — skip it.
-        _model_lower = (model or "").lower()
-        _skip_max_tokens = (
-            provider == "zai"
-            and ("4v" in _model_lower or "5v" in _model_lower or "-v" in _model_lower)
+        # We do NOT cap output by default. Most chat-completions providers treat
+        # an omitted max_tokens as "use the model's max output", which is what we
+        # want for auxiliary tasks (compression summaries, titles, vision, etc.) —
+        # an explicit cap only risks truncating a summary or 400-ing on providers
+        # that reject the parameter outright (e.g. GitHub Copilot / newer OpenAI
+        # GPT-5 models require max_completion_tokens, not max_tokens; ZAI vision
+        # models reject it entirely with error 1210). Omitting it sidesteps all of
+        # those wire-format quirks at once.
+        #
+        # The one exception is the Anthropic Messages wire (MiniMax and any
+        # ``/anthropic`` endpoint reached through the OpenAI SDK wrapper), where
+        # max_tokens is a MANDATORY field — omitting it is a hard 400. Keep it only
+        # there.
+        _effective_base = base_url or (
+            _current_custom_base_url() if provider == "custom" else ""
         )
-        if _skip_max_tokens:
-            pass  # ZAI vision models do not accept max_tokens
-        elif provider == "custom":
-            custom_base = base_url or _current_custom_base_url()
-            if base_url_hostname(custom_base) == "api.openai.com":
-                kwargs["max_completion_tokens"] = max_tokens
-            else:
-                kwargs["max_tokens"] = max_tokens
-        else:
+        if _is_anthropic_compat_endpoint(provider, _effective_base):
             kwargs["max_tokens"] = max_tokens
 
     if tools:
@@ -4937,6 +4998,41 @@ def call_llm(
             resolved_provider == "nous"
             or base_url_host_matches(_base_info, "inference-api.nousresearch.com")
         )
+        if (
+            _is_payment_error(first_err)
+            and client_is_nous
+            and _nous_portal_account_has_fresh_paid_access()
+        ):
+            refreshed_client, refreshed_model = _refresh_nous_auxiliary_client(
+                cache_provider=resolved_provider or "nous",
+                model=final_model,
+                async_mode=False,
+                base_url=resolved_base_url,
+                api_key=resolved_api_key,
+                api_mode=resolved_api_mode,
+                main_runtime=main_runtime,
+                is_vision=(task == "vision"),
+            )
+            if refreshed_client is not None:
+                logger.info(
+                    "Auxiliary %s: refreshed Nous runtime credentials after paid account check, retrying",
+                    task or "call",
+                )
+                if refreshed_model and refreshed_model != kwargs.get("model"):
+                    kwargs["model"] = refreshed_model
+                try:
+                    return _validate_llm_response(
+                        refreshed_client.chat.completions.create(**kwargs), task)
+                except Exception as retry_err:
+                    if not (
+                        _is_auth_error(retry_err)
+                        or _is_payment_error(retry_err)
+                        or _is_connection_error(retry_err)
+                        or _is_rate_limit_error(retry_err)
+                    ):
+                        raise
+                    first_err = retry_err
+
         if _is_auth_error(first_err) and client_is_nous:
             refreshed_client, refreshed_model = _refresh_nous_auxiliary_client(
                 cache_provider=resolved_provider or "nous",
@@ -5339,6 +5435,40 @@ async def async_call_llm(
             resolved_provider == "nous"
             or base_url_host_matches(_client_base, "inference-api.nousresearch.com")
         )
+        if (
+            _is_payment_error(first_err)
+            and client_is_nous
+            and _nous_portal_account_has_fresh_paid_access()
+        ):
+            refreshed_client, refreshed_model = _refresh_nous_auxiliary_client(
+                cache_provider=resolved_provider or "nous",
+                model=final_model,
+                async_mode=True,
+                base_url=resolved_base_url,
+                api_key=resolved_api_key,
+                api_mode=resolved_api_mode,
+                is_vision=(task == "vision"),
+            )
+            if refreshed_client is not None:
+                logger.info(
+                    "Auxiliary %s (async): refreshed Nous runtime credentials after paid account check, retrying",
+                    task or "call",
+                )
+                if refreshed_model and refreshed_model != kwargs.get("model"):
+                    kwargs["model"] = refreshed_model
+                try:
+                    return _validate_llm_response(
+                        await refreshed_client.chat.completions.create(**kwargs), task)
+                except Exception as retry_err:
+                    if not (
+                        _is_auth_error(retry_err)
+                        or _is_payment_error(retry_err)
+                        or _is_connection_error(retry_err)
+                        or _is_rate_limit_error(retry_err)
+                    ):
+                        raise
+                    first_err = retry_err
+
         if _is_auth_error(first_err) and client_is_nous:
             refreshed_client, refreshed_model = _refresh_nous_auxiliary_client(
                 cache_provider=resolved_provider or "nous",
diff --git a/agent/background_review.py b/agent/background_review.py
index 35d3d5191a0..bf99ee52845 100644
--- a/agent/background_review.py
+++ b/agent/background_review.py
@@ -483,6 +483,11 @@ def _run_review_in_thread(
             finally:
                 clear_thread_tool_whitelist()
 
+            # Snapshot review actions before teardown. close() is allowed to
+            # clean per-session state, but the user-visible self-improvement
+            # summary still needs the completed review agent's tool results.
+            review_messages = list(getattr(review_agent, "_session_messages", []))
+
             # Tear down memory providers while stdout is still
             # redirected so background thread teardown (Honcho flush,
             # Hindsight sync, etc.) stays silent.  The finally block
@@ -495,7 +500,6 @@ def _run_review_in_thread(
                 review_agent.close()
             except Exception:
                 pass
-            review_messages = list(getattr(review_agent, "_session_messages", []))
             review_agent = None
 
         # Scan the review agent's messages for successful tool actions
diff --git a/agent/bedrock_adapter.py b/agent/bedrock_adapter.py
index 620d1c99785..12c7afb8c18 100644
--- a/agent/bedrock_adapter.py
+++ b/agent/bedrock_adapter.py
@@ -1167,18 +1167,6 @@ def _extract_provider_from_arn(arn: str) -> str:
     """
     match = re.search(r"foundation-model/([^.]+)", arn)
     return match.group(1) if match else ""
-
-
-def get_bedrock_model_ids(region: str) -> List[str]:
-    """Return a flat list of available Bedrock model IDs for the given region.
-
-    Convenience wrapper around ``discover_bedrock_models()`` for use in
-    the model selection UI.
-    """
-    models = discover_bedrock_models(region)
-    return [m["id"] for m in models]
-
-
 # ---------------------------------------------------------------------------
 # Error classification — Bedrock-specific exceptions
 # ---------------------------------------------------------------------------
diff --git a/agent/browser_registry.py b/agent/browser_registry.py
index db608744b34..122eab4e565 100644
--- a/agent/browser_registry.py
+++ b/agent/browser_registry.py
@@ -186,37 +186,6 @@ def _resolve(configured: Optional[str]) -> Optional[BrowserProvider]:
     return None
 
 
-def get_active_browser_provider() -> Optional[BrowserProvider]:
-    """Resolve the currently-active cloud browser provider.
-
-    Reads ``browser.cloud_provider`` from config.yaml; falls back per the
-    module docstring. Returns None for local mode or when no provider is
-    available.
-    """
-    try:
-        from hermes_cli.config import read_raw_config
-
-        cfg = read_raw_config()
-        browser_cfg = cfg.get("browser", {})
-    except Exception as exc:
-        logger.debug("Could not read browser config: %s", exc)
-        browser_cfg = {}
-
-    configured: Optional[str] = None
-    if isinstance(browser_cfg, dict) and "cloud_provider" in browser_cfg:
-        try:
-            from tools.tool_backend_helpers import normalize_browser_cloud_provider
-
-            configured = normalize_browser_cloud_provider(
-                browser_cfg.get("cloud_provider")
-            )
-        except Exception as exc:
-            logger.debug("normalize_browser_cloud_provider failed: %s", exc)
-            configured = None
-
-    return _resolve(configured)
-
-
 def _reset_for_tests() -> None:
     """Clear the registry. **Test-only.**"""
     with _lock:
diff --git a/agent/chat_completion_helpers.py b/agent/chat_completion_helpers.py
index ce83dd04907..0785347d2c9 100644
--- a/agent/chat_completion_helpers.py
+++ b/agent/chat_completion_helpers.py
@@ -15,49 +15,23 @@ sites unchanged.  Symbols that tests patch on ``run_agent`` (e.g.
 
 from __future__ import annotations
 
-import concurrent.futures
-import contextvars
-import copy
 import json
 import logging
 import os
-import random
 import re
-import sys
 import threading
 import time
 import uuid
-from datetime import datetime
-from pathlib import Path
 from types import SimpleNamespace
-from typing import Any, Dict, List, Optional, Tuple
-from urllib.parse import urlparse, parse_qs, urlunparse
+from typing import Any, Dict, Optional
 
 from hermes_cli.timeouts import get_provider_request_timeout, get_provider_stale_timeout
 from hermes_constants import PARTIAL_STREAM_STUB_ID, FINISH_REASON_LENGTH
-from agent.error_classifier import classify_api_error, FailoverReason
+from agent.error_classifier import FailoverReason
 from agent.model_metadata import is_local_endpoint
 from agent.message_sanitization import (
     _sanitize_surrogates,
-    _sanitize_messages_surrogates,
-    _sanitize_structure_surrogates,
-    _sanitize_messages_non_ascii,
-    _sanitize_tools_non_ascii,
-    _sanitize_structure_non_ascii,
-    _strip_images_from_messages,
-    _strip_non_ascii,
     _repair_tool_call_arguments,
-    _escape_invalid_chars_in_json_strings,
-)
-from agent.tool_dispatch_helpers import (
-    _is_multimodal_tool_result,
-    _multimodal_text_summary,
-)
-from agent.retry_utils import jittered_backoff
-from agent.tool_guardrails import (
-    ToolGuardrailDecision,
-    append_toolguard_guidance,
-    toolguard_synthetic_result,
 )
 from tools.terminal_tool import is_persistent_env
 from utils import base_url_host_matches, base_url_hostname
@@ -175,13 +149,6 @@ def interruptible_api_call(agent, api_kwargs: dict):
             request_client_holder["owner_tid"] = threading.get_ident()
         return client
 
-    def _take_request_client():
-        with request_client_lock:
-            client = request_client_holder.get("client")
-            request_client_holder["client"] = None
-            request_client_holder["owner_tid"] = None
-            return client
-
     def _close_request_client_once(reason: str) -> None:
         # #29507: dispatch on the calling thread.
         #
@@ -310,8 +277,15 @@ def interruptible_api_call(agent, api_kwargs: dict):
     else:
         _codex_idle_timeout_default = 12.0
 
+    # No-byte TTFB cutoff. The OpenAI SDK's own streaming read timeout is far
+    # longer (openai 2.x DEFAULT_TIMEOUT.read = 600s), so a tight 12s default
+    # killed subscription-backed Codex requests mid-prefill before the backend
+    # had a chance to emit its first SSE event. Default to 120s — long enough to
+    # clear normal backend admission / prompt prefill, short enough to still
+    # reconnect promptly when the socket is genuinely wedged. Set
+    # HERMES_CODEX_TTFB_TIMEOUT_SECONDS=0 to disable this watchdog entirely.
     _ttfb_enabled = _codex_watchdog_enabled
-    _ttfb_timeout = _env_float("HERMES_CODEX_TTFB_TIMEOUT_SECONDS", 12.0)
+    _ttfb_timeout = _env_float("HERMES_CODEX_TTFB_TIMEOUT_SECONDS", 120.0)
     if _ttfb_timeout <= 0:
         _ttfb_enabled = False
     elif _openai_codex_backend:
@@ -333,7 +307,7 @@ def interruptible_api_call(agent, api_kwargs: dict):
                 _ttfb_disable_above,
             )
         else:
-            _ttfb_cap = _env_float("HERMES_CODEX_TTFB_MAX_SECONDS", 20.0)
+            _ttfb_cap = _env_float("HERMES_CODEX_TTFB_MAX_SECONDS", 120.0)
             if _ttfb_cap > 0 and _ttfb_timeout > _ttfb_cap:
                 logger.info(
                     "Capping openai-codex no-byte TTFB timeout from %.0fs to %.0fs "
@@ -403,13 +377,13 @@ def interruptible_api_call(agent, api_kwargs: dict):
                 _elapsed, _ttfb_timeout, api_kwargs.get("model", "unknown"),
             )
             if _silent_hint:
-                agent._emit_status(
+                agent._buffer_status(
                     f"⚠️ No first byte from provider in {int(_elapsed)}s "
                     f"(codex stream, model: {api_kwargs.get('model', 'unknown')}). "
                     f"Reconnecting. {_silent_hint}"
                 )
             else:
-                agent._emit_status(
+                agent._buffer_status(
                     f"⚠️ No first byte from provider in {int(_elapsed)}s "
                     f"(codex stream, model: {api_kwargs.get('model', 'unknown')}). "
                     f"Reconnecting."
@@ -455,7 +429,7 @@ def interruptible_api_call(agent, api_kwargs: dict):
                 api_kwargs.get("model", "unknown"),
                 f"{_est_tokens_for_codex_watchdog:,}",
             )
-            agent._emit_status(
+            agent._buffer_status(
                 f"⚠️ Codex stream sent no events for {int(_event_stale_elapsed)}s "
                 f"after first byte (model: {api_kwargs.get('model', 'unknown')}). "
                 f"Reconnecting."
@@ -493,13 +467,13 @@ def interruptible_api_call(agent, api_kwargs: dict):
                 api_kwargs.get("model", "unknown"), f"{_est_ctx:,}",
             )
             if _silent_hint:
-                agent._emit_status(
+                agent._buffer_status(
                     f"⚠️ No response from provider for {int(_elapsed)}s "
                     f"(non-streaming, model: {api_kwargs.get('model', 'unknown')}). "
                     f"{_silent_hint}"
                 )
             else:
-                agent._emit_status(
+                agent._buffer_status(
                     f"⚠️ No response from provider for {int(_elapsed)}s "
                     f"(non-streaming, model: {api_kwargs.get('model', 'unknown')}). "
                     f"Aborting call."
@@ -614,12 +588,23 @@ def build_api_kwargs(agent, api_messages: list) -> dict:
         # It also rejects ``enum`` values containing ``/`` (HuggingFace IDs
         # like ``Qwen/Qwen3.5-0.8B`` shipped by MCP servers) — same 400 with
         # the same opaque message; strip those enums too.
+        #
+        # Deep-copy ``tools_for_api`` before sanitizing: the sanitizers
+        # mutate in place (documented contract on ``strip_slash_enum`` /
+        # ``strip_pattern_and_format``), and ``tools_for_api`` is a direct
+        # reference to ``agent.tools``.  Without the copy, the first xAI
+        # request permanently strips constraints from the shared per-agent
+        # tool registry — every subsequent non-xAI call from the same
+        # agent (auxiliary task routed to Anthropic, OpenRouter fallback,
+        # main-model swap) sees the already-stripped schema.  See #27907.
         if is_xai_responses:
             try:
+                import copy as _copy
                 from tools.schema_sanitizer import (
                     strip_pattern_and_format,
                     strip_slash_enum,
                 )
+                tools_for_api = _copy.deepcopy(tools_for_api)
                 tools_for_api, _ = strip_pattern_and_format(tools_for_api)
                 tools_for_api, _ = strip_slash_enum(tools_for_api)
             except Exception as exc:
@@ -1262,7 +1247,7 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
                 api_mode=agent.api_mode,
             )
 
-        agent._emit_status(
+        agent._buffer_status(
             f"🔄 Primary model failed — switching to fallback: "
             f"{fb_model} via {fb_provider}"
         )
@@ -1636,13 +1621,6 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
             request_client_holder["owner_tid"] = threading.get_ident()
         return client
 
-    def _take_request_client():
-        with request_client_lock:
-            client = request_client_holder.get("client")
-            request_client_holder["client"] = None
-            request_client_holder["owner_tid"] = None
-            return client
-
     def _close_request_client_once(reason: str) -> None:
         # See #29507 explanation in the non-streaming variant above. A
         # stranger thread (the interrupt-check / stale-stream detector loop)
@@ -2251,7 +2229,7 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
                             mid_tool_call=False,
                             diag=request_client_holder.get("diag"),
                         )
-                        agent._emit_status(
+                        agent._buffer_status(
                             "❌ Provider returned malformed streaming data after "
                             f"{_max_stream_retries + 1} attempts. "
                             "The provider may be experiencing issues — "
@@ -2358,7 +2336,7 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
                 _stale_elapsed, _stream_stale_timeout,
                 api_kwargs.get("model", "unknown"), f"{_est_ctx:,}",
             )
-            agent._emit_status(
+            agent._buffer_status(
                 f"⚠️ No response from provider for {int(_stale_elapsed)}s "
                 f"(model: {api_kwargs.get('model', 'unknown')}, "
                 f"context: ~{_est_ctx:,} tokens). "
diff --git a/agent/codex_responses_adapter.py b/agent/codex_responses_adapter.py
index 230a6e613b1..943131f5592 100644
--- a/agent/codex_responses_adapter.py
+++ b/agent/codex_responses_adapter.py
@@ -980,6 +980,48 @@ def _extract_responses_reasoning_text(item: Any) -> str:
     return ""
 
 
+def _format_responses_error(error_obj: Any, response_status: str) -> str:
+    """Build a human-readable error string from a Responses ``response.error`` payload.
+
+    The OpenAI Responses API carries failure details under ``response.error``
+    on terminal ``response.failed`` events, in the shape
+    ``{"code": "rate_limit_exceeded", "message": "Slow down", "param": ...}``.
+    Earlier code only surfaced ``message``, which left users staring at bare
+    strings like ``"Slow down"`` while the failure mode (rate limit vs
+    context-length vs internal_error vs model-overloaded) was hidden in
+    ``code``. We now prefix ``code`` when both are present so consumers can
+    distinguish failure modes without parsing the bare message.
+
+    Falls back to ``code`` alone when ``message`` is empty, and to a stable
+    default referencing the response status when no error payload is
+    available at all. Adapted from anomalyco/opencode#28757.
+    """
+    # Pull code and message from either dict or attribute-style payloads.
+    code: Any = None
+    message: Any = None
+    if isinstance(error_obj, dict):
+        code = error_obj.get("code")
+        message = error_obj.get("message")
+    elif error_obj is not None:
+        code = getattr(error_obj, "code", None)
+        message = getattr(error_obj, "message", None)
+
+    code_str = str(code).strip() if isinstance(code, str) else (str(code).strip() if code else "")
+    message_str = str(message).strip() if isinstance(message, str) else (str(message).strip() if message else "")
+
+    if code_str and message_str:
+        return f"{code_str}: {message_str}"
+    if message_str:
+        return message_str
+    if code_str:
+        return code_str
+    if error_obj:
+        # Last-resort: stringify whatever the provider sent so it's at least
+        # visible in logs/UI rather than silently swallowed.
+        return str(error_obj)
+    return f"Responses API returned status '{response_status}'"
+
+
 # ---------------------------------------------------------------------------
 # Full response normalization
 # ---------------------------------------------------------------------------
@@ -1023,10 +1065,7 @@ def _normalize_codex_response(
 
     if response_status in {"failed", "cancelled"}:
         error_obj = getattr(response, "error", None)
-        if isinstance(error_obj, dict):
-            error_msg = error_obj.get("message") or str(error_obj)
-        else:
-            error_msg = str(error_obj) if error_obj else f"Responses API returned status '{response_status}'"
+        error_msg = _format_responses_error(error_obj, response_status)
         raise RuntimeError(error_msg)
 
     content_parts: List[str] = []
diff --git a/agent/codex_runtime.py b/agent/codex_runtime.py
index e2bcbfc824b..398deed3c16 100644
--- a/agent/codex_runtime.py
+++ b/agent/codex_runtime.py
@@ -16,7 +16,6 @@ compatibility.
 
 from __future__ import annotations
 
-import json
 import logging
 import os
 import time
diff --git a/agent/context_compressor.py b/agent/context_compressor.py
index f435d92e7f9..cf9c534decd 100644
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -75,6 +75,44 @@ _IMAGE_TOKEN_ESTIMATE = 1600
 _IMAGE_CHAR_EQUIVALENT = _IMAGE_TOKEN_ESTIMATE * _CHARS_PER_TOKEN
 _SUMMARY_FAILURE_COOLDOWN_SECONDS = 600
 
+# Hard ceiling for the deterministic summary-failure handoff.  The fallback is
+# only meant to preserve continuity anchors from the dropped window, not to
+# become another unbounded transcript copy after the LLM summarizer failed.
+_FALLBACK_SUMMARY_MAX_CHARS = 8_000
+_FALLBACK_TURN_MAX_CHARS = 700
+
+
+_PATH_MENTION_RE = re.compile(r"(?:/|~/?|[A-Za-z]:\\)[^\s`'\")\]}<>]+")
+
+
+def _dedupe_append(items: list[str], value: str, *, limit: int) -> None:
+    value = value.strip()
+    if value and value not in items and len(items) < limit:
+        items.append(value)
+
+
+def _extract_tool_call_name_and_args(tool_call: Any) -> tuple[str, str]:
+    """Return a best-effort ``(name, arguments)`` pair for dict/object tool calls."""
+    if isinstance(tool_call, dict):
+        fn = tool_call.get("function") or {}
+        return str(fn.get("name") or "unknown"), str(fn.get("arguments") or "")
+
+    fn = getattr(tool_call, "function", None)
+    if fn is None:
+        return "unknown", ""
+    return str(getattr(fn, "name", None) or "unknown"), str(getattr(fn, "arguments", None) or "")
+
+
+def _extract_tool_call_id(tool_call: Any) -> str:
+    if isinstance(tool_call, dict):
+        return str(tool_call.get("id") or "")
+    return str(getattr(tool_call, "id", "") or "")
+
+
+def _collect_path_mentions(text: str, relevant_files: list[str], *, limit: int = 12) -> None:
+    for match in _PATH_MENTION_RE.findall(text):
+        _dedupe_append(relevant_files, match.rstrip(".,:;"), limit=limit)
+
 
 def _content_length_for_budget(raw_content: Any) -> int:
     """Return the effective char-length of a message's content for token budgeting.
@@ -221,6 +259,114 @@ def _truncate_tool_call_args_json(args: str, head_chars: int = 200) -> str:
     return json.dumps(shrunken, ensure_ascii=False)
 
 
+_IMAGE_PART_TYPES = frozenset({"image_url", "input_image", "image"})
+
+
+def _is_image_part(part: Any) -> bool:
+    """True if ``part`` is a multimodal image content block.
+
+    Recognizes all three shapes the agent handles:
+      - OpenAI chat.completions: ``{"type": "image_url", "image_url": ...}``
+      - OpenAI Responses API:    ``{"type": "input_image", "image_url": "..."}``
+      - Anthropic native:        ``{"type": "image", "source": {...}}``
+    """
+    if not isinstance(part, dict):
+        return False
+    return part.get("type") in _IMAGE_PART_TYPES
+
+
+def _content_has_images(content: Any) -> bool:
+    """True if a message's ``content`` is a multimodal list with image parts."""
+    if not isinstance(content, list):
+        return False
+    return any(_is_image_part(p) for p in content)
+
+
+def _strip_images_from_content(content: Any) -> Any:
+    """Return a copy of ``content`` with every image part replaced by a
+    short text placeholder.
+
+    - String content is returned unchanged.
+    - Non-list, non-string content is returned unchanged.
+    - List content: image parts become ``{"type": "text", "text": "[Attached
+      image — stripped after compression]"}``; other parts are preserved as-is.
+
+    Input is never mutated.
+    """
+    if not isinstance(content, list):
+        return content
+    if not any(_is_image_part(p) for p in content):
+        return content
+
+    new_parts: List[Any] = []
+    for p in content:
+        if _is_image_part(p):
+            new_parts.append({
+                "type": "text",
+                "text": "[Attached image — stripped after compression]",
+            })
+        else:
+            new_parts.append(p)
+    return new_parts
+
+
+def _strip_historical_media(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Replace image parts in older messages with placeholder text.
+
+    The anchor is the *last* user message that has any image content. Every
+    message before that anchor gets its image parts replaced with a short
+    placeholder so the outgoing request stops re-shipping the same multi-MB
+    base-64 image blobs on every turn.
+
+    If no user message carries images, the list is returned unchanged.
+    If the only user message with images is the very first one (nothing
+    earlier to strip), the list is returned unchanged.
+
+    Shallow copies of touched messages only; input is never mutated.
+    Port of Kilo-Org/kilocode#9434 (adapted for the OpenAI-style message
+    shape the hermes compressor emits).
+    """
+    if not messages:
+        return messages
+
+    # Find the newest user message that carries at least one image part.
+    # We anchor on image-bearing user messages (not all user messages) so
+    # a plain text follow-up after a big-image turn still strips the old
+    # image — matching the problem kilocode#9434 set out to solve.
+    anchor = -1
+    for i in range(len(messages) - 1, -1, -1):
+        msg = messages[i]
+        if not isinstance(msg, dict):
+            continue
+        if msg.get("role") != "user":
+            continue
+        if _content_has_images(msg.get("content")):
+            anchor = i
+            break
+
+    if anchor <= 0:
+        # No image-bearing user message, or it's the very first message —
+        # nothing before it to strip.
+        return messages
+
+    changed = False
+    result: List[Dict[str, Any]] = []
+    for i, msg in enumerate(messages):
+        if i >= anchor or not isinstance(msg, dict):
+            result.append(msg)
+            continue
+        content = msg.get("content")
+        if not _content_has_images(content):
+            result.append(msg)
+            continue
+        new_msg = msg.copy()
+        new_msg["content"] = _strip_images_from_content(content)
+        result.append(new_msg)
+        changed = True
+
+    return result if changed else messages
+
+
 def _summarize_tool_result(tool_name: str, tool_args: str, tool_content: str) -> str:
     """Create an informative 1-line summary of a tool call + result.
 
@@ -372,6 +518,10 @@ class ContextCompressor(ContextEngine):
         self._last_compression_savings_pct = 100.0
         self._ineffective_compression_count = 0
         self._summary_failure_cooldown_until = 0.0  # transient errors must not block a fresh session
+        self.last_real_prompt_tokens = 0
+        self.last_compression_rough_tokens = 0
+        self.last_rough_tokens_when_real_prompt_fit = 0
+        self.awaiting_real_usage_after_compression = False
 
     def update_model(
         self,
@@ -429,8 +579,8 @@ class ContextCompressor(ContextEngine):
         self.quiet_mode = quiet_mode
         # When True, summary-generation failure aborts compression entirely
         # (returns messages unchanged, sets _last_compress_aborted=True).
-        # When False (default = historical behavior), insert a static
-        # "summary unavailable" placeholder and drop the middle window.
+        # When False (default = historical behavior), insert a
+        # deterministic "summary unavailable" handoff and drop the middle window.
         self.abort_on_summary_failure = abort_on_summary_failure
 
         self.context_length = get_model_context_length(
@@ -469,6 +619,10 @@ class ContextCompressor(ContextEngine):
 
         self.last_prompt_tokens = 0
         self.last_completion_tokens = 0
+        self.last_real_prompt_tokens = 0
+        self.last_compression_rough_tokens = 0
+        self.last_rough_tokens_when_real_prompt_fit = 0
+        self.awaiting_real_usage_after_compression = False
 
         self.summary_model = summary_model_override or ""
 
@@ -502,6 +656,44 @@ class ContextCompressor(ContextEngine):
         self.last_prompt_tokens = usage.get("prompt_tokens", 0)
         self.last_completion_tokens = usage.get("completion_tokens", 0)
         self.last_total_tokens = usage.get("total_tokens", self.last_prompt_tokens + self.last_completion_tokens)
+        if self.last_prompt_tokens > 0:
+            self.last_real_prompt_tokens = self.last_prompt_tokens
+            if self.last_prompt_tokens < self.threshold_tokens:
+                if self.awaiting_real_usage_after_compression and self.last_compression_rough_tokens > 0:
+                    self.last_rough_tokens_when_real_prompt_fit = self.last_compression_rough_tokens
+            else:
+                self.last_rough_tokens_when_real_prompt_fit = 0
+        self.awaiting_real_usage_after_compression = False
+
+    def should_defer_preflight_to_real_usage(self, rough_tokens: int) -> bool:
+        """Return True when a high rough preflight estimate is known-noisy.
+
+        ``estimate_request_tokens_rough(..., tools=...)`` intentionally
+        overestimates schema-heavy requests so Hermes compresses before a
+        provider rejects the payload. After a successful compressed API call,
+        though, provider ``prompt_tokens`` are a better signal than repeating
+        compaction from the same rough schema overhead. Defer only while the
+        rough estimate has grown modestly since a request the provider proved
+        fit under the threshold.
+        """
+        if rough_tokens < self.threshold_tokens:
+            return False
+        if self.last_real_prompt_tokens <= 0:
+            return False
+        if self.last_real_prompt_tokens >= self.threshold_tokens:
+            return False
+
+        baseline = self.last_rough_tokens_when_real_prompt_fit or self.last_compression_rough_tokens
+        if baseline <= 0:
+            return False
+
+        growth = max(0, rough_tokens - baseline)
+        tolerated_growth = max(4096, int(self.threshold_tokens * 0.05))
+        if growth > tolerated_growth:
+            return False
+
+        self.last_rough_tokens_when_real_prompt_fit = max(baseline, rough_tokens)
+        return True
 
     def should_compress(self, prompt_tokens: int = None) -> bool:
         """Check if context exceeds the compression threshold.
@@ -776,6 +968,195 @@ class ContextCompressor(ContextEngine):
 
         return "\n\n".join(parts)
 
+    def _build_static_fallback_summary(
+        self,
+        turns_to_summarize: List[Dict[str, Any]],
+        reason: str | None = None,
+    ) -> str:
+        """Build a deterministic handoff when the LLM summarizer is unavailable.
+
+        This is intentionally much less rich than an LLM-written summary, but it
+        is still better than a bare "N messages were removed" marker.  It keeps
+        the most useful continuity anchors that can be extracted locally:
+        recent user asks, assistant/tool actions, files/commands mentioned in
+        tool calls, and any error text.  The result uses the normal summary
+        structure so downstream prompts can recover gracefully after a provider
+        outage or summary-model failure.
+        """
+        user_asks: list[str] = []
+        assistant_actions: list[str] = []
+        tool_actions: list[str] = []
+        relevant_files: list[str] = []
+        blockers: list[str] = []
+        last_dropped_turns: list[str] = []
+
+        def _compact_fallback_turn(value: Any) -> str:
+            text = redact_sensitive_text(_content_text_for_contains(value))
+            text = re.sub(r"\bgh[pousr]_[A-Za-z0-9_]{8,}\b", "[REDACTED]", text)
+            text = re.sub(r"\s+", " ", text).strip()
+            if len(text) > _FALLBACK_TURN_MAX_CHARS:
+                text = text[: _FALLBACK_TURN_MAX_CHARS - 15].rstrip() + " ...[truncated]"
+            return re.sub(r"\bgh[pousr]_[A-Za-z0-9_.-]+", "[REDACTED]", text)
+
+        def _remember_dropped_turn(label: str, text: str, *, limit: int = 8) -> None:
+            text = text.strip()
+            if not text:
+                return
+            last_dropped_turns.append(f"{label}: {text}")
+            if len(last_dropped_turns) > limit:
+                del last_dropped_turns[0]
+
+        def _collect_paths_from_jsonish(obj: Any) -> None:
+            if isinstance(obj, dict):
+                for key, val in obj.items():
+                    if key in {"path", "workdir", "file_path", "output_path"} and isinstance(val, str):
+                        _dedupe_append(relevant_files, val, limit=12)
+                    _collect_paths_from_jsonish(val)
+            elif isinstance(obj, list):
+                for val in obj:
+                    _collect_paths_from_jsonish(val)
+            elif isinstance(obj, str):
+                _collect_path_mentions(obj, relevant_files)
+
+        call_id_to_tool: dict[str, tuple[str, str]] = {}
+        for msg in turns_to_summarize:
+            if msg.get("role") == "assistant" and msg.get("tool_calls"):
+                for tc in msg.get("tool_calls") or []:
+                    name, raw_args = _extract_tool_call_name_and_args(tc)
+                    args = redact_sensitive_text(raw_args)
+                    call_id = _extract_tool_call_id(tc)
+                    if call_id:
+                        call_id_to_tool[call_id] = (name, args)
+                    if args:
+                        try:
+                            parsed = json.loads(args)
+                        except Exception:
+                            parsed = args
+                        _collect_paths_from_jsonish(parsed)
+
+        for msg in turns_to_summarize:
+            role = msg.get("role", "unknown")
+            text = _compact_fallback_turn(msg.get("content"))
+            _collect_path_mentions(text, relevant_files)
+
+            turn_text = text
+            turn_tool_names: list[str] = []
+            if role == "assistant" and msg.get("tool_calls"):
+                for tc in msg.get("tool_calls") or []:
+                    name, _args = _extract_tool_call_name_and_args(tc)
+                    turn_tool_names.append(name)
+                if turn_tool_names:
+                    prefix = "tool calls: " + ", ".join(turn_tool_names[:6])
+                    turn_text = f"{prefix}; {turn_text}" if turn_text else prefix
+            _remember_dropped_turn(str(role).upper(), turn_text)
+
+            if len(text) > 600:
+                text = text[:420].rstrip() + " ... " + text[-160:].lstrip()
+
+            if role == "user" and text:
+                user_asks.append(text)
+            elif role == "assistant":
+                tool_names: list[str] = []
+                for tc in msg.get("tool_calls") or []:
+                    name, _args = _extract_tool_call_name_and_args(tc)
+                    tool_names.append(name)
+                if tool_names:
+                    assistant_actions.append(
+                        "Called tool(s): " + ", ".join(tool_names[:6])
+                    )
+                elif text:
+                    assistant_actions.append(text)
+            elif role == "tool":
+                call_id = str(msg.get("tool_call_id") or "")
+                tool_name, tool_args = call_id_to_tool.get(call_id, ("unknown", ""))
+                tool_actions.append(
+                    _summarize_tool_result(tool_name, tool_args, text or "")
+                )
+                if re.search(
+                    r"\b(error|failed|exception|traceback|timeout|timed out|fatal)\b",
+                    text,
+                    re.I,
+                ):
+                    blockers.append(text[:500])
+
+        def _bullets(items: list[str], limit: int = 8) -> str:
+            unique: list[str] = []
+            seen: set[str] = set()
+            for item in items:
+                item = item.strip()
+                if not item or item in seen:
+                    continue
+                seen.add(item)
+                unique.append(item)
+                if len(unique) >= limit:
+                    break
+            return "\n".join(f"- {item}" for item in unique) if unique else "None."
+
+        completed: list[str] = []
+        for idx, item in enumerate((assistant_actions + tool_actions)[:12], start=1):
+            completed.append(f"{idx}. {item}")
+
+        active_task = (
+            f"User asked: {user_asks[-1]!r}"
+            if user_asks
+            else "Unknown from deterministic fallback."
+        )
+        previous_summary_note = ""
+        if self._previous_summary:
+            previous_summary_note = (
+                "\n\nPrevious compaction summary was present and should still be treated as "
+                "background continuity context, but the latest LLM summary update failed."
+            )
+
+        reason_text = f" Summary failure reason: {reason}." if reason else ""
+        body = f"""## Active Task
+{active_task}
+
+## Goal
+Recovered from a deterministic fallback because the LLM context summarizer was unavailable. Continue from the protected recent messages after this summary and use current file/system state for exact details.{previous_summary_note}
+
+## Constraints & Preferences
+- This fallback was generated locally without an LLM summary call.
+- Secrets and credentials were redacted before preservation.
+- The summary may be incomplete; prefer verifying current files, git state, processes, and test results instead of assuming omitted details.
+
+## Completed Actions
+{chr(10).join(completed) if completed else "None recoverable from compacted turns."}
+
+## Active State
+Unknown from deterministic fallback. Inspect current repository/session state if needed.
+
+## In Progress
+{active_task}
+
+## Blocked
+{_bullets(blockers, limit=5)}
+
+## Key Decisions
+None recoverable from deterministic fallback.
+
+## Resolved Questions
+None recoverable from deterministic fallback.
+
+## Pending User Asks
+{active_task}
+
+## Relevant Files
+{_bullets(relevant_files, limit=12)}
+
+## Remaining Work
+Continue from the most recent unfulfilled user ask and protected tail messages. Verify state with tools before making claims.
+
+## Last Dropped Turns
+{_bullets(last_dropped_turns, limit=8)}
+
+## Critical Context
+Summary generation was unavailable, so this is a best-effort deterministic fallback for {len(turns_to_summarize)} compacted message(s).{reason_text}"""
+        summary = self._with_summary_prefix(redact_sensitive_text(body.strip()))
+        if len(summary) > _FALLBACK_SUMMARY_MAX_CHARS:
+            summary = summary[: _FALLBACK_SUMMARY_MAX_CHARS - 42].rstrip() + "\n...[fallback summary truncated]"
+        return summary
+
     def _fallback_to_main_for_compression(self, e: Exception, reason: str) -> None:
         """Switch from a separate ``summary_model`` back to the main model.
 
@@ -803,7 +1184,11 @@ class ContextCompressor(ContextEngine):
         self.summary_model = ""  # empty = use main model
         self._summary_failure_cooldown_until = 0.0  # no cooldown — retry immediately
 
-    def _generate_summary(self, turns_to_summarize: List[Dict[str, Any]], focus_topic: str = None) -> Optional[str]:
+    def _generate_summary(
+        self,
+        turns_to_summarize: List[Dict[str, Any]],
+        focus_topic: Optional[str] = None,
+    ) -> Optional[str]:
         """Generate a structured summary of conversation turns.
 
         Uses a structured template (Goal, Progress, Decisions, Resolved/Pending
@@ -1500,9 +1885,9 @@ The user has requested that this compaction PRIORITISE preserving all informatio
         #   True  → ABORT compression entirely. Return messages unchanged
         #           and set _last_compress_aborted=True so callers can warn
         #           the user and stop the auto-compress retry loop.
-        #   False → Fall through to the legacy fallback path below: insert
-        #           a static "summary unavailable" placeholder and drop the
-        #           middle window.  Records _last_summary_fallback_used /
+        #   False → Fall through to the default fallback path below: insert
+        #           a deterministic "summary unavailable" handoff and drop
+        #           the middle window.  Records _last_summary_fallback_used /
         #           _last_summary_dropped_count for gateway hygiene to
         #           surface a warning.
         # Default is False (historical behavior).
@@ -1535,21 +1920,18 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                     )
             compressed.append(msg)
 
-        # Legacy fallback path: LLM summary failed and abort_on_summary_failure
-        # is False (the default).  Insert a static placeholder so the model
-        # knows context was lost rather than silently dropping everything.
+        # If LLM summary failed, insert a deterministic fallback so the model
+        # gets at least locally recoverable continuity anchors instead of a
+        # content-free "N messages were removed" marker.
         if not summary:
             if not self.quiet_mode:
-                logger.warning("Summary generation failed — inserting static fallback context marker")
+                logger.warning("Summary generation failed — inserting deterministic fallback context summary")
             n_dropped = compress_end - compress_start
             self._last_summary_dropped_count = n_dropped
             self._last_summary_fallback_used = True
-            summary = (
-                f"{SUMMARY_PREFIX}\n"
-                f"Summary generation was unavailable. {n_dropped} message(s) were "
-                f"removed to free context space but could not be summarized. The removed "
-                f"messages contained earlier work in this session. Continue based on the "
-                f"recent messages below and the current state of any files or resources."
+            summary = self._build_static_fallback_summary(
+                turns_to_summarize,
+                reason=self._last_summary_error,
             )
 
         _merge_summary_into_tail = False
@@ -1609,6 +1991,14 @@ The user has requested that this compaction PRIORITISE preserving all informatio
 
         compressed = self._sanitize_tool_pairs(compressed)
 
+        # Replace image parts in all compressed messages before the newest
+        # image-bearing user turn with a short text placeholder. Without
+        # this, tail messages keep their original multi-MB base-64 image
+        # payloads forever, which can push every subsequent API request
+        # past the provider's body-size limit and wedge the session.
+        # Port of Kilo-Org/kilocode#9434.
+        compressed = _strip_historical_media(compressed)
+
         new_estimate = estimate_messages_tokens_rough(compressed)
         saved_estimate = display_tokens - new_estimate
 
diff --git a/agent/context_engine.py b/agent/context_engine.py
index c30a7a84752..bb426fc189d 100644
--- a/agent/context_engine.py
+++ b/agent/context_engine.py
@@ -71,7 +71,12 @@ class ContextEngine(ABC):
     def update_from_response(self, usage: Dict[str, Any]) -> None:
         """Update tracked token usage from an API response.
 
-        Called after every LLM call with the usage dict from the response.
+        Called after every LLM call with a normalized usage dict. The legacy
+        keys ``prompt_tokens``, ``completion_tokens``, and ``total_tokens``
+        are always present. Newer hosts also include canonical buckets:
+        ``input_tokens``, ``output_tokens``, ``cache_read_tokens``,
+        ``cache_write_tokens``, and ``reasoning_tokens``. Engines should
+        treat those fields as optional for compatibility with older hosts.
         """
 
     @abstractmethod
diff --git a/agent/conversation_compression.py b/agent/conversation_compression.py
index a620f343e99..9a93ba4a496 100644
--- a/agent/conversation_compression.py
+++ b/agent/conversation_compression.py
@@ -34,13 +34,33 @@ import tempfile
 import uuid
 from datetime import datetime
 from pathlib import Path
-from typing import Any, List, Optional, Tuple
+from typing import Any, Optional, Tuple
 
 from agent.model_metadata import estimate_request_tokens_rough
 
 logger = logging.getLogger(__name__)
 
 
+def _compression_lock_holder(agent: Any) -> str:
+    """Build a unique holder id for the lock: pid:tid:agent-instance:uuid.
+
+    The pid+tid prefix lets ops tell crashed/abandoned holders apart from
+    live ones (expiry-based recovery uses the timestamp, but ``holder``
+    is what shows up in diagnostics + log lines). The agent instance id
+    and a per-acquire uuid disambiguate two co-resident agents on the
+    same thread (background_review forks run on a worker thread, but
+    on machines where compression itself dispatches to a thread pool
+    we want each acquire to be unique).
+    """
+    import threading
+    return (
+        f"pid={os.getpid()}"
+        f":tid={threading.get_ident()}"
+        f":agent={id(agent):x}"
+        f":nonce={uuid.uuid4().hex[:8]}"
+    )
+
+
 def check_compression_model_feasibility(agent: Any) -> None:
     """Warn at session start if the auxiliary compression model's context
     window is smaller than the main model's compression threshold.
@@ -305,6 +325,103 @@ def compress_context(
         "🗜️ Compacting context — summarizing earlier conversation so I can continue..."
     )
 
+    # ── Compression lock ────────────────────────────────────────────────
+    # Atomic, state.db-backed lock per session_id.  Without this, two
+    # AIAgent instances that share the same session_id (most commonly the
+    # parent-turn agent and its background-review fork — see
+    # ``agent/background_review.py``: ``review_agent.session_id =
+    # agent.session_id``) can each call compress() on overlapping
+    # snapshots of the same conversation.  Both succeed, both rotate
+    # ``agent.session_id`` to a fresh id, both create child sessions in
+    # state.db parented to the same old id.  The gateway's SessionEntry
+    # only catches one rotation, so the other child becomes an orphan
+    # that silently accumulates writes — Damien's repro shape.
+    #
+    # Acquire keyed on the OLD session_id (the rotation target's parent),
+    # because that's the id that competing paths see and read from
+    # SessionEntry at the start of their own compression attempt.
+    #
+    # If we can't acquire the lock, another path is mid-compression on
+    # this session.  Aborting is correct: the messages are unchanged, the
+    # other path's rotation will produce the canonical new session_id,
+    # and our caller's auto-compress loop sees ``len(returned) == len(input)``
+    # and stops retrying for this cycle. The session is NOT corrupted —
+    # we just sit out this round and let the winner finish.
+    _lock_db = getattr(agent, "_session_db", None)
+    _lock_sid = agent.session_id or ""
+    _lock_holder: Optional[str] = None
+    # Probe whether the lock subsystem is actually available on this
+    # SessionDB instance.  A process running mismatched module versions
+    # (e.g. ``conversation_compression.py`` reloaded after a pull but the
+    # long-lived ``hermes_state.SessionDB`` class still bound to the
+    # pre-#34351 version in memory) has the call site but not the method.
+    # In that case ``try_acquire_compression_lock`` raises AttributeError —
+    # NOT a ``sqlite3.Error`` — so the method's own fail-open guard never
+    # runs and the exception propagates to the outer agent loop, which
+    # prints the error and retries.  Because compression never succeeds,
+    # the token count never drops and the loop re-triggers compaction
+    # forever (the "API call #47/#48/#49 ... has no attribute
+    # try_acquire_compression_lock" spin).  Fail OPEN here: if the lock
+    # subsystem is missing or broken in any unexpected way, skip locking
+    # and proceed with compression.  Skipping the lock risks a rare
+    # concurrent-compression session fork; an infinite no-progress loop
+    # that never compresses at all is strictly worse.
+    if _lock_db is not None and _lock_sid:
+        _lock_holder = _compression_lock_holder(agent)
+        try:
+            _lock_acquired = _lock_db.try_acquire_compression_lock(
+                _lock_sid, _lock_holder
+            )
+        except Exception as _lock_err:
+            # Broken/absent lock subsystem (version skew, etc.).  Log once
+            # per session and proceed WITHOUT the lock rather than letting
+            # the exception spin the outer loop.
+            _lock_holder = None  # we don't own anything to release
+            if getattr(agent, "_last_compression_lock_error_sid", None) != _lock_sid:
+                agent._last_compression_lock_error_sid = _lock_sid
+                logger.warning(
+                    "compression lock subsystem unavailable for session=%s "
+                    "(%s: %s) — proceeding without lock. This usually means a "
+                    "stale in-memory module after an update; restart the "
+                    "process (or `hermes update`) to resync.",
+                    _lock_sid, type(_lock_err).__name__, _lock_err,
+                )
+            _lock_acquired = True  # treat as acquired-but-unlocked; proceed
+        if not _lock_acquired:
+            try:
+                existing = _lock_db.get_compression_lock_holder(_lock_sid)
+            except Exception:
+                existing = None
+            logger.warning(
+                "compression skipped: another path is compressing session=%s "
+                "(holder=%s) — returning messages unchanged to avoid session fork",
+                _lock_sid, existing,
+            )
+            _lock_holder = None  # don't release a lock we don't own
+            # Surface to the user once — quiet for downstream auto-compress loops
+            if getattr(agent, "_last_compression_lock_warning_sid", None) != _lock_sid:
+                agent._last_compression_lock_warning_sid = _lock_sid
+                try:
+                    agent._emit_warning(
+                        "⚠ Skipping concurrent compression — another path "
+                        "is already compressing this session. Will retry "
+                        "after it finishes."
+                    )
+                except Exception:
+                    pass
+            _existing_sp = getattr(agent, "_cached_system_prompt", None)
+            if not _existing_sp:
+                _existing_sp = agent._build_system_prompt(system_message)
+            return messages, _existing_sp
+
+    def _release_lock() -> None:
+        """Release the lock keyed on the OLD session_id (before rotation)."""
+        if _lock_db is not None and _lock_sid and _lock_holder:
+            try:
+                _lock_db.release_compression_lock(_lock_sid, _lock_holder)
+            except Exception as _rel_err:
+                logger.debug("compression lock release failed: %s", _rel_err)
+
     # Notify external memory provider before compression discards context
     if agent._memory_manager:
         try:
@@ -318,6 +435,11 @@ def compress_context(
         # Plugin context engine with strict signature that doesn't accept
         # focus_topic / force — fall back to calling without them.
         compressed = agent.context_compressor.compress(messages, current_tokens=approx_tokens)
+    except BaseException:
+        # ANY exception during compress() must release the lock so the
+        # session isn't permanently blocked from future compression.
+        _release_lock()
+        raise
 
     # If compression aborted (aux LLM failed to produce a usable summary)
     # the compressor returns the input messages unchanged.  Surface the
@@ -336,6 +458,7 @@ def compress_context(
         _existing_sp = getattr(agent, "_cached_system_prompt", None)
         if not _existing_sp:
             _existing_sp = agent._build_system_prompt(system_message)
+        _release_lock()  # compression aborted — no rotation will happen
         return messages, _existing_sp
 
     summary_error = getattr(agent.context_compressor, "_last_summary_error", None)
@@ -421,6 +544,7 @@ def compress_context(
                 agent.session_id or "",
                 boundary_reason="compression",
                 old_session_id=_old_sid,
+                conversation_id=getattr(agent, "_gateway_session_key", None),
             )
     except Exception as _ce_err:
         logger.debug("context engine on_session_start (compression): %s", _ce_err)
@@ -479,6 +603,12 @@ def compress_context(
         agent.session_id or "none", _pre_msg_count, len(compressed),
         f"{_compressed_est:,}",
     )
+    # Release the lock on the OLD session_id only AFTER rotation completed
+    # and all post-rotation bookkeeping (memory manager, context engine,
+    # file dedup) ran. A concurrent path that wakes up the moment we
+    # release will see the NEW session_id in state.db / SessionEntry and
+    # acquire on that — no race against our just-finished work.
+    _release_lock()
     return compressed, new_system_prompt
 
 
diff --git a/agent/conversation_loop.py b/agent/conversation_loop.py
index 271056138b1..a6c975be391 100644
--- a/agent/conversation_loop.py
+++ b/agent/conversation_loop.py
@@ -27,8 +27,6 @@ import time
 import uuid
 from typing import Any, Dict, List, Optional
 
-from agent.anthropic_adapter import _is_oauth_token
-from agent.auxiliary_client import set_runtime_main
 from agent.codex_responses_adapter import _summarize_user_message_for_log
 from agent.display import KawaiiSpinner
 from agent.error_classifier import FailoverReason, classify_api_error
@@ -49,25 +47,17 @@ from agent.model_metadata import (
     MINIMUM_CONTEXT_LENGTH,
     estimate_messages_tokens_rough,
     estimate_request_tokens_rough,
-    get_next_probe_tier,
+    get_context_length_from_provider_error,
     parse_available_output_tokens_from_error,
-    parse_context_limit_from_error,
     save_context_length,
 )
-from agent.nous_rate_guard import (
-    clear_nous_rate_limit,
-    is_genuine_nous_rate_limit,
-    nous_rate_limit_remaining,
-    record_nous_rate_limit,
-)
 from agent.process_bootstrap import _install_safe_stdio
 from agent.prompt_caching import apply_anthropic_cache_control
 from agent.retry_utils import jittered_backoff
 from agent.trajectory import has_incomplete_scratchpad
 from agent.usage_pricing import estimate_usage_cost, normalize_usage
-from hermes_constants import display_hermes_home as _dhh_fn, PARTIAL_STREAM_STUB_ID
+from hermes_constants import PARTIAL_STREAM_STUB_ID
 from hermes_logging import set_session_context
-from tools.schema_sanitizer import strip_pattern_and_format
 from tools.skill_provenance import set_current_write_origin
 from utils import base_url_host_matches, env_var_enabled
 
@@ -127,6 +117,104 @@ def _ra():
     return run_agent
 
 
+def _nous_entitlement_message(capability: str) -> str:
+    try:
+        from hermes_cli.nous_account import (
+            format_nous_portal_entitlement_message,
+            get_nous_portal_account_info,
+        )
+
+        account_info = get_nous_portal_account_info(force_fresh=True)
+        message = format_nous_portal_entitlement_message(
+            account_info,
+            capability=capability,
+        )
+        return message or ""
+    except Exception:
+        return ""
+
+
+def _print_nous_entitlement_guidance(agent, capability: str) -> bool:
+    message = _nous_entitlement_message(capability)
+    if not message:
+        return False
+    for line in message.splitlines():
+        agent._vprint(f"{agent.log_prefix}   💡 {line}", force=True)
+    return True
+
+
+def _is_nous_inference_route(provider: str, base_url: str) -> bool:
+    provider = (provider or "").strip().lower()
+    if provider == "nous":
+        return True
+    base = str(base_url or "")
+    return (
+        base_url_host_matches(base, "inference-api.nousresearch.com")
+        or base_url_host_matches(base, "inference.nousresearch.com")
+    )
+
+
+def _billing_or_entitlement_message(
+    *,
+    capability: str,
+    provider: str,
+    base_url: str,
+    model: str,
+) -> str:
+    if _is_nous_inference_route(provider, base_url):
+        return _nous_entitlement_message(capability)
+
+    provider_label = (provider or "").strip() or "the selected provider"
+    model_label = (model or "").strip() or "the selected model"
+    lines = [
+        (
+            f"{provider_label} reported that billing, credits, or account "
+            f"entitlement is exhausted for {model_label}."
+        ),
+        "Add credits or update billing with that provider, then retry.",
+    ]
+    if base_url_host_matches(str(base_url or ""), "openrouter.ai"):
+        lines.append("OpenRouter credits: https://openrouter.ai/settings/credits")
+    lines.append("You can switch providers temporarily with /model <model> --provider <provider>.")
+    return "\n".join(lines)
+
+
+def _print_billing_or_entitlement_guidance(
+    agent,
+    *,
+    capability: str,
+    provider: str,
+    base_url: str,
+    model: str,
+) -> bool:
+    message = _billing_or_entitlement_message(
+        capability=capability,
+        provider=provider,
+        base_url=base_url,
+        model=model,
+    )
+    if not message:
+        return False
+    for line in message.splitlines():
+        agent._vprint(f"{agent.log_prefix}   💡 {line}", force=True)
+    return True
+
+
+def _try_refresh_nous_paid_entitlement_credentials(agent) -> bool:
+    """Refresh Nous runtime credentials after a fresh paid-entitlement check."""
+    try:
+        from hermes_cli.nous_account import get_nous_portal_account_info
+
+        account_info = get_nous_portal_account_info(force_fresh=True)
+        if account_info.paid_service_access is not True:
+            return False
+        return agent._try_refresh_nous_client_credentials(
+            force=True,
+        )
+    except Exception:
+        return False
+
+
 def _restore_or_build_system_prompt(agent, system_message, conversation_history):
     """Restore the cached system prompt from the session DB or build it fresh.
 
@@ -310,7 +398,6 @@ def run_conversation(
 
     # Tag all log records on this thread with the session ID so
     # ``hermes logs --session <id>`` can filter a single conversation.
-    from hermes_logging import set_session_context
     set_session_context(agent.session_id)
 
     # Bind the skill write-origin ContextVar for this thread so tool
@@ -319,7 +406,6 @@ def run_conversation(
     # a foreground user-directed turn. Set at the top of each call;
     # the review fork runs on its own thread with a fresh context,
     # so the foreground value here does not leak into it.
-    from tools.skill_provenance import set_current_write_origin
     set_current_write_origin(getattr(agent, "_memory_write_origin", "assistant_tool"))
 
     # If the previous turn activated fallback, restore the primary
@@ -1017,6 +1103,7 @@ def run_conversation(
         codex_auth_retry_attempted=False
         anthropic_auth_retry_attempted=False
         nous_auth_retry_attempted=False
+        nous_paid_entitlement_refresh_attempted=False
         copilot_auth_retry_attempted=False
         thinking_sig_retry_attempted = False
         invalid_encrypted_content_retry_attempted = False
@@ -1050,17 +1137,18 @@ def run_conversation(
                             f"Nous Portal rate limit active — "
                             f"resets in {_fmt_nous_remaining(_nous_remaining)}."
                         )
-                        agent._vprint(
-                            f"{agent.log_prefix}⏳ {_nous_msg} Trying fallback...",
-                            force=True,
+                        agent._buffer_vprint(
+                            f"⏳ {_nous_msg} Trying fallback..."
                         )
-                        agent._emit_status(f"⏳ {_nous_msg}")
+                        agent._buffer_status(f"⏳ {_nous_msg}")
                         if agent._try_activate_fallback():
                             retry_count = 0
                             compression_attempts = 0
                             primary_recovery_attempted = False
                             continue
-                        # No fallback available — return with clear message
+                        # No fallback available — surface buffered context
+                        # so user sees the rate-limit message that led here.
+                        agent._flush_status_buffer()
                         agent._persist_session(messages, conversation_history)
                         return {
                             "final_response": (
@@ -1082,6 +1170,14 @@ def run_conversation(
 
             try:
                 agent._reset_stream_delivery_tracking()
+                # api_messages is built once, before this retry loop, while the
+                # primary provider is active.  A mid-conversation fallback can
+                # switch to a require-side provider (DeepSeek / Kimi / MiMo) that
+                # rejects assistant turns lacking reasoning_content.  Re-apply the
+                # echo-back pad for the *current* provider here (idempotent no-op
+                # unless the active provider needs it) so the fallback request
+                # isn't sent with stale, primary-shaped reasoning fields.
+                agent._reapply_reasoning_echo_for_provider(api_messages)
                 api_kwargs = agent._build_api_kwargs(api_messages)
                 if agent._force_ascii_payload:
                     _sanitize_structure_non_ascii(api_kwargs)
@@ -1275,9 +1371,10 @@ def run_conversation(
                             error_details.append("response.choices is empty")
 
                 if response_invalid:
-                    # Stop spinner before printing error messages
+                    # Stop spinner silently — retry status is now buffered
+                    # and only surfaced if every retry+fallback exhausts.
                     if thinking_spinner:
-                        thinking_spinner.stop("(´;ω;`) oops, retrying...")
+                        thinking_spinner.stop("")
                         thinking_spinner = None
                     if agent.thinking_callback:
                         agent.thinking_callback("")
@@ -1290,7 +1387,7 @@ def run_conversation(
                     # rate-limit symptom.  Switch to fallback immediately
                     # rather than retrying with extended backoff.
                     if agent._fallback_index < len(agent._fallback_chain):
-                        agent._emit_status("⚠️ Empty/malformed response — switching to fallback...")
+                        agent._buffer_status("⚠️ Empty/malformed response — switching to fallback...")
                     if agent._try_activate_fallback():
                         retry_count = 0
                         compression_attempts = 0
@@ -1352,20 +1449,22 @@ def run_conversation(
                     else:
                         _failure_hint = f"response time {api_duration:.1f}s"
 
-                    agent._vprint(f"{agent.log_prefix}⚠️  Invalid API response (attempt {retry_count}/{max_retries}): {', '.join(error_details)}", force=True)
-                    agent._vprint(f"{agent.log_prefix}   🏢 Provider: {provider_name}", force=True)
+                    agent._buffer_vprint(f"⚠️  Invalid API response (attempt {retry_count}/{max_retries}): {', '.join(error_details)}")
+                    agent._buffer_vprint(f"   🏢 Provider: {provider_name}")
                     cleaned_provider_error = agent._clean_error_message(error_msg)
-                    agent._vprint(f"{agent.log_prefix}   📝 Provider message: {cleaned_provider_error}", force=True)
-                    agent._vprint(f"{agent.log_prefix}   ⏱️  {_failure_hint}", force=True)
+                    agent._buffer_vprint(f"   📝 Provider message: {cleaned_provider_error}")
+                    agent._buffer_vprint(f"   ⏱️  {_failure_hint}")
                     
                     if retry_count >= max_retries:
                         # Try fallback before giving up
-                        agent._emit_status(f"⚠️ Max retries ({max_retries}) for invalid responses — trying fallback...")
+                        agent._buffer_status(f"⚠️ Max retries ({max_retries}) for invalid responses — trying fallback...")
                         if agent._try_activate_fallback():
                             retry_count = 0
                             compression_attempts = 0
                             primary_recovery_attempted = False
                             continue
+                        # Terminal — flush buffered retry trace so user sees what happened.
+                        agent._flush_status_buffer()
                         agent._emit_status(f"❌ Max retries ({max_retries}) exceeded for invalid responses. Giving up.")
                         logger.error(f"{agent.log_prefix}Invalid API response after {max_retries} retries.")
                         agent._persist_session(messages, conversation_history)
@@ -1379,7 +1478,7 @@ def run_conversation(
                     
                     # Backoff before retry — jittered exponential: 5s base, 120s cap
                     wait_time = jittered_backoff(retry_count, base_delay=5.0, max_delay=120.0)
-                    agent._vprint(f"{agent.log_prefix}⏳ Retrying in {wait_time:.1f}s ({_failure_hint})...", force=True)
+                    agent._buffer_vprint(f"⏳ Retrying in {wait_time:.1f}s ({_failure_hint})...")
                     logger.warning(f"Invalid API response (retry {retry_count}/{max_retries}): {', '.join(error_details)} | Provider: {provider_name}")
                     
                     # Sleep in small increments to stay responsive to interrupts
@@ -1606,14 +1705,14 @@ def run_conversation(
                         if assistant_message is not None and _trunc_has_tool_calls:
                             if truncated_tool_call_retries < 1:
                                 truncated_tool_call_retries += 1
-                                agent._vprint(
-                                    f"{agent.log_prefix}⚠️  Truncated tool call detected — retrying API call...",
-                                    force=True,
+                                agent._buffer_vprint(
+                                    f"⚠️  Truncated tool call detected — retrying API call..."
                                 )
                                 # Don't append the broken response to messages;
                                 # just re-run the same API call from the current
                                 # message state, giving the model another chance.
                                 continue
+                            agent._flush_status_buffer()
                             agent._vprint(
                                 f"{agent.log_prefix}⚠️  Truncated tool call response detected again — refusing to execute incomplete tool arguments.",
                                 force=True,
@@ -1647,6 +1746,7 @@ def run_conversation(
                         }
                     else:
                         # First message was truncated - mark as failed
+                        agent._flush_status_buffer()
                         agent._vprint(f"{agent.log_prefix}❌ First response truncated - cannot recover", force=True)
                         agent._persist_session(messages, conversation_history)
                         return {
@@ -1668,10 +1768,19 @@ def run_conversation(
                     prompt_tokens = canonical_usage.prompt_tokens
                     completion_tokens = canonical_usage.output_tokens
                     total_tokens = canonical_usage.total_tokens
+                    # Forward canonical token + cache buckets so context engines
+                    # can make decisions on cache hit ratios / reasoning costs,
+                    # not just legacy aggregate tokens. Legacy keys stay for
+                    # back-compat with engines that only read prompt/completion/total.
                     usage_dict = {
                         "prompt_tokens": prompt_tokens,
                         "completion_tokens": completion_tokens,
                         "total_tokens": total_tokens,
+                        "input_tokens": canonical_usage.input_tokens,
+                        "output_tokens": canonical_usage.output_tokens,
+                        "cache_read_tokens": canonical_usage.cache_read_tokens,
+                        "cache_write_tokens": canonical_usage.cache_write_tokens,
+                        "reasoning_tokens": canonical_usage.reasoning_tokens,
                     }
                     agent.context_compressor.update_from_response(usage_dict)
 
@@ -1789,6 +1898,11 @@ def run_conversation(
                         )
                 
                 has_retried_429 = False  # Reset on success
+                # Note: don't clear the retry buffer here — an "API call
+                # success" only means we got bytes back, not that we got
+                # usable content. Empty responses still loop through the
+                # empty-retry path below; the buffer is cleared when
+                # genuinely successful content is detected later (~L4127).
                 # Clear Nous rate limit state on successful request —
                 # proves the limit has reset and other sessions can
                 # resume hitting Nous.
@@ -1815,9 +1929,10 @@ def run_conversation(
                 break
 
             except Exception as api_error:
-                # Stop spinner before printing error messages
+                # Stop spinner silently — retry status is buffered and
+                # only flushed when every retry+fallback is exhausted.
                 if thinking_spinner:
-                    thinking_spinner.stop("(╥_╥) error, retrying...")
+                    thinking_spinner.stop("")
                     thinking_spinner = None
                 if agent.thinking_callback:
                     agent.thinking_callback("")
@@ -1872,14 +1987,12 @@ def run_conversation(
                     if _surrogates_found or _is_surrogate_error:
                         agent._unicode_sanitization_passes += 1
                         if _surrogates_found:
-                            agent._vprint(
-                                f"{agent.log_prefix}⚠️  Stripped invalid surrogate characters from messages. Retrying...",
-                                force=True,
+                            agent._buffer_vprint(
+                                f"⚠️  Stripped invalid surrogate characters from messages. Retrying..."
                             )
                         else:
-                            agent._vprint(
-                                f"{agent.log_prefix}⚠️  Surrogate encoding error — retrying after full-payload sanitization...",
-                                force=True,
+                            agent._buffer_vprint(
+                                f"⚠️  Surrogate encoding error — retrying after full-payload sanitization..."
                             )
                         continue
                     if _is_ascii_codec:
@@ -2093,6 +2206,23 @@ def run_conversation(
                     classified.should_rotate_credential, classified.should_fallback,
                 )
 
+                if (
+                    classified.reason == FailoverReason.billing
+                    and _is_nous_inference_route(
+                        getattr(agent, "provider", "") or "",
+                        getattr(agent, "base_url", "") or "",
+                    )
+                    and not nous_paid_entitlement_refresh_attempted
+                ):
+                    nous_paid_entitlement_refresh_attempted = True
+                    if _try_refresh_nous_paid_entitlement_credentials(agent):
+                        agent._vprint(
+                            f"{agent.log_prefix}🔐 Nous paid access verified — "
+                            "refreshed runtime credentials and retrying request...",
+                            force=True,
+                        )
+                        continue
+
                 recovered_with_pool, has_retried_429 = agent._recover_with_credential_pool(
                     status_code=status_code,
                     has_retried_429=has_retried_429,
@@ -2190,7 +2320,7 @@ def run_conversation(
                     codex_auth_retry_attempted = True
                     if agent._try_refresh_codex_client_credentials(force=True):
                         _label = "xAI OAuth" if agent.provider == "xai-oauth" else "Codex"
-                        agent._vprint(f"{agent.log_prefix}🔐 {_label} auth refreshed after 401. Retrying request...")
+                        agent._buffer_vprint(f"🔐 {_label} auth refreshed after 401. Retrying request...")
                         continue
                 if (
                     agent.api_mode == "chat_completions"
@@ -2217,7 +2347,8 @@ def run_conversation(
                     print(f"{agent.log_prefix}🔐 Nous 401 — Portal authentication failed.")
                     if _body_text:
                         print(f"{agent.log_prefix}   Response: {_body_text}")
-                    print(f"{agent.log_prefix}   Most likely: Portal OAuth expired, account out of credits, or agent key revoked.")
+                    if not _print_nous_entitlement_guidance(agent, "Nous model access"):
+                        print(f"{agent.log_prefix}   Most likely: Portal OAuth expired, account out of credits, or agent key revoked.")
                     print(f"{agent.log_prefix}   Troubleshooting:")
                     print(f"{agent.log_prefix}     • Re-authenticate: hermes auth add nous")
                     print(f"{agent.log_prefix}     • Check credits / billing: https://portal.nousresearch.com")
@@ -2230,7 +2361,7 @@ def run_conversation(
                 ):
                     copilot_auth_retry_attempted = True
                     if agent._try_refresh_copilot_client_credentials():
-                        agent._vprint(f"{agent.log_prefix}🔐 Copilot credentials refreshed after 401. Retrying request...")
+                        agent._buffer_vprint(f"🔐 Copilot credentials refreshed after 401. Retrying request...")
                         continue
                 if (
                     agent.api_mode == "anthropic_messages"
@@ -2405,41 +2536,37 @@ def run_conversation(
                 _base = getattr(agent, "base_url", "unknown")
                 _model = getattr(agent, "model", "unknown")
                 _status_code_str = f" [HTTP {status_code}]" if status_code else ""
-                agent._vprint(f"{agent.log_prefix}⚠️  API call failed (attempt {retry_count}/{max_retries}): {error_type}{_status_code_str}", force=True)
-                agent._vprint(f"{agent.log_prefix}   🔌 Provider: {_provider}  Model: {_model}", force=True)
-                agent._vprint(f"{agent.log_prefix}   🌐 Endpoint: {_base}", force=True)
-                agent._vprint(f"{agent.log_prefix}   📝 Error: {_error_summary}", force=True)
+                agent._buffer_vprint(f"⚠️  API call failed (attempt {retry_count}/{max_retries}): {error_type}{_status_code_str}")
+                agent._buffer_vprint(f"   🔌 Provider: {_provider}  Model: {_model}")
+                agent._buffer_vprint(f"   🌐 Endpoint: {_base}")
+                agent._buffer_vprint(f"   📝 Error: {_error_summary}")
                 if status_code and status_code < 500:
                     _err_body = getattr(api_error, "body", None)
                     _err_body_str = str(_err_body)[:300] if _err_body else None
                     if _err_body_str:
-                        agent._vprint(f"{agent.log_prefix}   📋 Details: {_err_body_str}", force=True)
-                agent._vprint(f"{agent.log_prefix}   ⏱️  Elapsed: {elapsed_time:.2f}s  Context: {len(api_messages)} msgs, ~{approx_tokens:,} tokens")
+                        agent._buffer_vprint(f"   📋 Details: {_err_body_str}")
+                agent._buffer_vprint(f"   ⏱️  Elapsed: {elapsed_time:.2f}s  Context: {len(api_messages)} msgs, ~{approx_tokens:,} tokens")
 
                 # Actionable hint for OpenRouter "no tool endpoints" error.
-                # This fires regardless of whether fallback succeeds — the
-                # user needs to know WHY their model failed so they can fix
-                # their provider routing, not just silently fall back.
+                # Buffered like the rest of the retry trace — surfaced only
+                # if every retry+fallback exhausts.  Avoids spamming users
+                # who recover automatically via fallback.
                 if (
                     agent._is_openrouter_url()
                     and "support tool use" in error_msg
                 ):
-                    agent._vprint(
-                        f"{agent.log_prefix}   💡 No OpenRouter providers for {_model} support tool calling with your current settings.",
-                        force=True,
+                    agent._buffer_vprint(
+                        f"   💡 No OpenRouter providers for {_model} support tool calling with your current settings."
                     )
                     if agent.providers_allowed:
-                        agent._vprint(
-                            f"{agent.log_prefix}      Your provider_routing.only restriction is filtering out tool-capable providers.",
-                            force=True,
+                        agent._buffer_vprint(
+                            f"      Your provider_routing.only restriction is filtering out tool-capable providers."
                         )
-                        agent._vprint(
-                            f"{agent.log_prefix}      Try removing the restriction or adding providers that support tools for this model.",
-                            force=True,
+                        agent._buffer_vprint(
+                            f"      Try removing the restriction or adding providers that support tools for this model."
                         )
-                    agent._vprint(
-                        f"{agent.log_prefix}      Check which providers support tools: https://openrouter.ai/models/{_model}",
-                        force=True,
+                    agent._buffer_vprint(
+                        f"      Check which providers support tools: https://openrouter.ai/models/{_model}"
                     )
 
                 # Check for interrupt before deciding to retry
@@ -2489,11 +2616,10 @@ def run_conversation(
                             # user later enables extra usage the 1M limit
                             # should come back automatically.
                             compressor._context_probe_persistable = False
-                        agent._vprint(
-                            f"{agent.log_prefix}⚠️  Anthropic long-context tier "
+                        agent._buffer_vprint(
+                            f"⚠️  Anthropic long-context tier "
                             f"requires extra usage — reducing context: "
-                            f"{old_ctx:,} → {_reduced_ctx:,} tokens",
-                            force=True,
+                            f"{old_ctx:,} → {_reduced_ctx:,} tokens"
                         )
 
                     compression_attempts += 1
@@ -2509,7 +2635,7 @@ def run_conversation(
                         # messages to the new session, not skipping them.
                         conversation_history = None
                         if len(messages) < original_len or old_ctx > _reduced_ctx:
-                            agent._emit_status(
+                            agent._buffer_status(
                                 f"🗜️ Context reduced to {_reduced_ctx:,} tokens "
                                 f"(was {old_ctx:,}), retrying..."
                             )
@@ -2538,7 +2664,12 @@ def run_conversation(
                         base_url=getattr(agent, "base_url", None),
                     )
                     if not pool_may_recover:
-                        agent._emit_status("⚠️ Rate limited — switching to fallback provider...")
+                        if classified.reason == FailoverReason.billing:
+                            agent._buffer_status(
+                                "⚠️ Billing or credits exhausted — switching to fallback provider..."
+                            )
+                        else:
+                            agent._buffer_status("⚠️ Rate limited — switching to fallback provider...")
                         if agent._try_activate_fallback(reason=classified.reason):
                             retry_count = 0
                             compression_attempts = 0
@@ -2650,6 +2781,8 @@ def run_conversation(
                 if is_payload_too_large:
                     compression_attempts += 1
                     if compression_attempts > max_compression_attempts:
+                        # Terminal — surface the buffered retry trace.
+                        agent._flush_status_buffer()
                         agent._vprint(f"{agent.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached for payload-too-large error.", force=True)
                         agent._vprint(f"{agent.log_prefix}   💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True)
                         logger.error(f"{agent.log_prefix}413 compression failed after {max_compression_attempts} attempts.")
@@ -2663,7 +2796,7 @@ def run_conversation(
                             "failed": True,
                             "compression_exhausted": True,
                         }
-                    agent._emit_status(f"⚠️  Request payload too large (413) — compression attempt {compression_attempts}/{max_compression_attempts}...")
+                    agent._buffer_status(f"⚠️  Request payload too large (413) — compression attempt {compression_attempts}/{max_compression_attempts}...")
 
                     original_len = len(messages)
                     messages, active_system_prompt = agent._compress_context(
@@ -2676,11 +2809,14 @@ def run_conversation(
                     conversation_history = None
 
                     if len(messages) < original_len:
-                        agent._emit_status(f"🗜️ Compressed {original_len} → {len(messages)} messages, retrying...")
+                        agent._buffer_status(f"🗜️ Compressed {original_len} → {len(messages)} messages, retrying...")
                         time.sleep(2)  # Brief pause between compression retries
                         restart_with_compressed_messages = True
                         break
                     else:
+                        # Terminal — surface buffered context so the user
+                        # sees what compression attempts were made.
+                        agent._flush_status_buffer()
                         agent._vprint(f"{agent.log_prefix}❌ Payload too large and cannot compress further.", force=True)
                         agent._vprint(f"{agent.log_prefix}   💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True)
                         logger.error(f"{agent.log_prefix}413 payload too large. Cannot compress further.")
@@ -2724,16 +2860,16 @@ def run_conversation(
                         # touching context_length or triggering compression.
                         safe_out = max(1, available_out - 64)  # small safety margin
                         agent._ephemeral_max_output_tokens = safe_out
-                        agent._vprint(
-                            f"{agent.log_prefix}⚠️  Output cap too large for current prompt — "
+                        agent._buffer_vprint(
+                            f"⚠️  Output cap too large for current prompt — "
                             f"retrying with max_tokens={safe_out:,} "
-                            f"(available_tokens={available_out:,}; context_length unchanged at {old_ctx:,})",
-                            force=True,
+                            f"(available_tokens={available_out:,}; context_length unchanged at {old_ctx:,})"
                         )
                         # Still count against compression_attempts so we don't
                         # loop forever if the error keeps recurring.
                         compression_attempts += 1
                         if compression_attempts > max_compression_attempts:
+                            agent._flush_status_buffer()
                             agent._vprint(f"{agent.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached.", force=True)
                             agent._vprint(f"{agent.log_prefix}   💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True)
                             logger.error(f"{agent.log_prefix}Context compression failed after {max_compression_attempts} attempts.")
@@ -2750,9 +2886,13 @@ def run_conversation(
                         restart_with_compressed_messages = True
                         break
 
-                    # Error is about the INPUT being too large — reduce context_length.
-                    # Try to parse the actual limit from the error message
-                    parsed_limit = parse_context_limit_from_error(error_msg)
+                    # Error is about the INPUT being too large.  Only reduce
+                    # context_length when the provider explicitly reports the
+                    # real lower limit.  If the provider only says "input
+                    # exceeds the context window", keep the configured window
+                    # and try compression; guessing probe tiers can incorrectly
+                    # turn a user-configured 1M window into 256K/128K/64K.
+                    new_ctx = get_context_length_from_provider_error(error_msg, old_ctx)
                     _provider_lower = (getattr(agent, "provider", "") or "").lower()
                     _base_lower = (getattr(agent, "base_url", "") or "").rstrip("/").lower()
                     is_minimax_provider = (
@@ -2764,24 +2904,12 @@ def run_conversation(
                     )
                     minimax_delta_only_overflow = (
                         is_minimax_provider
-                        and parsed_limit is None
+                        and new_ctx is None
                         and "context window exceeds limit (" in error_msg
                     )
-                    if parsed_limit and parsed_limit < old_ctx:
-                        new_ctx = parsed_limit
-                        agent._vprint(f"{agent.log_prefix}Context limit detected from API: {new_ctx:,} tokens (was {old_ctx:,})", force=True)
-                    elif minimax_delta_only_overflow:
-                        new_ctx = old_ctx
-                        agent._vprint(
-                            f"{agent.log_prefix}Provider reported overflow amount only; "
-                            f"keeping context_length at {old_ctx:,} tokens and compressing.",
-                            force=True,
-                        )
-                    else:
-                        # Step down to the next probe tier
-                        new_ctx = get_next_probe_tier(old_ctx)
 
-                    if new_ctx and new_ctx < old_ctx:
+                    if new_ctx is not None:
+                        agent._buffer_vprint(f"Context limit detected from API: {new_ctx:,} tokens (was {old_ctx:,})")
                         compressor.update_model(
                             model=agent.model,
                             context_length=new_ctx,
@@ -2791,23 +2919,26 @@ def run_conversation(
                             api_mode=agent.api_mode,
                         )
                         # Context probing flags — only set on built-in
-                        # compressor (plugin engines manage their own).
+                        # compressor (plugin engines manage their own).  This
+                        # value came from the provider, so it is safe to cache.
                         if hasattr(compressor, "_context_probed"):
                             compressor._context_probed = True
-                            # Only persist limits parsed from the provider's
-                            # error message (a real number).  Guessed fallback
-                            # tiers from get_next_probe_tier() should stay
-                            # in-memory only — persisting them pollutes the
-                            # cache with wrong values.
-                            compressor._context_probe_persistable = bool(
-                                parsed_limit and parsed_limit == new_ctx
-                            )
-                        agent._vprint(f"{agent.log_prefix}⚠️  Context length exceeded — stepping down: {old_ctx:,} → {new_ctx:,} tokens", force=True)
+                            compressor._context_probe_persistable = True
+                        agent._buffer_vprint(f"⚠️  Context length exceeded — using provider limit: {old_ctx:,} → {new_ctx:,} tokens")
+                    elif minimax_delta_only_overflow:
+                        agent._buffer_vprint(
+                            f"Provider reported overflow amount only; "
+                            f"keeping context_length at {old_ctx:,} tokens and compressing."
+                        )
                     else:
-                        agent._vprint(f"{agent.log_prefix}⚠️  Context length exceeded at minimum tier — attempting compression...", force=True)
+                        agent._buffer_vprint(
+                            f"⚠️  Context length exceeded, but provider did not report a max context length; "
+                            f"keeping context_length at {old_ctx:,} tokens and compressing."
+                        )
 
                     compression_attempts += 1
                     if compression_attempts > max_compression_attempts:
+                        agent._flush_status_buffer()
                         agent._vprint(f"{agent.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached.", force=True)
                         agent._vprint(f"{agent.log_prefix}   💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True)
                         logger.error(f"{agent.log_prefix}Context compression failed after {max_compression_attempts} attempts.")
@@ -2821,7 +2952,7 @@ def run_conversation(
                             "failed": True,
                             "compression_exhausted": True,
                         }
-                    agent._emit_status(f"🗜️ Context too large (~{approx_tokens:,} tokens) — compressing ({compression_attempts}/{max_compression_attempts})...")
+                    agent._buffer_status(f"🗜️ Context too large (~{approx_tokens:,} tokens) — compressing ({compression_attempts}/{max_compression_attempts})...")
 
                     original_len = len(messages)
                     messages, active_system_prompt = agent._compress_context(
@@ -2835,12 +2966,13 @@ def run_conversation(
 
                     if len(messages) < original_len or new_ctx and new_ctx < old_ctx:
                         if len(messages) < original_len:
-                            agent._emit_status(f"🗜️ Compressed {original_len} → {len(messages)} messages, retrying...")
+                            agent._buffer_status(f"🗜️ Compressed {original_len} → {len(messages)} messages, retrying...")
                         time.sleep(2)  # Brief pause between compression retries
                         restart_with_compressed_messages = True
                         break
                     else:
                         # Can't compress further and already at minimum tier
+                        agent._flush_status_buffer()
                         agent._vprint(f"{agent.log_prefix}❌ Context length exceeded and cannot compress further.", force=True)
                         agent._vprint(f"{agent.log_prefix}   💡 The conversation has accumulated too much content. Try /new to start fresh, or /compress to manually trigger compression.", force=True)
                         logger.error(f"{agent.log_prefix}Context length exceeded: {approx_tokens:,} tokens. Cannot compress further.")
@@ -2929,7 +3061,10 @@ def run_conversation(
                 if is_client_error:
                     # Try fallback before aborting — a different provider
                     # may not have the same issue (rate limit, auth, etc.)
-                    agent._emit_status(f"⚠️ Non-retryable error (HTTP {status_code}) — trying fallback...")
+                    if classified.reason == FailoverReason.content_policy_blocked:
+                        agent._buffer_status("⚠️ Provider safety filter blocked this request — trying fallback...")
+                    else:
+                        agent._buffer_status(f"⚠️ Non-retryable error (HTTP {status_code}) — trying fallback...")
                     if agent._try_activate_fallback():
                         retry_count = 0
                         compression_attempts = 0
@@ -2939,16 +3074,38 @@ def run_conversation(
                         agent._dump_api_request_debug(
                             api_kwargs, reason="non_retryable_client_error", error=api_error,
                         )
-                    agent._emit_status(
-                        f"❌ Non-retryable error (HTTP {status_code}): "
-                        f"{agent._summarize_api_error(api_error)}"
-                    )
+                    # Terminal — flush buffered context so the user sees
+                    # what was tried before the abort.
+                    agent._flush_status_buffer()
+                    if classified.reason == FailoverReason.content_policy_blocked:
+                        agent._emit_status(
+                            f"❌ Provider safety filter blocked this request: "
+                            f"{agent._summarize_api_error(api_error)}"
+                        )
+                    else:
+                        agent._emit_status(
+                            f"❌ Non-retryable error (HTTP {status_code}): "
+                            f"{agent._summarize_api_error(api_error)}"
+                        )
                     agent._vprint(f"{agent.log_prefix}❌ Non-retryable client error (HTTP {status_code}). Aborting.", force=True)
                     agent._vprint(f"{agent.log_prefix}   🔌 Provider: {_provider}  Model: {_model}", force=True)
                     agent._vprint(f"{agent.log_prefix}   🌐 Endpoint: {_base}", force=True)
                     # Actionable guidance for common auth errors
                     if classified.is_auth or classified.reason == FailoverReason.billing:
-                        if _provider in {"openai-codex", "xai-oauth", "nous"} and status_code == 401:
+                        if classified.reason == FailoverReason.billing and _print_billing_or_entitlement_guidance(
+                            agent,
+                            capability="model access",
+                            provider=_provider,
+                            base_url=str(_base),
+                            model=_model,
+                        ):
+                            pass
+                        elif _provider == "nous" and _print_nous_entitlement_guidance(
+                            agent,
+                            "Nous model access",
+                        ):
+                            pass
+                        elif _provider in {"openai-codex", "xai-oauth", "nous"} and status_code == 401:
                             if _provider == "openai-codex":
                                 agent._vprint(f"{agent.log_prefix}   💡 Codex OAuth token was rejected (HTTP 401). Your token may have been", force=True)
                                 agent._vprint(f"{agent.log_prefix}      refreshed by another client (Codex CLI, VS Code). To fix:", force=True)
@@ -2976,6 +3133,28 @@ def run_conversation(
                                 agent._vprint(f"{agent.log_prefix}      • Check credits: https://openrouter.ai/settings/credits", force=True)
                     else:
                         agent._vprint(f"{agent.log_prefix}   💡 This type of error won't be fixed by retrying.", force=True)
+                    # Content-policy blocks deserve their own actionable
+                    # guidance — neither "fix your API key" nor "retry won't
+                    # help" tells the user what to actually do. The provider
+                    # has refused this specific prompt, so the recovery is
+                    # either a rephrase or routing to a different model.
+                    if classified.reason == FailoverReason.content_policy_blocked:
+                        agent._vprint(
+                            f"{agent.log_prefix}   💡 The provider's safety filter rejected this specific prompt.",
+                            force=True,
+                        )
+                        agent._vprint(
+                            f"{agent.log_prefix}      • Try rephrasing the request, narrowing the context, or splitting into smaller steps.",
+                            force=True,
+                        )
+                        agent._vprint(
+                            f"{agent.log_prefix}      • Configure a fallback provider so future blocks route automatically:",
+                            force=True,
+                        )
+                        agent._vprint(
+                            f"{agent.log_prefix}        hermes fallback add   (interactive picker — same as `hermes model`)",
+                            force=True,
+                        )
                     logger.error(f"{agent.log_prefix}Non-retryable client error: {api_error}")
                     # Skip session persistence when the error is likely
                     # context-overflow related (status 400 + large session).
@@ -2990,6 +3169,23 @@ def run_conversation(
                         )
                     else:
                         agent._persist_session(messages, conversation_history)
+                    if classified.reason == FailoverReason.content_policy_blocked:
+                        _summary = agent._summarize_api_error(api_error)
+                        _policy_response = (
+                            f"⚠️  The model provider's safety filter blocked this request "
+                            f"(not a Hermes/gateway failure).\n\n"
+                            f"Provider message: {_summary}\n\n"
+                            f"Try rephrasing the request, narrowing the context, or "
+                            f"adding a fallback provider with `hermes fallback add`."
+                        )
+                        return {
+                            "final_response": _policy_response,
+                            "messages": messages,
+                            "api_calls": api_call_count,
+                            "completed": False,
+                            "failed": True,
+                            "error": f"content_policy_blocked: {_summary}",
+                        }
                     return {
                         "final_response": None,
                         "messages": messages,
@@ -3011,14 +3207,32 @@ def run_conversation(
                         retry_count = 0
                         continue
                     # Try fallback before giving up entirely
-                    agent._emit_status(f"⚠️ Max retries ({max_retries}) exhausted — trying fallback...")
+                    agent._buffer_status(f"⚠️ Max retries ({max_retries}) exhausted — trying fallback...")
                     if agent._try_activate_fallback():
                         retry_count = 0
                         compression_attempts = 0
                         primary_recovery_attempted = False
                         continue
+                    # Terminal — flush buffered retry/fallback trace.
+                    agent._flush_status_buffer()
                     _final_summary = agent._summarize_api_error(api_error)
-                    if is_rate_limited:
+                    _billing_guidance = ""
+                    if classified.reason == FailoverReason.billing:
+                        agent._emit_status(f"❌ Billing or credits exhausted — {_final_summary}")
+                        _billing_guidance = _billing_or_entitlement_message(
+                            capability="model access",
+                            provider=_provider,
+                            base_url=str(_base),
+                            model=_model,
+                        )
+                        _print_billing_or_entitlement_guidance(
+                            agent,
+                            capability="model access",
+                            provider=_provider,
+                            base_url=str(_base),
+                            model=_model,
+                        )
+                    elif is_rate_limited:
                         agent._emit_status(f"❌ Rate limited after {max_retries} retries — {_final_summary}")
                     else:
                         agent._emit_status(f"❌ API failed after {max_retries} retries — {_final_summary}")
@@ -3063,7 +3277,12 @@ def run_conversation(
                             api_kwargs, reason="max_retries_exhausted", error=api_error,
                         )
                     agent._persist_session(messages, conversation_history)
-                    _final_response = f"API call failed after {max_retries} retries: {_final_summary}"
+                    if classified.reason == FailoverReason.billing:
+                        _final_response = f"Billing or credits exhausted: {_final_summary}"
+                        if _billing_guidance:
+                            _final_response += f"\n\n{_billing_guidance}"
+                    else:
+                        _final_response = f"API call failed after {max_retries} retries: {_final_summary}"
                     if _is_stream_drop:
                         _final_response += (
                             "\n\nThe provider's stream connection keeps "
@@ -3095,9 +3314,9 @@ def run_conversation(
                                 pass
                 wait_time = _retry_after if _retry_after else jittered_backoff(retry_count, base_delay=2.0, max_delay=60.0)
                 if is_rate_limited:
-                    agent._emit_status(f"⏱️ Rate limited. Waiting {wait_time:.1f}s (attempt {retry_count + 1}/{max_retries})...")
+                    agent._buffer_status(f"⏱️ Rate limited. Waiting {wait_time:.1f}s (attempt {retry_count + 1}/{max_retries})...")
                 else:
-                    agent._emit_status(f"⏳ Retrying in {wait_time:.1f}s (attempt {retry_count}/{max_retries})...")
+                    agent._buffer_status(f"⏳ Retrying in {wait_time:.1f}s (attempt {retry_count}/{max_retries})...")
                 logger.warning(
                     "Retrying API call in %ss (attempt %s/%s) %s error=%s",
                     wait_time,
@@ -3256,14 +3475,15 @@ def run_conversation(
             if has_incomplete_scratchpad(assistant_message.content or ""):
                 agent._incomplete_scratchpad_retries += 1
                 
-                agent._vprint(f"{agent.log_prefix}⚠️  Incomplete <REASONING_SCRATCHPAD> detected (opened but never closed)")
+                agent._buffer_vprint(f"⚠️  Incomplete <REASONING_SCRATCHPAD> detected (opened but never closed)")
                 
                 if agent._incomplete_scratchpad_retries <= 2:
-                    agent._vprint(f"{agent.log_prefix}🔄 Retrying API call ({agent._incomplete_scratchpad_retries}/2)...")
+                    agent._buffer_vprint(f"🔄 Retrying API call ({agent._incomplete_scratchpad_retries}/2)...")
                     # Don't add the broken message, just retry
                     continue
                 else:
                     # Max retries - discard this turn and save as partial
+                    agent._flush_status_buffer()
                     agent._vprint(f"{agent.log_prefix}❌ Max retries (2) for incomplete scratchpad. Saving as partial.", force=True)
                     agent._incomplete_scratchpad_retries = 0
                     
@@ -3371,9 +3591,10 @@ def run_conversation(
                     available = ", ".join(sorted(agent.valid_tool_names))
                     invalid_name = invalid_tool_calls[0]
                     invalid_preview = invalid_name[:80] + "..." if len(invalid_name) > 80 else invalid_name
-                    agent._vprint(f"{agent.log_prefix}⚠️  Unknown tool '{invalid_preview}' — sending error to model for agent-correction ({agent._invalid_tool_retries}/3)")
+                    agent._buffer_vprint(f"⚠️  Unknown tool '{invalid_preview}' — sending error to model for agent-correction ({agent._invalid_tool_retries}/3)")
 
                     if agent._invalid_tool_retries >= 3:
+                        agent._flush_status_buffer()
                         agent._vprint(f"{agent.log_prefix}❌ Max retries (3) for invalid tool calls exceeded. Stopping as partial.", force=True)
                         agent._invalid_tool_retries = 0
                         agent._persist_session(messages, conversation_history)
@@ -3457,16 +3678,16 @@ def run_conversation(
                     agent._invalid_json_retries += 1
 
                     tool_name, error_msg = invalid_json_args[0]
-                    agent._vprint(f"{agent.log_prefix}⚠️  Invalid JSON in tool call arguments for '{tool_name}': {error_msg}")
+                    agent._buffer_vprint(f"⚠️  Invalid JSON in tool call arguments for '{tool_name}': {error_msg}")
 
                     if agent._invalid_json_retries < 3:
-                        agent._vprint(f"{agent.log_prefix}🔄 Retrying API call ({agent._invalid_json_retries}/3)...")
+                        agent._buffer_vprint(f"🔄 Retrying API call ({agent._invalid_json_retries}/3)...")
                         # Don't add anything to messages, just retry the API call
                         continue
                     else:
                         # Instead of returning partial, inject tool error results so the model can recover.
                         # Using tool results (not user messages) preserves role alternation.
-                        agent._vprint(f"{agent.log_prefix}⚠️  Injecting recovery tool results for invalid JSON...")
+                        agent._buffer_vprint(f"⚠️  Injecting recovery tool results for invalid JSON...")
                         agent._invalid_json_retries = 0  # Reset for next attempt
                         
                         # Append the assistant message with its (broken) tool_calls
@@ -3774,7 +3995,7 @@ def run_conversation(
                             "Empty response after tool calls — nudging model "
                             "to continue processing"
                         )
-                        agent._emit_status(
+                        agent._buffer_status(
                             "⚠️ Model returned empty after tool calls — "
                             "nudging to continue"
                         )
@@ -3820,7 +4041,7 @@ def run_conversation(
                             "prefilling to continue (%d/2)",
                             agent._thinking_prefill_retries,
                         )
-                        agent._emit_status(
+                        agent._buffer_status(
                             f"↻ Thinking-only response — prefilling to continue "
                             f"({agent._thinking_prefill_retries}/2)"
                         )
@@ -3855,7 +4076,7 @@ def run_conversation(
                             "retry %d/3 (model=%s)",
                             agent._empty_content_retries, agent.model,
                         )
-                        agent._emit_status(
+                        agent._buffer_status(
                             f"⚠️ Empty response from model — retrying "
                             f"({agent._empty_content_retries}/3)"
                         )
@@ -3874,13 +4095,13 @@ def run_conversation(
                             agent._empty_content_retries, agent.model,
                             agent.provider,
                         )
-                        agent._emit_status(
+                        agent._buffer_status(
                             "⚠️ Model returning empty responses — "
                             "switching to fallback provider..."
                         )
                         if agent._try_activate_fallback():
                             agent._empty_content_retries = 0
-                            agent._emit_status(
+                            agent._buffer_status(
                                 f"↻ Switched to fallback: {agent.model} "
                                 f"({agent.provider})"
                             )
@@ -3894,6 +4115,9 @@ def run_conversation(
                     # Exhausted retries and fallback chain (or no
                     # fallback configured).  Fall through to the
                     # "(empty)" terminal.
+                    # Surface the buffered retry/fallback trace so the
+                    # user can see what was attempted before "(empty)".
+                    agent._flush_status_buffer()
                     _turn_exit_reason = "empty_response_exhausted"
                     reasoning_text = agent._extract_reasoning(assistant_message)
                     agent._drop_trailing_empty_response_scaffolding(messages)
@@ -3938,6 +4162,9 @@ def run_conversation(
                 # Reset retry counter/signature on successful content
                 agent._empty_content_retries = 0
                 agent._thinking_prefill_retries = 0
+                # Successful content reached — drop any buffered retry
+                # status from earlier failed attempts in this turn.
+                agent._clear_status_buffer()
 
                 if (
                     agent.api_mode == "codex_responses"
@@ -4074,36 +4301,54 @@ def run_conversation(
             )
         final_response = agent._handle_max_iterations(messages, api_call_count)
 
-        # If running as a kanban worker, block the task so the dispatcher
-        # knows the worker could not complete (rather than treating it as a
+        # If running as a kanban worker, signal the dispatcher that the
+        # worker could not complete (rather than treating it as a
         # protocol violation).  The agent loop strips tools before calling
         # _handle_max_iterations, so the model cannot call kanban_block
         # itself — we must do it on its behalf.
+        #
+        # We route through ``_record_task_failure(outcome="timed_out")``
+        # rather than ``kanban_block`` so this counts toward the
+        # ``consecutive_failures`` counter and the dispatcher's
+        # ``failure_limit`` circuit breaker (#29747 gap 2).  Without this,
+        # a task whose worker keeps exhausting its budget would block
+        # silently each run, get auto-promoted by the operator (or never
+        # surface), and re-block in an endless loop with no signal.
         _kanban_task = os.environ.get("HERMES_KANBAN_TASK")
         if _kanban_task:
             try:
-                _ra().handle_function_call(
-                    "kanban_block",
-                    {
-                        "task_id": _kanban_task,
-                        "reason": (
+                from hermes_cli import kanban_db as _kb
+                _conn = _kb.connect()
+                try:
+                    _kb._record_task_failure(
+                        _conn,
+                        _kanban_task,
+                        error=(
                             f"Iteration budget exhausted "
                             f"({api_call_count}/{agent.max_iterations}) — "
                             "task could not complete within the allowed "
                             "iterations"
                         ),
-                    },
-                    task_id=effective_task_id,
-                )
-                logger.info(
-                    "kanban_block called for task %s after iteration "
-                    "exhaustion (%d/%d)",
-                    _kanban_task, api_call_count, agent.max_iterations,
-                )
+                        outcome="timed_out",
+                        release_claim=True,
+                        end_run=True,
+                        event_payload_extra={
+                            "budget_used": api_call_count,
+                            "budget_max": agent.max_iterations,
+                        },
+                    )
+                    logger.info(
+                        "recorded budget-exhausted failure for task %s (%d/%d)",
+                        _kanban_task, api_call_count, agent.max_iterations,
+                    )
+                finally:
+                    try:
+                        _conn.close()
+                    except Exception:
+                        pass
             except Exception:
                 logger.warning(
-                    "Failed to call kanban_block after iteration "
-                    "exhaustion for task %s",
+                    "Failed to record budget-exhausted failure for task %s",
                     _kanban_task,
                     exc_info=True,
                 )
@@ -4321,6 +4566,7 @@ def run_conversation(
         original_user_message=original_user_message,
         final_response=final_response,
         interrupted=interrupted,
+        messages=messages,
     )
 
     # Background memory/skill review — runs AFTER the response is delivered
diff --git a/agent/credential_pool.py b/agent/credential_pool.py
index e62ed59b9b6..5eab3bdb8d0 100644
--- a/agent/credential_pool.py
+++ b/agent/credential_pool.py
@@ -14,7 +14,7 @@ from datetime import datetime, timezone
 from typing import Any, Dict, List, Optional, Set, Tuple
 
 from hermes_constants import OPENROUTER_BASE_URL
-from hermes_cli.config import get_env_value, load_env
+from hermes_cli.config import load_env
 from agent.credential_persistence import (
     is_borrowed_credential_source,
     sanitize_borrowed_credential_payload,
@@ -22,7 +22,6 @@ from agent.credential_persistence import (
 import hermes_cli.auth as auth_mod
 from hermes_cli.auth import (
     CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
-    DEFAULT_AGENT_KEY_MIN_TTL_SECONDS,
     PROVIDER_REGISTRY,
     _auth_store_lock,
     _codex_access_token_is_expiring,
@@ -55,6 +54,38 @@ def _load_config_safe() -> Optional[dict]:
 
 STATUS_OK = "ok"
 STATUS_EXHAUSTED = "exhausted"
+# Terminal failure — the credential will never recover on its own.  Used for
+# upstream-permanent OAuth states like ``token_invalidated`` / ``token_revoked``
+# where retrying after a TTL cooldown is guaranteed to fail.  ``DEAD`` entries
+# are excluded from rotation unconditionally and only clear when an explicit
+# write-side sync (e.g. ``_save_codex_tokens`` after a fresh device-code
+# login) rewrites the tokens.
+STATUS_DEAD = "dead"
+
+# OAuth error reasons that indicate the credential is permanently invalid
+# server-side and cannot be recovered by retry/refresh.  Sourced from
+# OpenAI Codex Responses API, Anthropic, xAI, and Google OAuth spec.
+_TERMINAL_AUTH_REASONS = frozenset({
+    "token_invalidated",   # OpenAI Codex: "Your authentication token has been invalidated."
+    "token_revoked",        # OAuth 2.0 RFC 7009: token explicitly revoked
+    "invalid_token",        # RFC 6750: bearer token is malformed/expired/revoked
+    "invalid_grant",        # RFC 6749: refresh_token rejected during refresh
+    "unauthorized_client",  # RFC 6749: client no longer authorized
+    "refresh_token_reused", # Single-use refresh token consumed by another process
+})
+
+# How long a DEAD manual credential is preserved before being pruned.
+# Manual entries (``manual:*``) are independent credentials with no singleton
+# to re-seed from, so pruning them after a quiet window cleans up dead state
+# without losing recoverability — the user always has the option to re-add
+# via ``hermes auth add``.
+#
+# Singleton-seeded entries (``device_code``, ``loopback_pkce``, ``claude_code``)
+# are NOT pruned because ``_seed_from_singletons`` would just re-create them
+# on the next ``load_pool()`` with the same stale singleton tokens, defeating
+# the cleanup.  They remain in the pool marked DEAD until an explicit re-auth
+# write-side sync (``_save_codex_tokens`` etc.) clears the status.
+DEAD_MANUAL_PRUNE_TTL_SECONDS = 24 * 60 * 60  # 24 hours
 
 AUTH_TYPE_OAUTH = "oauth"
 AUTH_TYPE_API_KEY = "api_key"
@@ -171,8 +202,22 @@ class PooledCredential:
     def runtime_api_key(self) -> str:
         if self.provider == "nous":
             # Nous stores the runtime inference credential in agent_key for
-            # compatibility. It may be a NAS invoke JWT or legacy opaque key.
-            return str(self.agent_key or self.access_token or "")
+            # compatibility. It must be a NAS invoke JWT.
+            for token, expires_at in (
+                (self.agent_key, self.agent_key_expires_at),
+                (self.access_token, self.expires_at),
+            ):
+                if (
+                    isinstance(token, str)
+                    and token.strip()
+                    and auth_mod._nous_invoke_jwt_is_usable(
+                        token,
+                        scope=getattr(self, "scope", None),
+                        expires_at=expires_at,
+                    )
+                ):
+                    return token.strip()
+            return ""
         return str(self.access_token or "")
 
     @property
@@ -438,6 +483,29 @@ class CredentialPool:
             [entry.to_dict() for entry in self._entries],
         )
 
+    def _is_terminal_auth_failure(
+        self,
+        status_code: Optional[int],
+        normalized_error: Dict[str, Any],
+    ) -> bool:
+        """Detect upstream-permanent OAuth failures that won't recover on TTL.
+
+        Only fires for 401 responses whose error code/reason matches a known
+        terminal OAuth state (token_invalidated, token_revoked, invalid_grant,
+        etc.).  Distinguishes permanent failures from transient ones like
+        token_expired (refreshable) or generic 401 without a specific reason
+        (could be a server-side glitch worth retrying).
+
+        Returns False for non-401 status codes — 429 rate limits and 402
+        billing failures are transient by nature and should keep TTL semantics.
+        """
+        if status_code != 401:
+            return False
+        reason = normalized_error.get("reason")
+        if not isinstance(reason, str):
+            return False
+        return reason.strip().lower() in _TERMINAL_AUTH_REASONS
+
     def _mark_exhausted(
         self,
         entry: PooledCredential,
@@ -445,9 +513,20 @@ class CredentialPool:
         error_context: Optional[Dict[str, Any]] = None,
     ) -> PooledCredential:
         normalized_error = _normalize_error_context(error_context)
+        # Permanent OAuth failures (token_invalidated, token_revoked, etc.)
+        # transition to STATUS_DEAD instead of STATUS_EXHAUSTED.  Without this,
+        # a revoked credential gets a 1-hour TTL cooldown and then re-enters
+        # rotation, failing immediately every hour until the user manually
+        # removes it (issue #32849).  DEAD entries are excluded from rotation
+        # unconditionally and only clear via an explicit re-auth write-side
+        # sync (``_save_codex_tokens`` after a fresh device-code login).
+        if self._is_terminal_auth_failure(status_code, normalized_error):
+            terminal_status = STATUS_DEAD
+        else:
+            terminal_status = STATUS_EXHAUSTED
         updated = replace(
             entry,
-            last_status=STATUS_EXHAUSTED,
+            last_status=terminal_status,
             last_status_at=time.time(),
             last_error_code=status_code,
             last_error_reason=normalized_error.get("reason"),
@@ -852,12 +931,7 @@ class CredentialPool:
                 if synced is not entry:
                     entry = synced
                 auth_mod.resolve_nous_runtime_credentials(
-                    min_key_ttl_seconds=DEFAULT_AGENT_KEY_MIN_TTL_SECONDS,
-                    inference_auth_mode=(
-                        auth_mod.NOUS_INFERENCE_AUTH_MODE_LEGACY
-                        if force
-                        else auth_mod.NOUS_INFERENCE_AUTH_MODE_AUTO
-                    ),
+                    force_refresh=force,
                 )
                 updated = self._sync_nous_entry_from_auth_store(entry)
             else:
@@ -1139,7 +1213,7 @@ class CredentialPool:
                 auth_mod.XAI_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
             )
         if self.provider == "nous":
-            # Nous refresh/mint can require network access and should happen when
+            # Nous refresh can require network access and should happen when
             # runtime credentials are actually resolved, not merely when the pool
             # is enumerated for listing, migration, or selection.
             return False
@@ -1158,13 +1232,14 @@ class CredentialPool:
         """
         now = time.time()
         cleared_any = False
+        entries_to_prune: List[str] = []
         available: List[PooledCredential] = []
         for entry in self._entries:
             # For anthropic claude_code entries, sync from the credentials file
             # before any status/refresh checks. This picks up tokens refreshed
             # by other processes (Claude Code CLI, other Hermes profiles).
             if (self.provider == "anthropic" and entry.source == "claude_code"
-                    and entry.last_status == STATUS_EXHAUSTED):
+                    and entry.last_status in {STATUS_EXHAUSTED, STATUS_DEAD}):
                 synced = self._sync_anthropic_entry_from_credentials_file(entry)
                 if synced is not entry:
                     entry = synced
@@ -1175,7 +1250,7 @@ class CredentialPool:
             # exhausted status stale.
             if (self.provider == "nous"
                     and entry.source == "device_code"
-                    and entry.last_status == STATUS_EXHAUSTED):
+                    and entry.last_status in {STATUS_EXHAUSTED, STATUS_DEAD}):
                 synced = self._sync_nous_entry_from_auth_store(entry)
                 if synced is not entry:
                     entry = synced
@@ -1187,7 +1262,7 @@ class CredentialPool:
             # future for ChatGPT weekly windows).
             if (self.provider == "openai-codex"
                     and entry.source == "device_code"
-                    and entry.last_status == STATUS_EXHAUSTED):
+                    and entry.last_status in {STATUS_EXHAUSTED, STATUS_DEAD}):
                 synced = self._sync_codex_entry_from_auth_store(entry)
                 if synced is not entry:
                     entry = synced
@@ -1198,11 +1273,41 @@ class CredentialPool:
             # xAI Grok OAuth login) has since rotated in auth.json.
             if (self.provider == "xai-oauth"
                     and entry.source == "loopback_pkce"
-                    and entry.last_status == STATUS_EXHAUSTED):
+                    and entry.last_status in {STATUS_EXHAUSTED, STATUS_DEAD}):
                 synced = self._sync_xai_oauth_entry_from_auth_store(entry)
                 if synced is not entry:
                     entry = synced
                     cleared_any = True
+            if entry.last_status == STATUS_DEAD:
+                # Manual DEAD credentials get pruned after a 24h quiet window
+                # so the pool doesn't accumulate dead entries forever.  The
+                # user can always re-add via ``hermes auth add``.  Singleton-
+                # seeded DEAD entries are kept so the audit trail (label,
+                # last_error_reason, timestamps) stays visible — pruning them
+                # would just be undone by ``_seed_from_singletons`` on the
+                # next load anyway.
+                if _is_manual_source(entry.source):
+                    dead_at = entry.last_status_at or 0
+                    if dead_at and now - dead_at > DEAD_MANUAL_PRUNE_TTL_SECONDS:
+                        _label = entry.label or entry.id[:8]
+                        logger.warning(
+                            "credential pool: pruning DEAD manual entry %s "
+                            "(reason=%s, age=%.1fh) — re-add via `hermes auth add %s`",
+                            _label,
+                            entry.last_error_reason or "unknown",
+                            (now - dead_at) / 3600.0,
+                            self.provider,
+                        )
+                        # Mark for removal after the loop completes; we can't
+                        # mutate self._entries while iterating.
+                        entries_to_prune.append(entry.id)
+                        cleared_any = True
+                # Permanently failed credentials never re-enter rotation via
+                # TTL.  They only clear when a write-side re-auth sync rewrites
+                # the tokens (e.g. ``_save_codex_tokens`` after a fresh
+                # device-code login).  The auth.json-sync paths below handle
+                # the re-auth case for OAuth singletons.
+                continue
             if entry.last_status == STATUS_EXHAUSTED:
                 exhausted_until = _exhausted_until(entry)
                 if exhausted_until is not None and now < exhausted_until:
@@ -1226,6 +1331,9 @@ class CredentialPool:
                     continue
                 entry = refreshed
             available.append(entry)
+        if entries_to_prune:
+            pruned_ids = set(entries_to_prune)
+            self._entries = [e for e in self._entries if e.id not in pruned_ids]
         if cleared_any:
             self._persist()
         return available
@@ -1293,11 +1401,22 @@ class CredentialPool:
             if entry is None:
                 return None
             _label = entry.label or entry.id[:8]
-            logger.info(
-                "credential pool: marking %s exhausted (status=%s), rotating",
-                _label, status_code,
-            )
             self._mark_exhausted(entry, status_code, error_context)
+            # Re-read the updated entry to log the correct terminal state.
+            updated_entry = next(
+                (e for e in self._entries if e.id == entry.id), entry,
+            )
+            if updated_entry.last_status == STATUS_DEAD:
+                logger.warning(
+                    "credential pool: marking %s DEAD (status=%s, reason=%s) — "
+                    "permanently failed, will NOT re-enter rotation until re-auth",
+                    _label, status_code, updated_entry.last_error_reason or "unknown",
+                )
+            else:
+                logger.info(
+                    "credential pool: marking %s exhausted (status=%s), rotating",
+                    _label, status_code,
+                )
             self._current_id = None
             next_entry = self._select_unlocked()
             if next_entry:
@@ -1637,9 +1756,9 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
                     "inference_base_url": state.get("inference_base_url"),
                     "agent_key": state.get("agent_key"),
                     "agent_key_expires_at": state.get("agent_key_expires_at"),
-                    # Carry the mint/refresh timestamps into the pool so
+                    # Carry the refresh timestamps into the pool so
                     # freshness-sensitive consumers (self-heal hooks, pool
-                    # pruning by age) can distinguish just-minted credentials
+                    # pruning by age) can distinguish just-refreshed credentials
                     # from stale ones.  Without these, fresh device_code
                     # entries get obtained_at=None and look older than they
                     # are (#15099).
diff --git a/agent/curator_backup.py b/agent/curator_backup.py
index 5e39443bae0..1961b99de3c 100644
--- a/agent/curator_backup.py
+++ b/agent/curator_backup.py
@@ -39,12 +39,9 @@ from __future__ import annotations
 
 import json
 import logging
-import os
 import re
 import shutil
 import tarfile
-import tempfile
-import time
 from datetime import datetime, timezone
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple
diff --git a/agent/display.py b/agent/display.py
index 02880a83e0d..8514279888e 100644
--- a/agent/display.py
+++ b/agent/display.py
@@ -904,10 +904,6 @@ def get_cute_tool_message(
             extra = f" +{len(urls)-1}" if len(urls) > 1 else ""
             return _wrap(f"┊ 📄 fetch     {_trunc(domain, 35)}{extra}  {dur}")
         return _wrap(f"┊ 📄 fetch     pages  {dur}")
-    if tool_name == "web_crawl":
-        url = args.get("url", "")
-        domain = url.replace("https://", "").replace("http://", "").split("/")[0]
-        return _wrap(f"┊ 🕸️  crawl     {_trunc(domain, 35)}  {dur}")
     if tool_name == "terminal":
         return _wrap(f"┊ 💻 $         {_trunc(args.get('command', ''), 42)}  {dur}")
     if tool_name == "process":
diff --git a/agent/error_classifier.py b/agent/error_classifier.py
index a0726a4e02a..e8a44866b28 100644
--- a/agent/error_classifier.py
+++ b/agent/error_classifier.py
@@ -44,9 +44,10 @@ class FailoverReason(enum.Enum):
     payload_too_large = "payload_too_large"  # 413 — compress payload
     image_too_large = "image_too_large"   # Native image part exceeds provider's per-image limit — shrink and retry
 
-    # Model
+    # Model / provider policy
     model_not_found = "model_not_found"  # 404 or invalid model — fallback to different model
     provider_policy_blocked = "provider_policy_blocked"  # Aggregator (e.g. OpenRouter) blocked the only endpoint due to account data/privacy policy
+    content_policy_blocked = "content_policy_blocked"  # Provider safety filter rejected this prompt — deterministic per-request, don't retry unchanged
 
     # Request format
     format_error = "format_error"        # 400 bad request — abort or strip + retry
@@ -97,13 +98,20 @@ _BILLING_PATTERNS = [
     "insufficient_quota",
     "insufficient balance",
     "credit balance",
+    "credits exhausted",
     "credits have been exhausted",
+    "no usable credits",
     "top up your credits",
     "payment required",
     "billing hard limit",
     "exceeded your current quota",
     "account is deactivated",
     "plan does not include",
+    "out of funds",
+    "run out of funds",
+    "balance_depleted",
+    "model_not_supported_on_free_tier",
+    "not available on the free tier",
 ]
 
 # Patterns that indicate rate limiting (transient, will resolve)
@@ -282,6 +290,45 @@ _PROVIDER_POLICY_BLOCKED_PATTERNS = [
     "no endpoints found matching your data policy",
 ]
 
+# Provider content-policy / safety-filter blocks. Distinct from
+# ``provider_policy_blocked`` above (which is an OpenRouter *account*-level
+# data/privacy guardrail) — these are *per-prompt* safety decisions made by
+# the upstream model provider. They are deterministic for the unchanged
+# request, so retrying the same prompt three times just reproduces the same
+# block and burns paid attempts on a refusal. The recovery is to switch to a
+# configured fallback model/provider immediately, or surface the block to
+# the user with actionable guidance if no fallback exists.
+#
+# Patterns are intentionally narrow — each phrase is a verbatim string from
+# a specific provider's safety pipeline, not a generic word like "policy" or
+# "violation" that could collide with billing/auth/format errors:
+#   • OpenAI Codex cybersecurity refusal (gpt-5.5, the case from #18028)
+#   • OpenAI moderation refusal ("violates our usage policies", with
+#     "usage policies" disambiguating from billing's "exceeded ... policy")
+#   • Anthropic safety refusal ("prompt was flagged by ... safety system")
+#   • OpenAI Responses content filter
+_CONTENT_POLICY_BLOCKED_PATTERNS = [
+    # OpenAI Codex (#18028) — message may arrive without an HTTP status
+    "flagged for possible cybersecurity risk",
+    "trusted access for cyber",
+    # OpenAI moderation — chat completions / responses
+    "violates our usage policies",
+    "violates openai's usage policies",
+    "your request was flagged by",
+    # Anthropic safety system
+    "prompt was flagged by our safety",
+    "responses cannot be generated due to safety",
+    # Generic content-filter wording seen on Azure / OpenAI Responses.
+    # ``content_filter`` (underscore) is the OpenAI-standard error/finish
+    # token surfaced verbatim by their SDKs when a request is blocked.
+    # ``responsibleaipolicyviolation`` is Azure OpenAI's error code.
+    # Deliberately NOT matching the space variant ("content filter") — it
+    # appears in benign config descriptions and tooltip text that providers
+    # echo back; the underscore form is provider-specific enough.
+    "content_filter",
+    "responsibleaipolicyviolation",
+]
+
 # Auth patterns (non-status-code signals)
 _AUTH_PATTERNS = [
     "invalid api key",
@@ -485,6 +532,20 @@ def classify_api_error(
 
     # ── 1. Provider-specific patterns (highest priority) ────────────
 
+    # Provider content-policy / safety-filter block. The provider has made a
+    # deterministic refusal decision about THIS prompt — retrying unchanged
+    # just reproduces the same refusal and burns paid attempts. Must run
+    # before status-based classification so a 400 safety block isn't
+    # downgraded to a generic ``format_error`` and a status-less block
+    # (OpenAI Codex SDK can raise without one) isn't left in the retryable
+    # ``unknown`` bucket. See issue #18028.
+    if any(p in error_msg for p in _CONTENT_POLICY_BLOCKED_PATTERNS):
+        return _result(
+            FailoverReason.content_policy_blocked,
+            retryable=False,
+            should_fallback=True,
+        )
+
     # Anthropic thinking block signature invalid (400).
     # Don't gate on provider — OpenRouter proxies Anthropic errors, so the
     # provider may be "openrouter" even though the error is Anthropic-specific.
@@ -690,8 +751,13 @@ def _classify_by_status(
         )
 
     if status_code == 403:
-        # OpenRouter 403 "key limit exceeded" is actually billing
-        if "key limit exceeded" in error_msg or "spending limit" in error_msg:
+        # OpenRouter 403 "key limit exceeded" is actually billing. Other
+        # providers also use 403 for account-plan or credit exhaustion.
+        if (
+            "key limit exceeded" in error_msg
+            or "spending limit" in error_msg
+            or any(p in error_msg for p in _BILLING_PATTERNS)
+        ):
             return result_fn(
                 FailoverReason.billing,
                 retryable=False,
@@ -708,6 +774,17 @@ def _classify_by_status(
         return _classify_402(error_msg, result_fn)
 
     if status_code == 404:
+        # Nous API currently surfaces HA/NAS credit depletion as a paid model
+        # becoming unavailable on the Free Tier, returned as 404 rather than
+        # 402. Treat that as entitlement/billing exhaustion, not a missing
+        # model, so the retry loop can show credit/top-up guidance.
+        if any(p in error_msg for p in _BILLING_PATTERNS):
+            return result_fn(
+                FailoverReason.billing,
+                retryable=False,
+                should_rotate_credential=True,
+                should_fallback=True,
+            )
         # OpenRouter policy-block 404 — distinct from "model not found".
         # The model exists; the user's account privacy setting excludes the
         # only endpoint serving it. Falling back to another provider won't
@@ -973,7 +1050,15 @@ def _classify_by_error_code(
             should_rotate_credential=True,
         )
 
-    if code_lower in {"insufficient_quota", "billing_not_active", "payment_required"}:
+    if code_lower in {
+        "insufficient_quota",
+        "billing_not_active",
+        "payment_required",
+        "insufficient_credits",
+        "no_usable_credits",
+        "balance_depleted",
+        "model_not_supported_on_free_tier",
+    }:
         return result_fn(
             FailoverReason.billing,
             retryable=False,
diff --git a/agent/file_safety.py b/agent/file_safety.py
index 22b190c3a6c..e317b3439f3 100644
--- a/agent/file_safety.py
+++ b/agent/file_safety.py
@@ -249,6 +249,10 @@ def get_read_block_error(path: str) -> Optional[str]:
         ".env",
         "webhook_subscriptions.json",
         os.path.join("auth", "google_oauth.json"),
+        # Bitwarden Secrets Manager disk cache: stores plaintext secret values
+        # to avoid re-fetching across back-to-back CLI invocations. The file
+        # was introduced by #31968 but not added to this guard.
+        os.path.join("cache", "bws_cache.json"),
     )
     for hd in hermes_dirs:
         for name in credential_file_names:
diff --git a/agent/google_code_assist.py b/agent/google_code_assist.py
index 3e61d1b03e9..eec6441f80e 100644
--- a/agent/google_code_assist.py
+++ b/agent/google_code_assist.py
@@ -31,7 +31,6 @@ import json
 import logging
 import time
 import urllib.error
-import urllib.parse
 import urllib.request
 import uuid
 from dataclasses import dataclass, field
diff --git a/agent/google_oauth.py b/agent/google_oauth.py
index 97a65349dfa..9eb55ec19dc 100644
--- a/agent/google_oauth.py
+++ b/agent/google_oauth.py
@@ -899,7 +899,15 @@ def start_oauth_flow(
         try:
             import webbrowser
 
-            webbrowser.open(auth_url, new=1, autoraise=True)
+            try:
+                from hermes_cli.auth import (
+                    _can_open_graphical_browser as _can_open_gui,
+                )
+            except Exception:
+                _can_open_gui = lambda: True  # noqa: E731
+
+            if _can_open_gui():
+                webbrowser.open(auth_url, new=1, autoraise=True)
         except Exception as exc:
             logger.debug("webbrowser.open failed: %s", exc)
 
diff --git a/agent/image_routing.py b/agent/image_routing.py
index 37e1cbbf102..74b29af7cd8 100644
--- a/agent/image_routing.py
+++ b/agent/image_routing.py
@@ -37,6 +37,8 @@ from __future__ import annotations
 import base64
 import logging
 import mimetypes
+import os
+import re
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple
 
@@ -46,6 +48,102 @@ logger = logging.getLogger(__name__)
 _VALID_MODES = frozenset({"auto", "native", "text"})
 
 
+# Image extensions used by extract_image_refs(). Kept tight on purpose — we
+# only auto-attach things the model can actually see. Documents/archives are
+# excluded because the gateway's broader extract_local_files() also routes
+# them differently (send_document), and we don't want to attach a PDF as a
+# vision part.
+_IMAGE_EXTS = (
+    ".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".tiff", ".tif", ".heic",
+)
+_IMAGE_EXT_PATTERN = "|".join(e.lstrip(".") for e in _IMAGE_EXTS)
+
+# Absolute / home-relative local image path. Matches the same shape gateway's
+# extract_local_files() uses: anchors to ``~/`` or ``/``, ignores matches inside
+# URLs (the ``(?<![/:\w.])`` lookbehind), and case-insensitive on the extension.
+_LOCAL_IMAGE_PATH_RE = re.compile(
+    r"(?<![/:\w.])(?:~/|/)(?:[\w.\-]+/)*[\w.\-]+\.(?:" + _IMAGE_EXT_PATTERN + r")\b",
+    re.IGNORECASE,
+)
+
+# http(s) URL ending in an image extension (optionally followed by a
+# query string). Case-insensitive on the extension. Strict ``http(s)://``
+# scheme so we don't accidentally grab ``file://`` URLs or other shapes.
+_IMAGE_URL_RE = re.compile(
+    r"https?://[^\s<>\"']+?\.(?:" + _IMAGE_EXT_PATTERN + r")(?:\?[^\s<>\"']*)?",
+    re.IGNORECASE,
+)
+
+
+def extract_image_refs(text: str) -> Tuple[List[str], List[str]]:
+    """Scan free-form text for image references the model should see.
+
+    Returns ``(local_paths, urls)``:
+
+      * ``local_paths`` — absolute (``/``) or home-relative (``~/``) paths
+        whose suffix is an image extension AND whose expanded form exists
+        on disk as a file. Order-preserving, deduplicated.
+      * ``urls`` — ``http(s)://…`` URLs whose path ends in an image
+        extension (a ``?query`` is allowed after the extension).
+        Order-preserving, deduplicated.
+
+    Matches inside fenced code blocks (``` ``` ```) and inline backticks
+    (`` `…` ``) are skipped so that snippets pasted into a task body for
+    reference aren't mistaken for live attachments. This mirrors the
+    behaviour of ``gateway.platforms.base.BaseAdapter.extract_local_files``.
+
+    Local paths are validated against the filesystem; URLs are not
+    (the provider fetches them at request time).
+    """
+    if not isinstance(text, str) or not text:
+        return [], []
+
+    # Build spans covered by fenced code blocks and inline code so we can
+    # ignore references the author embedded purely as example text.
+    code_spans: list[tuple[int, int]] = []
+    for m in re.finditer(r"```[^\n]*\n.*?```", text, re.DOTALL):
+        code_spans.append((m.start(), m.end()))
+    for m in re.finditer(r"`[^`\n]+`", text):
+        code_spans.append((m.start(), m.end()))
+
+    def _in_code(pos: int) -> bool:
+        return any(s <= pos < e for s, e in code_spans)
+
+    local_paths: list[str] = []
+    seen_paths: set[str] = set()
+    for match in _LOCAL_IMAGE_PATH_RE.finditer(text):
+        if _in_code(match.start()):
+            continue
+        raw = match.group(0)
+        expanded = os.path.expanduser(raw)
+        try:
+            if not os.path.isfile(expanded):
+                continue
+        except OSError:
+            # ENAMETOOLONG / EINVAL on pathological inputs — skip rather than crash.
+            continue
+        if expanded in seen_paths:
+            continue
+        seen_paths.add(expanded)
+        local_paths.append(expanded)
+
+    urls: list[str] = []
+    seen_urls: set[str] = set()
+    for match in _IMAGE_URL_RE.finditer(text):
+        if _in_code(match.start()):
+            continue
+        url = match.group(0)
+        # Strip trailing punctuation that's almost certainly prose, not part
+        # of the URL (e.g. "see https://x.com/a.png." or "/a.png)").
+        url = url.rstrip(".,;:!?)]>")
+        if url in seen_urls:
+            continue
+        seen_urls.add(url)
+        urls.append(url)
+
+    return local_paths, urls
+
+
 # Strict YAML/JSON boolean coercion for capability overrides.
 #
 # ``bool("false")`` is True in Python because non-empty strings are truthy, so
@@ -320,20 +418,29 @@ def _file_to_data_url(path: Path) -> Optional[str]:
 def build_native_content_parts(
     user_text: str,
     image_paths: List[str],
+    image_urls: Optional[List[str]] = None,
 ) -> Tuple[List[Dict[str, Any]], List[str]]:
     """Build an OpenAI-style ``content`` list for a user turn.
 
     Shape:
       [{"type": "text", "text": "...\\n\\n[Image attached at: /local/path]"},
        {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
+       {"type": "image_url", "image_url": {"url": "https://example.com/a.png"}},
        ...]
 
-    The local path of each successfully attached image is appended to the
-    text part as ``[Image attached at: <path>]``. The model still sees the
-    pixels via the ``image_url`` part (full native vision); the path note
-    just gives it a string handle so MCP/skill tools that take an image
-    path or URL argument can be invoked on the same image without an
-    extra round-trip. This parallels the text-mode hint produced by
+    Local paths are read from disk and embedded as base64 ``data:`` URLs.
+    Remote URLs (``http(s)://``) are passed through verbatim — the provider
+    fetches them server-side. The model still sees the pixels either way.
+
+    For each successfully attached image, a hint is appended to the text
+    part:
+
+      * local path → ``[Image attached at: <path>]``
+      * URL        → ``[Image attached: <url>]``
+
+    The hint gives the model a string handle so MCP/skill tools that take
+    an image path or URL argument can be invoked on the same image without
+    an extra round-trip. This parallels the text-mode hint produced by
     ``Runner._enrich_message_with_vision`` (``vision_analyze using image_url:
     <path>``) so behaviour is consistent across both image input modes.
 
@@ -342,12 +449,14 @@ def build_native_content_parts(
     ceiling), the agent's retry loop transparently shrinks and retries
     once — see ``run_agent._try_shrink_image_parts_in_messages``.
 
-    Returns (content_parts, skipped_paths). Skipped paths are files that
-    couldn't be read from disk and are NOT advertised in the path hints.
+    Returns (content_parts, skipped). Skipped entries are local paths
+    that couldn't be read from disk; URLs are never skipped (they're
+    not validated here).
     """
     skipped: List[str] = []
     image_parts: List[Dict[str, Any]] = []
     attached_paths: List[str] = []
+    attached_urls: List[str] = []
 
     for raw_path in image_paths:
         p = Path(raw_path)
@@ -364,16 +473,26 @@ def build_native_content_parts(
         })
         attached_paths.append(str(raw_path))
 
+    for url in image_urls or []:
+        url = (url or "").strip()
+        if not url:
+            continue
+        image_parts.append({
+            "type": "image_url",
+            "image_url": {"url": url},
+        })
+        attached_urls.append(url)
+
     text = (user_text or "").strip()
 
     # If at least one image attached, build a single text part that combines
-    # the user's caption (or a neutral default) with one path hint per image.
-    if attached_paths:
+    # the user's caption (or a neutral default) with one hint per image.
+    if attached_paths or attached_urls:
         base_text = text or "What do you see in this image?"
-        path_hints = "\n".join(
-            f"[Image attached at: {p}]" for p in attached_paths
-        )
-        combined_text = f"{base_text}\n\n{path_hints}"
+        hint_lines: List[str] = []
+        hint_lines.extend(f"[Image attached at: {p}]" for p in attached_paths)
+        hint_lines.extend(f"[Image attached: {u}]" for u in attached_urls)
+        combined_text = f"{base_text}\n\n" + "\n".join(hint_lines)
         parts: List[Dict[str, Any]] = [{"type": "text", "text": combined_text}]
         parts.extend(image_parts)
         return parts, skipped
@@ -388,4 +507,5 @@ def build_native_content_parts(
 __all__ = [
     "decide_image_input_mode",
     "build_native_content_parts",
+    "extract_image_refs",
 ]
diff --git a/agent/jiter_preload.py b/agent/jiter_preload.py
new file mode 100644
index 00000000000..787e45afa61
--- /dev/null
+++ b/agent/jiter_preload.py
@@ -0,0 +1,39 @@
+"""Best-effort early import for the OpenAI SDK's native streaming parser.
+
+The OpenAI SDK imports ``jiter`` while constructing streaming chat-completion
+responses.  On some Windows installs the native extension can be imported
+directly from the Hermes venv, but the first import fails when it happens later
+inside the threaded streaming request path.  Loading it once during agent
+package import avoids that import-order failure while preserving the normal
+SDK error path for genuinely missing or broken installs.
+"""
+
+from __future__ import annotations
+
+import importlib
+
+_JITER_PRELOADED = False
+_JITER_PRELOAD_ERROR: Exception | None = None
+
+
+def preload_jiter_native_extension() -> bool:
+    """Import jiter's native extension early if it is available."""
+
+    global _JITER_PRELOADED, _JITER_PRELOAD_ERROR
+
+    if _JITER_PRELOADED:
+        return True
+
+    try:
+        importlib.import_module("jiter.jiter")
+        from jiter import from_json as _from_json  # noqa: F401
+    except Exception as exc:
+        _JITER_PRELOAD_ERROR = exc
+        return False
+
+    _JITER_PRELOADED = True
+    _JITER_PRELOAD_ERROR = None
+    return True
+
+
+preload_jiter_native_extension()
diff --git a/agent/lsp/cli.py b/agent/lsp/cli.py
index c17ef682b33..121cfa5f92c 100644
--- a/agent/lsp/cli.py
+++ b/agent/lsp/cli.py
@@ -16,7 +16,6 @@ from __future__ import annotations
 
 import argparse
 import sys
-from typing import Optional
 
 
 def register_subparser(subparsers: argparse._SubParsersAction) -> None:
@@ -249,7 +248,6 @@ def _cmd_restart() -> int:
 
 def _cmd_which(server_id: str) -> int:
     from agent.lsp.install import INSTALL_RECIPES, hermes_lsp_bin_dir
-    import os
     import shutil as _shutil
 
     recipe = INSTALL_RECIPES.get(server_id)
diff --git a/agent/lsp/manager.py b/agent/lsp/manager.py
index 4f16188de0b..aebb4881c96 100644
--- a/agent/lsp/manager.py
+++ b/agent/lsp/manager.py
@@ -39,25 +39,20 @@ import logging
 import os
 import threading
 import time
-from concurrent.futures import Future as ConcurrentFuture
 from typing import Any, Callable, Dict, List, Optional, Tuple
 
 from agent.lsp import eventlog
 from agent.lsp.client import (
     DIAGNOSTICS_DOCUMENT_WAIT,
     LSPClient,
-    file_uri,
 )
 from agent.lsp.servers import (
     ServerContext,
-    ServerDef,
-    SpawnSpec,
     find_server_for_file,
     language_id_for,
 )
 from agent.lsp.workspace import (
     clear_cache,
-    is_inside_workspace,
     resolve_workspace_for_file,
 )
 
diff --git a/agent/lsp/servers.py b/agent/lsp/servers.py
index 144b5cb2c11..8ba87be9495 100644
--- a/agent/lsp/servers.py
+++ b/agent/lsp/servers.py
@@ -25,7 +25,7 @@ import shutil
 from dataclasses import dataclass, field
 from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple
 
-from agent.lsp.workspace import nearest_root, normalize_path
+from agent.lsp.workspace import nearest_root
 
 logger = logging.getLogger("agent.lsp.servers")
 
diff --git a/agent/memory_manager.py b/agent/memory_manager.py
index 79547139086..fc5d96da4fe 100644
--- a/agent/memory_manager.py
+++ b/agent/memory_manager.py
@@ -368,11 +368,42 @@ class MemoryManager:
 
     # -- Sync ----------------------------------------------------------------
 
-    def sync_all(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
+    @staticmethod
+    def _provider_sync_accepts_messages(provider: MemoryProvider) -> bool:
+        """Return whether sync_turn accepts a messages keyword."""
+        try:
+            signature = inspect.signature(provider.sync_turn)
+        except (TypeError, ValueError):
+            return True
+        params = list(signature.parameters.values())
+        if any(p.kind == inspect.Parameter.VAR_KEYWORD for p in params):
+            return True
+        return "messages" in signature.parameters
+
+    def sync_all(
+        self,
+        user_content: str,
+        assistant_content: str,
+        *,
+        session_id: str = "",
+        messages: Optional[List[Dict[str, Any]]] = None,
+    ) -> None:
         """Sync a completed turn to all providers."""
         for provider in self._providers:
             try:
-                provider.sync_turn(user_content, assistant_content, session_id=session_id)
+                if messages is not None and self._provider_sync_accepts_messages(provider):
+                    provider.sync_turn(
+                        user_content,
+                        assistant_content,
+                        session_id=session_id,
+                        messages=messages,
+                    )
+                else:
+                    provider.sync_turn(
+                        user_content,
+                        assistant_content,
+                        session_id=session_id,
+                    )
             except Exception as e:
                 logger.warning(
                     "Memory provider '%s' sync_turn failed: %s",
diff --git a/agent/memory_provider.py b/agent/memory_provider.py
index d801d856a04..116ceff406f 100644
--- a/agent/memory_provider.py
+++ b/agent/memory_provider.py
@@ -112,11 +112,22 @@ class MemoryProvider(ABC):
         that do background prefetching should override this.
         """
 
-    def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
+    def sync_turn(
+        self,
+        user_content: str,
+        assistant_content: str,
+        *,
+        session_id: str = "",
+        messages: Optional[List[Dict[str, Any]]] = None,
+    ) -> None:
         """Persist a completed turn to the backend.
 
         Called after each turn. Should be non-blocking — queue for
         background processing if the backend has latency.
+
+        ``messages`` is the OpenAI-style conversation message list as of the
+        completed turn, including any assistant tool calls and tool results.
+        Providers that do not need raw turn context can ignore it.
         """
 
     @abstractmethod
diff --git a/agent/model_metadata.py b/agent/model_metadata.py
index fa21c837123..a2d9b2daa3d 100644
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -141,6 +141,8 @@ DEFAULT_CONTEXT_LENGTHS = {
     # fuzzy-match collisions (e.g. "anthropic/claude-sonnet-4" is a
     # substring of "anthropic/claude-sonnet-4.6").
     # OpenRouter-prefixed models resolve via OpenRouter live API or models.dev.
+    "claude-opus-4-8": 1000000,
+    "claude-opus-4.8": 1000000,
     "claude-opus-4-7": 1000000,
     "claude-opus-4.7": 1000000,
     "claude-opus-4-6": 1000000,
@@ -911,12 +913,33 @@ def parse_context_limit_from_error(error_msg: str) -> Optional[int]:
     return None
 
 
+def get_context_length_from_provider_error(
+    error_msg: str,
+    current_context_length: int,
+) -> Optional[int]:
+    """Return a provider-reported lower context limit, if one is present.
+
+    Context-overflow recovery must not invent a new model window size.  Some
+    providers only say that the input exceeds the context window without
+    reporting the actual maximum.  In that case callers should keep the
+    configured context length and try compression only, rather than stepping
+    down through guessed probe tiers (1M → 256K → 128K → ...).
+    """
+    parsed_limit = parse_context_limit_from_error(error_msg)
+    if parsed_limit is None:
+        return None
+    if parsed_limit < current_context_length:
+        return parsed_limit
+    return None
+
+
 def parse_available_output_tokens_from_error(error_msg: str) -> Optional[int]:
     """Detect an "output cap too large" error and return how many output tokens are available.
 
     Background — two distinct context errors exist:
       1. "Prompt too long"  — the INPUT itself exceeds the context window.
-           Fix: compress history and/or halve context_length.
+           Fix: compress history, and only reduce context_length if the
+           provider explicitly reports the actual lower limit.
       2. "max_tokens too large" — input is fine, but input + requested_output > window.
            Fix: reduce max_tokens (the output cap) for this call.
            Do NOT touch context_length — the window hasn't shrunk.
diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py
index 365bcdc075f..7ba2edfa124 100644
--- a/agent/prompt_builder.py
+++ b/agent/prompt_builder.py
@@ -7,7 +7,6 @@ assemble pieces, then combines them with memory and ephemeral prompts.
 import json
 import logging
 import os
-import re
 import threading
 from collections import OrderedDict
 from pathlib import Path
@@ -236,6 +235,11 @@ KANBAN_GUIDANCE = (
     "- Do not shell out to `hermes kanban <verb>` for board operations. Use "
     "the `kanban_*` tools — they work across all terminal backends.\n"
     "- Do not complete a task you didn't actually finish. Block it.\n"
+    "- Do not call `clarify` to ask questions. You are running headless — "
+    "there is no live user to answer. The call will time out and the task "
+    "will sit silently in `running` with no signal to the operator. Instead: "
+    "`kanban_comment` the context, then `kanban_block(reason=...)` so the "
+    "task surfaces on the board as needing input.\n"
     "- Do not assign follow-up work to yourself. Assign it to the right "
     "specialist profile.\n"
     "- Do not call `delegate_task` as a board substitute. `delegate_task` is "
@@ -262,6 +266,37 @@ TOOL_USE_ENFORCEMENT_GUIDANCE = (
 # Add new patterns here when a model family needs explicit steering.
 TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex", "gemini", "gemma", "grok", "glm", "qwen", "deepseek")
 
+# Universal "finish the job" guidance — applied to ALL models, not gated
+# by model family.  Addresses two cross-model failure modes:
+#   1. Stopping after a stub: writing a tiny file or running one command
+#      and then ending the turn with a description of the plan instead
+#      of the finished artifact.  (Observed on Opus during a real
+#      Sarasota real-estate build task: 3 API calls, 85-byte file,
+#      one terminal command, finish_reason=stop.)
+#   2. Fabricating output when a real path is blocked.  When `pip` or a
+#      tool fails, some models will synthesize plausible-looking results
+#      (fake addresses, fake JSON, fake numbers) instead of reporting
+#      the blocker.  (Observed on DeepSeek v4-flash on the same task:
+#      pushed through PEP-668 wall, then returned fabricated listings.)
+#
+# Short on purpose.  This block is shipped to every user, every session,
+# in the cached system prompt — token cost is paid once at install and
+# then amortised across all sessions via prefix caching.  Keep it tight.
+TASK_COMPLETION_GUIDANCE = (
+    "# Finishing the job\n"
+    "When the user asks you to build, run, or verify something, the deliverable is "
+    "a working artifact backed by real tool output — not a description of one. "
+    "Do not stop after writing a stub, a plan, or a single command. Keep working "
+    "until you have actually exercised the code or produced the requested result, "
+    "then report what real execution returned.\n"
+    "If a tool, install, or network call fails and blocks the real path, say so "
+    "directly and try an alternative (different package manager, different "
+    "approach, ask the user). NEVER substitute plausible-looking fabricated "
+    "output (made-up data, invented file contents, synthesised API responses) "
+    "for results you couldn't actually produce. Reporting a blocker honestly "
+    "is always better than inventing a result."
+)
+
 # OpenAI GPT/Codex-specific execution guidance.  Addresses known failure modes
 # where GPT models abandon work on partial results, skip prerequisite lookups,
 # hallucinate instead of using tools, and declare "done" without verification.
@@ -813,6 +848,27 @@ def build_environment_hints() -> str:
 
     if is_wsl():
         hints.append(WSL_ENVIRONMENT_HINT)
+
+    # Embedder-supplied environment description. Lets a host that wraps Hermes
+    # (e.g. a sandbox runner / managed platform) explain the environment the
+    # agent is running in — proxy, credential handling, mount layout — without
+    # forking the identity slot (SOUL.md). Read once at prompt-build time, so
+    # it's part of the stable, cache-safe system prompt. The env var is the
+    # build-time/embedder mechanism (set in a container ENV); config.yaml
+    # ``agent.environment_hint`` is the user-facing surface. Env var wins.
+    extra = (os.getenv("HERMES_ENVIRONMENT_HINT") or "").strip()
+    if not extra:
+        try:
+            from hermes_cli.config import load_config
+
+            extra = str(
+                (load_config().get("agent", {}) or {}).get("environment_hint", "")
+            ).strip()
+        except Exception as e:
+            logger.debug("Could not read agent.environment_hint from config: %s", e)
+    if extra:
+        hints.append(extra)
+
     return "\n\n".join(hints)
 
 
diff --git a/agent/redact.py b/agent/redact.py
index 7ed241c5efd..5de714a5f99 100644
--- a/agent/redact.py
+++ b/agent/redact.py
@@ -331,7 +331,7 @@ def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = F
     """Apply all redaction patterns to a block of text.
 
     Safe to call on any string -- non-matching text passes through unchanged.
-    Disabled by default — enable via security.redact_secrets: true in config.yaml.
+    Enabled by default. Disable via security.redact_secrets: false in config.yaml.
     Set force=True for safety boundaries that must never return raw secrets
     regardless of the user's global logging redaction preference.
 
@@ -406,19 +406,14 @@ def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = F
     if "eyJ" in text:
         text = _JWT_RE.sub(lambda m: _mask_token(m.group(0)), text)
 
-    # URL userinfo (http(s)://user:pass@host) — redact for non-DB schemes.
-    # DB schemes are handled above by _DB_CONNSTR_RE.
-    if "://" in text:
-        text = _redact_url_userinfo(text)
-
-        # URL query params containing opaque tokens (?access_token=…&code=…)
-        if "?" in text:
-            text = _redact_url_query_params(text)
-
-    # HTTP access logs can contain relative request targets with query params
-    # and no URL scheme, e.g. `"POST /hook?password=... HTTP/1.1"`.
-    if "?" in text and "=" in text and _has_http_method_substring(text):
-        text = _redact_http_request_target_query_params(text)
+    # NOTE: Web-URL redaction (query params + userinfo + HTTP access-log
+    # request targets) is intentionally OFF. Many legitimate workflows pass
+    # opaque tokens through query strings — magic-link checkouts, OAuth
+    # callbacks the agent is meant to follow, pre-signed share URLs — and
+    # blanket-redacting param values by name breaks those skills mid-flow.
+    # Known credential shapes (sk-, ghp_, JWTs, etc.) inside URLs are still
+    # caught by _PREFIX_RE and _JWT_RE above. DB connection-string passwords
+    # are still caught by _DB_CONNSTR_RE.
 
     # Form-urlencoded bodies (only triggers on clean k=v&k=v inputs).
     if "&" in text and "=" in text:
diff --git a/agent/secret_sources/bitwarden.py b/agent/secret_sources/bitwarden.py
index 235a4222594..b19451fda22 100644
--- a/agent/secret_sources/bitwarden.py
+++ b/agent/secret_sources/bitwarden.py
@@ -37,7 +37,6 @@ import platform
 import shutil
 import stat
 import subprocess
-import sys
 import tempfile
 import time
 import urllib.error
diff --git a/agent/stream_diag.py b/agent/stream_diag.py
index c4d8c54f470..cd10e74367a 100644
--- a/agent/stream_diag.py
+++ b/agent/stream_diag.py
@@ -258,7 +258,7 @@ def emit_stream_drop(
         except Exception:
             pass
     try:
-        agent._emit_status(
+        agent._buffer_status(
             f"⚠️ {provider} stream {kind} ({type(error).__name__}){_suffix} "
             f"— reconnecting, retry {attempt}/{max_attempts}"
         )
diff --git a/agent/system_prompt.py b/agent/system_prompt.py
index 8fa4c191563..4a61a2ee045 100644
--- a/agent/system_prompt.py
+++ b/agent/system_prompt.py
@@ -37,6 +37,7 @@ from agent.prompt_builder import (
     PLATFORM_HINTS,
     SESSION_SEARCH_GUIDANCE,
     SKILLS_GUIDANCE,
+    TASK_COMPLETION_GUIDANCE,
     TOOL_USE_ENFORCEMENT_GUIDANCE,
     TOOL_USE_ENFORCEMENT_MODELS,
 )
@@ -100,6 +101,15 @@ def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None)
     # Pointer to the hermes-agent skill + docs for user questions about Hermes itself.
     stable_parts.append(HERMES_AGENT_HELP_GUIDANCE)
 
+    # Universal task-completion / no-fabrication guidance.  Applied to ALL
+    # models regardless of tool_use_enforcement gating — the failure modes
+    # this targets (stopping after a stub; fabricating output when a real
+    # path is blocked) are not model-family specific.  Gated only by
+    # config.yaml ``agent.task_completion_guidance`` (default True) so
+    # users who want a leaner prompt can turn it off.
+    if getattr(agent, "_task_completion_guidance", True) and agent.valid_tool_names:
+        stable_parts.append(TASK_COMPLETION_GUIDANCE)
+
     # Tool-aware behavioral guidance: only inject when the tools are loaded
     tool_guidance = []
     if "memory" in agent.valid_tool_names:
@@ -205,6 +215,23 @@ def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None)
     if _env_hints:
         stable_parts.append(_env_hints)
 
+    # Local Python toolchain probe — names python/pip/uv/PEP-668 state when
+    # something is non-default so the model can pick the right install
+    # strategy without discovering by failure.  Emits a single line; emits
+    # NOTHING when the environment is clean (no token cost).  Skipped
+    # entirely for remote terminal backends (the host's Python state is
+    # irrelevant when tools run inside docker/modal/ssh).  Gated by
+    # config.yaml ``agent.environment_probe`` (default True).
+    if getattr(agent, "_environment_probe", True):
+        try:
+            from tools.env_probe import get_environment_probe_line
+            _probe_line = get_environment_probe_line()
+            if _probe_line:
+                stable_parts.append(_probe_line)
+        except Exception:
+            # Probe failure must never block prompt build.
+            pass
+
     # Active-profile hint — names the Hermes profile the agent is running
     # under so it doesn't conflate ~/.hermes/skills/ (default profile) with
     # ~/.hermes/profiles/<active>/skills/ (this profile's). Deterministic
diff --git a/agent/tool_executor.py b/agent/tool_executor.py
index 438a6337074..358c1a0a8f7 100644
--- a/agent/tool_executor.py
+++ b/agent/tool_executor.py
@@ -13,14 +13,13 @@ extracted functions reach back through the ``run_agent`` module via
 from __future__ import annotations
 
 import concurrent.futures
-import contextvars
 import json
 import logging
 import os
 import random
 import threading
 import time
-from typing import Any, Optional
+from typing import Optional
 
 from agent.display import (
     KawaiiSpinner,
@@ -38,12 +37,9 @@ from agent.tool_dispatch_helpers import (
     make_tool_result_message,
 )
 from tools.terminal_tool import (
-    _get_approval_callback,
-    _get_sudo_password_callback,
-    set_approval_callback as _set_approval_callback,
-    set_sudo_password_callback as _set_sudo_password_callback,
     get_active_env,
 )
+from tools.thread_context import propagate_context_to_thread
 from tools.tool_result_storage import (
     maybe_persist_tool_result,
     enforce_turn_budget,
@@ -62,6 +58,55 @@ def _ra():
     return run_agent
 
 
+def _tool_search_scoped_names(agent) -> frozenset:
+    """Return the deferrable tool names the session may invoke via tool_call.
+
+    The Tool Search unwrap dispatches the underlying tool directly, bypassing
+    the bridge branch (and its scope check) in
+    ``model_tools.handle_function_call``. To keep a restricted-toolset session
+    (subagent, kanban worker, curated gateway session) from reaching tools it
+    was never granted, the unwrap validates the underlying name against this
+    set: the deferrable subset of the session's own enabled/disabled toolset
+    scope.
+
+    Result is cached on the agent and refreshed when the tool registry's
+    generation changes (e.g. an MCP server reconnects), so the common case is
+    a dict lookup, not a full tool-defs rebuild on every tool call.
+    """
+    try:
+        import model_tools
+        from tools import tool_search as _ts
+        from tools.registry import registry as _registry
+    except Exception:
+        return frozenset()
+
+    enabled = getattr(agent, "enabled_toolsets", None)
+    disabled = getattr(agent, "disabled_toolsets", None)
+    cache_key = (
+        getattr(_registry, "_generation", 0),
+        frozenset(enabled) if enabled is not None else None,
+        frozenset(disabled) if disabled is not None else None,
+    )
+    cached = getattr(agent, "_tool_search_scope_cache", None)
+    if cached is not None and cached[0] == cache_key:
+        return cached[1]
+    try:
+        scoped_defs = model_tools.get_tool_definitions(
+            enabled_toolsets=enabled,
+            disabled_toolsets=disabled,
+            quiet_mode=True,
+            skip_tool_search_assembly=True,
+        ) or []
+        names = _ts.scoped_deferrable_names(scoped_defs)
+    except Exception:
+        names = frozenset()
+    try:
+        agent._tool_search_scope_cache = (cache_key, names)
+    except Exception:
+        pass
+    return names
+
+
 def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
     """Execute multiple tool calls concurrently using a thread pool.
 
@@ -100,6 +145,41 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
         if not isinstance(function_args, dict):
             function_args = {}
 
+        # ── Tool Search unwrap ────────────────────────────────────────
+        # When the model invokes the tool_call bridge, peel it open so
+        # every downstream check (checkpointing, guardrails, plugin
+        # pre-tool-call hooks, the display/activity feed, the post-call
+        # callback) sees the underlying tool — not the bridge. This is
+        # the OpenClaw lesson: hooks must observe the real tool name.
+        #
+        # The original tool_call entry on ``tool_call.function`` is left
+        # untouched so the conversation transcript and the matching
+        # tool_call_id are preserved exactly as the model emitted them.
+        #
+        # Scope gate: the unwrap dispatches the underlying tool directly
+        # (bypassing the bridge branch in handle_function_call and its
+        # scope check), so we enforce session toolset scope HERE. A tool
+        # the session was not granted is rejected before any checkpoint,
+        # hook, or dispatch fires.
+        _ts_scope_block = None
+        try:
+            from tools import tool_search as _ts
+            if function_name == _ts.TOOL_CALL_NAME:
+                _underlying, _underlying_args, _err = _ts.resolve_underlying_call(function_args)
+                if not _err and _underlying:
+                    if _underlying in _tool_search_scoped_names(agent):
+                        function_name = _underlying
+                        function_args = _underlying_args
+                    else:
+                        _ts_scope_block = json.dumps({
+                            "error": (
+                                f"'{_underlying}' is not available in this session. "
+                                "Use tool_search to find tools you can call."
+                            ),
+                        }, ensure_ascii=False)
+        except Exception:
+            pass
+
         # Checkpoint for file-mutating tools
         if function_name in {"write_file", "patch"} and agent._checkpoint_mgr.enabled:
             try:
@@ -124,21 +204,25 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
 
         block_result = None
         blocked_by_guardrail = False
-        try:
-            from hermes_cli.plugins import get_pre_tool_call_block_message
-            block_message = get_pre_tool_call_block_message(
-                function_name, function_args, task_id=effective_task_id or "",
-            )
-        except Exception:
-            block_message = None
-
-        if block_message is not None:
-            block_result = json.dumps({"error": block_message}, ensure_ascii=False)
+        if _ts_scope_block is not None:
+            # Out-of-scope tool_call: reject before hooks/guardrails/dispatch.
+            block_result = _ts_scope_block
         else:
-            guardrail_decision = agent._tool_guardrails.before_call(function_name, function_args)
-            if not guardrail_decision.allows_execution:
-                block_result = agent._guardrail_block_result(guardrail_decision)
-                blocked_by_guardrail = True
+            try:
+                from hermes_cli.plugins import get_pre_tool_call_block_message
+                block_message = get_pre_tool_call_block_message(
+                    function_name, function_args, task_id=effective_task_id or "",
+                )
+            except Exception:
+                block_message = None
+
+            if block_message is not None:
+                block_result = json.dumps({"error": block_message}, ensure_ascii=False)
+            else:
+                guardrail_decision = agent._tool_guardrails.before_call(function_name, function_args)
+                if not guardrail_decision.allows_execution:
+                    block_result = agent._guardrail_block_result(guardrail_decision)
+                    blocked_by_guardrail = True
 
         parsed_calls.append((tool_call, function_name, function_args, block_result, blocked_by_guardrail))
 
@@ -186,14 +270,6 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
     agent._current_tool = tool_names_str
     agent._touch_activity(f"executing {num_tools} tools concurrently: {tool_names_str}")
 
-    # Capture CLI callbacks from the agent thread so worker threads can
-    # register them locally.  Without this, _get_approval_callback() in
-    # terminal_tool returns None in ThreadPoolExecutor workers, causing
-    # the dangerous-command prompt to fall back to input() — which
-    # deadlocks against prompt_toolkit's raw terminal mode (#13617).
-    _parent_approval_cb = _get_approval_callback()
-    _parent_sudo_cb = _get_sudo_password_callback()
-
     def _run_tool(index, tool_call, function_name, function_args):
         """Worker function executed in a thread."""
         # Register this worker tid so the agent can fan out an interrupt
@@ -220,18 +296,9 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
             set_activity_callback(agent._touch_activity)
         except Exception:
             pass
-        # Propagate approval/sudo callbacks to this worker thread.
-        # Mirrors cli.py run_agent() pattern (GHSA-qg5c-hvr5-hjgr).
-        if _parent_approval_cb is not None:
-            try:
-                _set_approval_callback(_parent_approval_cb)
-            except Exception:
-                pass
-        if _parent_sudo_cb is not None:
-            try:
-                _set_sudo_password_callback(_parent_sudo_cb)
-            except Exception:
-                pass
+        # Approval/sudo callbacks (thread-local) and the agent turn's
+        # ContextVars are propagated by propagate_context_to_thread() at the
+        # submit site below (GHSA-qg5c-hvr5-hjgr, #13617).
         start = time.time()
         try:
             result = agent._invoke_tool(
@@ -261,13 +328,6 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
             _ra()._set_interrupt(False, _worker_tid)
         except Exception:
             pass
-        # Clear thread-local callbacks so a recycled worker thread
-        # doesn't hold stale references to a disposed CLI instance.
-        try:
-            _set_approval_callback(None)
-            _set_sudo_password_callback(None)
-        except Exception:
-            pass
 
     # Start spinner for CLI mode (skip when TUI handles tool progress)
     spinner = None
@@ -287,9 +347,12 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
             max_workers = min(len(runnable_calls), _MAX_TOOL_WORKERS)
             with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
                 for i, tc, name, args in runnable_calls:
-                    # Propagate ContextVars (e.g. _approval_session_key); mirrors asyncio.to_thread.
-                    ctx = contextvars.copy_context()
-                    f = executor.submit(ctx.run, _run_tool, i, tc, name, args)
+                    # Propagate the agent turn's ContextVars (e.g.
+                    # _approval_session_key) AND thread-local approval/sudo
+                    # callbacks into the worker thread; clears callbacks on exit.
+                    f = executor.submit(
+                        propagate_context_to_thread(_run_tool), i, tc, name, args
+                    )
                     futures.append(f)
 
                 # Wait for all to complete with periodic heartbeats so the
@@ -497,16 +560,39 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
         if not isinstance(function_args, dict):
             function_args = {}
 
-        # Check plugin hooks for a block directive before executing.
-        _block_msg: Optional[str] = None
+        # Tool Search unwrap — see execute_tool_calls_concurrent for full
+        # rationale, including the scope gate (the unwrap dispatches the
+        # underlying tool directly, so session toolset scope is enforced here).
+        _ts_scope_block: Optional[str] = None
         try:
-            from hermes_cli.plugins import get_pre_tool_call_block_message
-            _block_msg = get_pre_tool_call_block_message(
-                function_name, function_args, task_id=effective_task_id or "",
-            )
+            from tools import tool_search as _ts
+            if function_name == _ts.TOOL_CALL_NAME:
+                _underlying, _underlying_args, _err = _ts.resolve_underlying_call(function_args)
+                if not _err and _underlying:
+                    if _underlying in _tool_search_scoped_names(agent):
+                        function_name = _underlying
+                        function_args = _underlying_args
+                    else:
+                        _ts_scope_block = (
+                            f"'{_underlying}' is not available in this session. "
+                            "Use tool_search to find tools you can call."
+                        )
         except Exception:
             pass
 
+        # Check plugin hooks for a block directive before executing.
+        _block_msg: Optional[str] = None
+        if _ts_scope_block is not None:
+            _block_msg = _ts_scope_block
+        else:
+            try:
+                from hermes_cli.plugins import get_pre_tool_call_block_message
+                _block_msg = get_pre_tool_call_block_message(
+                    function_name, function_args, task_id=effective_task_id or "",
+                )
+            except Exception:
+                pass
+
         _guardrail_block_decision: ToolGuardrailDecision | None = None
         if _block_msg is None:
             guardrail_decision = agent._tool_guardrails.before_call(function_name, function_args)
@@ -752,6 +838,8 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
                     session_id=agent.session_id or "",
                     enabled_tools=list(agent.valid_tool_names) if agent.valid_tool_names else None,
                     skip_pre_tool_call_hook=True,
+                    enabled_toolsets=getattr(agent, "enabled_toolsets", None),
+                    disabled_toolsets=getattr(agent, "disabled_toolsets", None),
                 )
                 _spinner_result = function_result
             except Exception as tool_error:
@@ -772,6 +860,8 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
                     session_id=agent.session_id or "",
                     enabled_tools=list(agent.valid_tool_names) if agent.valid_tool_names else None,
                     skip_pre_tool_call_hook=True,
+                    enabled_toolsets=getattr(agent, "enabled_toolsets", None),
+                    disabled_toolsets=getattr(agent, "disabled_toolsets", None),
                 )
             except Exception as tool_error:
                 function_result = f"Error executing tool '{function_name}': {tool_error}"
diff --git a/agent/transports/chat_completions.py b/agent/transports/chat_completions.py
index 96997afca43..259b1b0ca61 100644
--- a/agent/transports/chat_completions.py
+++ b/agent/transports/chat_completions.py
@@ -10,7 +10,7 @@ reasoning configuration, temperature handling, and extra_body assembly.
 """
 
 import copy
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict
 
 from agent.lmstudio_reasoning import resolve_lmstudio_effort
 from agent.moonshot_schema import is_moonshot_model, sanitize_moonshot_tools
@@ -476,13 +476,17 @@ class ChatCompletionsTransport(ProviderTransport):
         ephemeral = params.get("ephemeral_max_output_tokens")
         user_max = params.get("max_tokens")
         anthropic_max = params.get("anthropic_max_output")
+        # Per-model default cap — profiles override get_max_tokens() when
+        # they front several backends with different completion-token limits
+        # (e.g. opencode-go: mimo-v2.5-pro = 131072).
+        profile_max = profile.get_max_tokens(model)
 
         if ephemeral is not None and max_tokens_fn:
             api_kwargs.update(max_tokens_fn(ephemeral))
         elif user_max is not None and max_tokens_fn:
             api_kwargs.update(max_tokens_fn(user_max))
-        elif profile.default_max_tokens and max_tokens_fn:
-            api_kwargs.update(max_tokens_fn(profile.default_max_tokens))
+        elif profile_max and max_tokens_fn:
+            api_kwargs.update(max_tokens_fn(profile_max))
         elif anthropic_max is not None:
             api_kwargs["max_tokens"] = anthropic_max
 
diff --git a/agent/transports/codex_app_server.py b/agent/transports/codex_app_server.py
index 7128de9c4fa..be348a6960f 100644
--- a/agent/transports/codex_app_server.py
+++ b/agent/transports/codex_app_server.py
@@ -23,7 +23,7 @@ import subprocess
 import threading
 import time
 from dataclasses import dataclass, field
-from typing import Any, Callable, Optional
+from typing import Any, Optional
 
 # Default minimum codex version we test against. The PR sets this from the
 # `codex --version` parsed at install time; bumping is a one-line change here.
diff --git a/agent/transports/codex_app_server_session.py b/agent/transports/codex_app_server_session.py
index 74e164d64d9..60eb607084f 100644
--- a/agent/transports/codex_app_server_session.py
+++ b/agent/transports/codex_app_server_session.py
@@ -31,6 +31,7 @@ import time
 from dataclasses import dataclass, field
 from typing import Any, Callable, Optional
 
+from agent.codex_responses_adapter import _format_responses_error
 from agent.redact import redact_sensitive_text
 from agent.transports.codex_app_server import (
     CodexAppServerClient,
@@ -581,7 +582,7 @@ class CodexAppServerSession:
                         (note.get("params") or {}).get("turn") or {}
                     ).get("error")
                     if err_obj:
-                        err_msg = err_obj.get("message") or str(err_obj)
+                        err_msg = _format_responses_error(err_obj, str(turn_status))
                         # If the turn failed for an auth/refresh reason,
                         # rewrite the error into a re-auth hint AND mark
                         # the session for retirement.
diff --git a/agent/usage_pricing.py b/agent/usage_pricing.py
index 93ced2e7d43..8d6b85cd0b8 100644
--- a/agent/usage_pricing.py
+++ b/agent/usage_pricing.py
@@ -83,6 +83,34 @@ _UTC_NOW = lambda: datetime.now(timezone.utc)
 # Official docs snapshot entries. Models whose published pricing and cache
 # semantics are stable enough to encode exactly.
 _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
+    # ── Anthropic Claude 4.8 ─────────────────────────────────────────────
+    # Same $5/$25 base pricing as 4.6/4.7.  Fast-mode variant is a separate
+    # model ID with 2x premium (vs the 6x premium on older Opus generations).
+    # Source: https://openrouter.ai/anthropic/claude-opus-4.8
+    (
+        "anthropic",
+        "claude-opus-4-8",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("5.00"),
+        output_cost_per_million=Decimal("25.00"),
+        cache_read_cost_per_million=Decimal("0.50"),
+        cache_write_cost_per_million=Decimal("6.25"),
+        source="official_docs_snapshot",
+        source_url="https://platform.claude.com/docs/en/about-claude/pricing",
+        pricing_version="anthropic-pricing-2026-05",
+    ),
+    (
+        "anthropic",
+        "claude-opus-4-8-fast",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("10.00"),
+        output_cost_per_million=Decimal("50.00"),
+        cache_read_cost_per_million=Decimal("1.00"),
+        cache_write_cost_per_million=Decimal("12.50"),
+        source="official_docs_snapshot",
+        source_url="https://openrouter.ai/anthropic/claude-opus-4.8-fast",
+        pricing_version="anthropic-pricing-2026-05",
+    ),
     # ── Anthropic Claude 4.7 ─────────────────────────────────────────────
     # Opus 4.5/4.6/4.7 share $5/$25 pricing (new tokenizer, up to 35% more
     # tokens for the same text).
diff --git a/agent/web_search_provider.py b/agent/web_search_provider.py
index 7223bbf2cfe..685eb68b337 100644
--- a/agent/web_search_provider.py
+++ b/agent/web_search_provider.py
@@ -61,14 +61,14 @@ from typing import Any, Dict, List
 
 
 class WebSearchProvider(abc.ABC):
-    """Abstract base class for a web search/extract/crawl backend.
+    """Abstract base class for a web search/extract backend.
 
     Subclasses must implement :meth:`is_available` and at least one of
-    :meth:`search` / :meth:`extract` / :meth:`crawl`. The
-    :meth:`supports_search` / :meth:`supports_extract` / :meth:`supports_crawl`
-    capability flags let the registry route each tool call to the right
-    provider, and let multi-capability providers (Firecrawl, Tavily, Exa,
-    …) advertise multiple capabilities from a single class.
+    :meth:`search` / :meth:`extract`. The :meth:`supports_search` /
+    :meth:`supports_extract` capability flags let the registry route each
+    tool call to the right provider, and let multi-capability providers
+    (Firecrawl, Tavily, Exa, …) advertise multiple capabilities from a
+    single class.
     """
 
     @property
@@ -113,22 +113,6 @@ class WebSearchProvider(abc.ABC):
         """
         return False
 
-    def supports_crawl(self) -> bool:
-        """Return True if this provider implements :meth:`crawl`.
-
-        Crawl differs from extract in that the agent provides a *seed URL*
-        and the provider walks linked pages on its own — useful for
-        documentation sites where the agent doesn't know all relevant
-        URLs upfront. Tavily is the only built-in backend that natively
-        crawls today; Firecrawl provides a similar capability that we
-        don't currently surface as a tool.
-
-        Providers that don't crawl should leave this as False; the
-        dispatcher in :func:`tools.web_tools.web_crawl_tool` will fall
-        back to its auxiliary-model summarization path.
-        """
-        return False
-
     def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
         """Execute a web search.
 
@@ -173,26 +157,6 @@ class WebSearchProvider(abc.ABC):
             f"{self.name} does not support extract (override supports_extract)"
         )
 
-    def crawl(self, url: str, **kwargs: Any) -> Any:
-        """Crawl a seed URL and return results.
-
-        Override when :meth:`supports_crawl` returns True. The default
-        raises NotImplementedError; callers should gate on
-        :meth:`supports_crawl` before calling.
-
-        Return shape: ``{"results": [{"url": str, "title": str,
-        "content": str, ...}, ...]}`` matching what
-        :func:`tools.web_tools.web_crawl_tool` post-processing expects.
-
-        Implementations MAY be ``async def``.
-
-        ``kwargs`` may carry forward-compat fields (e.g. ``max_depth``,
-        ``include_domains``) — implementations should ignore unknown keys.
-        """
-        raise NotImplementedError(
-            f"{self.name} does not support crawl (override supports_crawl)"
-        )
-
     def get_setup_schema(self) -> Dict[str, Any]:
         """Return provider metadata for the ``hermes tools`` picker.
 
diff --git a/agent/web_search_registry.py b/agent/web_search_registry.py
index c61c16cadb2..079c755787c 100644
--- a/agent/web_search_registry.py
+++ b/agent/web_search_registry.py
@@ -11,7 +11,7 @@ Active selection
 ----------------
 The active provider is chosen by configuration with this precedence:
 
-1. ``web.search_backend`` / ``web.extract_backend`` / ``web.crawl_backend``
+1. ``web.search_backend`` / ``web.extract_backend``
    (per-capability override).
 2. ``web.backend`` (shared fallback).
 3. If exactly one capability-eligible provider is registered AND available,
@@ -24,10 +24,10 @@ The active provider is chosen by configuration with this precedence:
 5. Otherwise ``None`` — the tool surfaces a helpful error pointing at
    ``hermes tools``.
 
-The capability filter (``supports_search`` / ``supports_extract`` /
-``supports_crawl``) is applied at every step so a search-only provider
-(``brave-free``) configured as ``web.extract_backend`` correctly falls
-through to an extract-capable backend.
+The capability filter (``supports_search`` / ``supports_extract``) is
+applied at every step so a search-only provider (``brave-free``)
+configured as ``web.extract_backend`` correctly falls through to an
+extract-capable backend.
 """
 
 from __future__ import annotations
@@ -131,7 +131,7 @@ _LEGACY_PREFERENCE = (
 
 
 def _resolve(configured: Optional[str], *, capability: str) -> Optional[WebSearchProvider]:
-    """Resolve the active provider for a capability ("search" | "extract" | "crawl").
+    """Resolve the active provider for a capability ("search" | "extract").
 
     Resolution rules (in order):
 
@@ -168,8 +168,6 @@ def _resolve(configured: Optional[str], *, capability: str) -> Optional[WebSearc
             return bool(p.supports_search())
         if capability == "extract":
             return bool(p.supports_extract())
-        if capability == "crawl":
-            return bool(p.supports_crawl())
         return False
 
     def _is_available_safe(p: WebSearchProvider) -> bool:
@@ -241,21 +239,6 @@ def get_active_extract_provider() -> Optional[WebSearchProvider]:
     return _resolve(explicit, capability="extract")
 
 
-def get_active_crawl_provider() -> Optional[WebSearchProvider]:
-    """Resolve the currently-active web crawl provider.
-
-    Reads ``web.crawl_backend`` (preferred) or ``web.backend`` (shared
-    fallback) from config.yaml; falls back per the module docstring.
-
-    Crawl is a niche capability — among built-in providers only Tavily and
-    Firecrawl implement it. Callers should expect ``None`` and fall back to
-    a different strategy (e.g. summarize-via-LLM) when neither is
-    configured.
-    """
-    explicit = _read_config_key("web", "crawl_backend") or _read_config_key("web", "backend")
-    return _resolve(explicit, capability="crawl")
-
-
 def _reset_for_tests() -> None:
     """Clear the registry. **Test-only.**"""
     with _lock:
diff --git a/apps/bootstrap-installer/vite.config.ts b/apps/bootstrap-installer/vite.config.ts
index 90b49ba98aa..f0a0a316686 100644
--- a/apps/bootstrap-installer/vite.config.ts
+++ b/apps/bootstrap-installer/vite.config.ts
@@ -6,7 +6,7 @@ import path from 'node:path'
 // Hermes Setup — Tauri-targeted Vite config.
 //
 // Port 5175 keeps us out of the way of:
-//   apps/dashboard       (vite default 5173)
+//   web       (vite default 5173)
 //   apps/desktop dev     (5174 per its package.json)
 //
 // `clearScreen: false` is the Tauri convention — they spawn vite as a child
diff --git a/apps/dashboard/public/ds-assets/filler-bg0.jpg b/apps/dashboard/public/ds-assets/filler-bg0.jpg
deleted file mode 100644
index 49096941731..00000000000
Binary files a/apps/dashboard/public/ds-assets/filler-bg0.jpg and /dev/null differ
diff --git a/apps/dashboard/public/fonts/Collapse-BoldItalic.woff2 b/apps/dashboard/public/fonts/Collapse-BoldItalic.woff2
deleted file mode 100644
index 4fd83e36cf3..00000000000
Binary files a/apps/dashboard/public/fonts/Collapse-BoldItalic.woff2 and /dev/null differ
diff --git a/apps/dashboard/public/fonts/Collapse-Italic.woff2 b/apps/dashboard/public/fonts/Collapse-Italic.woff2
deleted file mode 100644
index 8cb1af7f126..00000000000
Binary files a/apps/dashboard/public/fonts/Collapse-Italic.woff2 and /dev/null differ
diff --git a/apps/dashboard/public/fonts/Collapse-Light.woff2 b/apps/dashboard/public/fonts/Collapse-Light.woff2
deleted file mode 100644
index a8d28c957e6..00000000000
Binary files a/apps/dashboard/public/fonts/Collapse-Light.woff2 and /dev/null differ
diff --git a/apps/dashboard/public/fonts/Collapse-LightItalic.woff2 b/apps/dashboard/public/fonts/Collapse-LightItalic.woff2
deleted file mode 100644
index 9b44d0e1e8b..00000000000
Binary files a/apps/dashboard/public/fonts/Collapse-LightItalic.woff2 and /dev/null differ
diff --git a/apps/dashboard/public/fonts/Collapse-Thin.woff2 b/apps/dashboard/public/fonts/Collapse-Thin.woff2
deleted file mode 100644
index c01b891c152..00000000000
Binary files a/apps/dashboard/public/fonts/Collapse-Thin.woff2 and /dev/null differ
diff --git a/apps/dashboard/public/fonts/Collapse-ThinItalic.woff2 b/apps/dashboard/public/fonts/Collapse-ThinItalic.woff2
deleted file mode 100644
index 26ec5418f65..00000000000
Binary files a/apps/dashboard/public/fonts/Collapse-ThinItalic.woff2 and /dev/null differ
diff --git a/apps/dashboard/public/fonts/Neuebit-Bold.woff2 b/apps/dashboard/public/fonts/Neuebit-Bold.woff2
deleted file mode 100644
index 2b4c40bd29d..00000000000
Binary files a/apps/dashboard/public/fonts/Neuebit-Bold.woff2 and /dev/null differ
diff --git a/apps/dashboard/src/components/BottomPickSheet.tsx b/apps/dashboard/src/components/BottomPickSheet.tsx
deleted file mode 100644
index 38cae8daa00..00000000000
--- a/apps/dashboard/src/components/BottomPickSheet.tsx
+++ /dev/null
@@ -1,225 +0,0 @@
-import {
-  type PointerEvent as ReactPointerEvent,
-  type ReactNode,
-  useEffect,
-  useRef,
-  useState,
-} from "react";
-import { createPortal } from "react-dom";
-import { Typography } from "@/components/NouiTypography";
-import { cn, themedBody } from "@/lib/utils";
-
-const CLOSE_DRAG_MIN_PX = 72;
-const CLOSE_DRAG_RATIO = 0.18;
-const SHEET_TRANSITION_MS = 280;
-
-/**
- * Mobile-first picker shell: fixed backdrop + bottom sheet, portaled to `body`
- * so nested overflow/transform in the sidebar cannot clip menus (theme /
- * language switchers). Open/close uses slide + fade; teardown is delayed until
- * the exit animation finishes so animations can complete.
- *
- * Drag the header/handle downward to dismiss (skipped when reduced motion is on).
- */
-export function BottomPickSheet({
-  backdropDismissLabel = "Dismiss",
-  children,
-  onClose,
-  open,
-  title,
-}: BottomPickSheetProps) {
-  const [renderPortal, setRenderPortal] = useState(open);
-  const [entered, setEntered] = useState(false);
-  const [dragOffsetPx, setDragOffsetPx] = useState(0);
-  const [dragActive, setDragActive] = useState(false);
-
-  const closeTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
-  const sheetRef = useRef<HTMLDivElement>(null);
-  const dragTrackingRef = useRef(false);
-  const dragStartYRef = useRef(0);
-  const dragOffsetRef = useRef(0);
-
-  const reducedMotion =
-    typeof window !== "undefined" &&
-    window.matchMedia("(prefers-reduced-motion: reduce)").matches;
-
-  const syncDragPx = (next: number) => {
-    dragOffsetRef.current = next;
-    setDragOffsetPx(next);
-  };
-
-  useEffect(() => {
-    if (closeTimerRef.current) {
-      clearTimeout(closeTimerRef.current);
-      closeTimerRef.current = null;
-    }
-
-    const ms = reducedMotion ? 0 : SHEET_TRANSITION_MS;
-
-    let openRafId = 0;
-    let exitRafId = 0;
-
-    if (open) {
-      openRafId = requestAnimationFrame(() => {
-        dragTrackingRef.current = false;
-        dragOffsetRef.current = 0;
-        setDragActive(false);
-        setDragOffsetPx(0);
-        setRenderPortal(true);
-        requestAnimationFrame(() => {
-          requestAnimationFrame(() => setEntered(true));
-        });
-      });
-    } else {
-      exitRafId = requestAnimationFrame(() => {
-        dragTrackingRef.current = false;
-        setDragActive(false);
-        setEntered(false);
-        closeTimerRef.current = window.setTimeout(() => {
-          dragOffsetRef.current = 0;
-          setDragOffsetPx(0);
-          setRenderPortal(false);
-          closeTimerRef.current = null;
-        }, ms);
-      });
-    }
-
-    return () => {
-      cancelAnimationFrame(openRafId);
-      cancelAnimationFrame(exitRafId);
-      if (closeTimerRef.current) {
-        clearTimeout(closeTimerRef.current);
-        closeTimerRef.current = null;
-      }
-    };
-  }, [open, reducedMotion]);
-
-  useEffect(() => {
-    if (!renderPortal) return;
-    const prev = document.body.style.overflow;
-    document.body.style.overflow = "hidden";
-    return () => {
-      document.body.style.overflow = prev;
-    };
-  }, [renderPortal]);
-
-  if (!renderPortal || typeof document === "undefined") return null;
-
-  const durationClass = reducedMotion ? "duration-0" : "duration-[280ms]";
-
-  const draggingVisual = dragActive || dragOffsetPx > 0;
-
-  const onDragPointerDown = (e: ReactPointerEvent<HTMLDivElement>) => {
-    if (reducedMotion || !entered) return;
-    if (e.pointerType === "mouse" && e.button !== 0) return;
-
-    dragTrackingRef.current = true;
-    setDragActive(true);
-    dragStartYRef.current = e.clientY;
-    syncDragPx(0);
-    e.currentTarget.setPointerCapture(e.pointerId);
-  };
-
-  const onDragPointerMove = (e: ReactPointerEvent<HTMLDivElement>) => {
-    if (!dragTrackingRef.current) return;
-    const dy = e.clientY - dragStartYRef.current;
-    const next = Math.max(0, dy);
-    const sheetH = sheetRef.current?.offsetHeight ?? 560;
-    syncDragPx(Math.min(next, sheetH));
-  };
-
-  const endDrag = (e: ReactPointerEvent<HTMLDivElement>) => {
-    if (!dragTrackingRef.current) return;
-    dragTrackingRef.current = false;
-    setDragActive(false);
-    try {
-      e.currentTarget.releasePointerCapture(e.pointerId);
-    } catch {
-      /* already released */
-    }
-
-    const sheetH = sheetRef.current?.offsetHeight ?? 560;
-    const threshold = Math.max(CLOSE_DRAG_MIN_PX, sheetH * CLOSE_DRAG_RATIO);
-    const d = dragOffsetRef.current;
-
-    if (d >= threshold) {
-      onClose();
-      return;
-    }
-    syncDragPx(0);
-  };
-
-  return createPortal(
-    <div className="fixed inset-0 z-[200] flex flex-col justify-end">
-      <button
-        type="button"
-        aria-label={backdropDismissLabel}
-        className={cn(
-          "absolute inset-0 bg-black/55 backdrop-blur-[2px]",
-          "transition-opacity ease-out motion-reduce:transition-none",
-          durationClass,
-          entered ? "opacity-100" : "opacity-0",
-        )}
-        onClick={onClose}
-      />
-
-      <div
-        aria-label={title}
-        aria-modal="true"
-        ref={sheetRef}
-        className={cn(
-          themedBody,
-          "relative flex max-h-[85dvh] min-h-0 flex-col rounded-t-xl border border-current/20",
-          "bg-background-base/98 pb-[max(1rem,env(safe-area-inset-bottom))]",
-          "shadow-[0_-12px_40px_-8px_rgba(0,0,0,0.55)] backdrop-blur-md",
-          "ease-out motion-reduce:transition-none transform-gpu",
-          draggingVisual ? "transition-none" : cn("transition-transform", durationClass),
-          entered ? "translate-y-0" : "translate-y-full",
-        )}
-        role="dialog"
-        style={
-          entered && dragOffsetPx > 0
-            ? { transform: `translateY(${dragOffsetPx}px)` }
-            : undefined
-        }
-      >
-        <div
-          className={cn(
-            "flex shrink-0 flex-col gap-2 border-b border-current/15 px-4 pb-3 pt-2",
-            "touch-none select-none",
-            reducedMotion ? "cursor-default" : "cursor-grab active:cursor-grabbing",
-          )}
-          onPointerCancel={endDrag}
-          onPointerDown={onDragPointerDown}
-          onPointerMove={onDragPointerMove}
-          onPointerUp={endDrag}
-        >
-          <div
-            aria-hidden
-            className="mx-auto h-1 w-10 shrink-0 rounded-full bg-current/20"
-          />
-
-          <Typography
-            mondwest
-            className="text-display text-xs tracking-[0.12em] text-text-tertiary"
-          >
-            {title}
-          </Typography>
-        </div>
-
-        <div className="min-h-0 flex-1 overflow-y-auto overscroll-contain">
-          {children}
-        </div>
-      </div>
-    </div>,
-    document.body,
-  );
-}
-
-interface BottomPickSheetProps {
-  backdropDismissLabel?: string;
-  children: ReactNode;
-  onClose: () => void;
-  open: boolean;
-  title: string;
-}
diff --git a/apps/dashboard/src/components/NouiTypography.tsx b/apps/dashboard/src/components/NouiTypography.tsx
deleted file mode 100644
index eb26d75cc1c..00000000000
--- a/apps/dashboard/src/components/NouiTypography.tsx
+++ /dev/null
@@ -1,63 +0,0 @@
-import { forwardRef, type ElementType, type HTMLAttributes, type ReactNode } from "react";
-import { cn } from "@/lib/utils";
-
-type TypographyProps = HTMLAttributes<HTMLElement> & {
-  as?: ElementType;
-  children?: ReactNode;
-  compressed?: boolean;
-  courier?: boolean;
-  expanded?: boolean;
-  mondwest?: boolean;
-  mono?: boolean;
-  sans?: boolean;
-  variant?: "sm" | "md" | "lg" | "xl";
-};
-
-const variantClasses: Record<NonNullable<TypographyProps["variant"]>, string> = {
-  sm: "leading-[1.4] text-[.9375rem] tracking-[0.1875rem]",
-  md: "text-[2.625rem] leading-[1] tracking-[0.0525rem]",
-  lg: "text-[2.625rem] leading-[1] tracking-[0.0525rem]",
-  xl: "text-[4.5rem] leading-[1] tracking-[0.135rem]",
-};
-
-export const Typography = forwardRef<HTMLElement, TypographyProps>(function Typography(
-  {
-    as: Component = "span",
-    className,
-    compressed,
-    courier,
-    expanded,
-    mondwest,
-    mono,
-    sans,
-    variant,
-    ...props
-  },
-  ref,
-) {
-  const hasFontVariant = compressed || courier || expanded || mondwest || mono || sans;
-
-  return (
-    <Component
-      className={cn(
-        compressed && "font-compressed",
-        courier && "font-courier",
-        expanded && "font-expanded",
-        mondwest && "font-mondwest tracking-[0.1875rem]",
-        mono && "font-mono",
-        (!hasFontVariant || sans) && "font-sans",
-        variant && variantClasses[variant],
-        className,
-      )}
-      ref={ref}
-      {...props}
-    />
-  );
-});
-
-export const H2 = forwardRef<HTMLHeadingElement, Omit<TypographyProps, "as">>(function H2(
-  { className, variant = "lg", ...props },
-  ref,
-) {
-  return <Typography as="h2" className={cn("font-bold", className)} variant={variant} ref={ref} {...props} />;
-});
diff --git a/apps/dashboard/src/components/Toast.tsx b/apps/dashboard/src/components/Toast.tsx
deleted file mode 100644
index e6bb349e896..00000000000
--- a/apps/dashboard/src/components/Toast.tsx
+++ /dev/null
@@ -1,40 +0,0 @@
-import { useEffect, useState } from "react";
-import { createPortal } from "react-dom";
-
-export function Toast({ toast }: { toast: { message: string; type: "success" | "error" } | null }) {
-  const [visible, setVisible] = useState(false);
-  const [current, setCurrent] = useState(toast);
-
-  useEffect(() => {
-    if (toast) {
-      setCurrent(toast);
-      setVisible(true);
-    } else {
-      setVisible(false);
-      const timer = setTimeout(() => setCurrent(null), 200);
-      return () => clearTimeout(timer);
-    }
-  }, [toast]);
-
-  if (!current) return null;
-
-  // Portal to document.body so the toast escapes any ancestor stacking context
-  // (e.g. <main> has `relative z-2`, which would trap z-50 below the header's z-40).
-  return createPortal(
-    <div
-      role="status"
-      aria-live="polite"
-      className={`fixed top-16 right-4 z-50 border px-4 py-2.5 font-courier text-xs tracking-wider uppercase backdrop-blur-sm ${
-        current.type === "success"
-          ? "bg-success/15 text-success border-success/30"
-          : "bg-destructive/15 text-destructive border-destructive/30"
-      }`}
-      style={{
-        animation: visible ? "toast-in 200ms ease-out forwards" : "toast-out 200ms ease-in forwards",
-      }}
-    >
-      {current.message}
-    </div>,
-    document.body,
-  );
-}
diff --git a/apps/dashboard/src/components/ui/card.tsx b/apps/dashboard/src/components/ui/card.tsx
deleted file mode 100644
index 52050c22645..00000000000
--- a/apps/dashboard/src/components/ui/card.tsx
+++ /dev/null
@@ -1,63 +0,0 @@
-import { cn, themedBody } from "@/lib/utils";
-
-/**
- * Themed card primitive. Themes can restyle every card without touching
- * call sites by setting CSS vars under the `card` component-style bucket:
- *
- *   componentStyles:
- *     card:
- *       clipPath: "polygon(10px 0, 100% 0, 100% calc(100% - 10px), calc(100% - 10px) 100%, 0 100%, 0 10px)"
- *       border: "1px solid var(--color-ring)"
- *       background: "linear-gradient(180deg, var(--color-card) 0%, transparent 100%)"
- *       boxShadow: "0 0 0 1px var(--color-ring) inset, 0 0 24px -8px var(--warm-glow)"
- *
- * All properties are optional — vars that aren't set compute to their
- * CSS initial value, so the default shadcn-y card keeps looking normal
- * for themes that don't override anything.
- */
-const CARD_STYLE: React.CSSProperties = {
-  clipPath: "var(--component-card-clip-path)",
-  borderImage: "var(--component-card-border-image)",
-  background: "var(--component-card-background)",
-  boxShadow: "var(--component-card-box-shadow)",
-};
-
-export function Card({ className, style, ...props }: React.HTMLAttributes<HTMLDivElement>) {
-  return (
-    <div
-      className={cn(
-        "border border-border bg-card/80 text-card-foreground w-full",
-        themedBody,
-        className,
-      )}
-      style={{ ...CARD_STYLE, ...style }}
-      {...props}
-    />
-  );
-}
-
-export function CardHeader({ className, ...props }: React.HTMLAttributes<HTMLDivElement>) {
-  return <div className={cn("flex flex-col gap-1.5 p-4 border-b border-border", className)} {...props} />;
-}
-
-export function CardTitle({ className, ...props }: React.HTMLAttributes<HTMLHeadingElement>) {
-  return (
-    <h3
-      className={cn(
-        "font-mondwest text-display text-sm tracking-[0.12em] text-text-primary",
-        className,
-      )}
-      {...props}
-    />
-  );
-}
-
-export function CardDescription({ className, ...props }: React.HTMLAttributes<HTMLParagraphElement>) {
-  return (
-    <p className={cn("font-mondwest normal-case text-xs text-muted-foreground", className)} {...props} />
-  );
-}
-
-export function CardContent({ className, ...props }: React.HTMLAttributes<HTMLDivElement>) {
-  return <div className={cn("p-4", className)} {...props} />;
-}
diff --git a/apps/dashboard/src/components/ui/confirm-dialog.tsx b/apps/dashboard/src/components/ui/confirm-dialog.tsx
deleted file mode 100644
index d84f931aa31..00000000000
--- a/apps/dashboard/src/components/ui/confirm-dialog.tsx
+++ /dev/null
@@ -1,137 +0,0 @@
-import { useEffect, useRef } from "react";
-import { createPortal } from "react-dom";
-import { AlertTriangle } from "lucide-react";
-import { Button } from "@nous-research/ui/ui/components/button";
-import { cn, themedBody } from "@/lib/utils";
-
-export function ConfirmDialog({
-  cancelLabel = "Cancel",
-  confirmLabel = "Confirm",
-  description,
-  destructive = false,
-  loading = false,
-  onCancel,
-  onConfirm,
-  open,
-  title,
-}: ConfirmDialogProps) {
-  const dialogRef = useRef<HTMLDivElement>(null);
-
-  // Focus the confirm button when opened; trap ESC to cancel.
-  useEffect(() => {
-    if (!open) return;
-
-    const prevActive = document.activeElement as HTMLElement | null;
-    dialogRef.current
-      ?.querySelector<HTMLButtonElement>("[data-confirm]")
-      ?.focus();
-
-    const onKey = (e: KeyboardEvent) => {
-      if (e.key === "Escape") {
-        e.preventDefault();
-        onCancel();
-      }
-    };
-
-    document.addEventListener("keydown", onKey);
-    const prevOverflow = document.body.style.overflow;
-    document.body.style.overflow = "hidden";
-
-    return () => {
-      document.removeEventListener("keydown", onKey);
-      document.body.style.overflow = prevOverflow;
-      prevActive?.focus?.();
-    };
-  }, [open, onCancel]);
-
-  if (!open) return null;
-
-  return createPortal(
-    <div
-      role="dialog"
-      aria-modal="true"
-      aria-labelledby="confirm-dialog-title"
-      aria-describedby={description ? "confirm-dialog-desc" : undefined}
-      onClick={(e) => {
-        if (e.target === e.currentTarget) onCancel();
-      }}
-      className={cn(
-        "fixed inset-0 z-50 flex items-center justify-center",
-        "bg-black/60 backdrop-blur-sm",
-        "animate-[fade-in_150ms_ease-out]",
-      )}
-    >
-      <div
-        ref={dialogRef}
-        className={cn(
-          themedBody,
-          "relative w-full max-w-md mx-4",
-          "border border-border bg-card shadow-lg",
-          "animate-[dialog-in_180ms_ease-out]",
-        )}
-      >
-        <div className="flex items-start gap-3 p-4 border-b border-border">
-          {destructive && (
-            <div
-              aria-hidden
-              className="mt-0.5 shrink-0 text-destructive"
-            >
-              <AlertTriangle className="h-4 w-4" />
-            </div>
-          )}
-
-          <div className="flex-1 min-w-0 flex flex-col gap-1">
-            <h2
-              id="confirm-dialog-title"
-              className="font-mondwest text-display text-sm font-bold tracking-[0.12em] blend-lighter"
-            >
-              {title}
-            </h2>
-
-            {description && (
-              <p
-                id="confirm-dialog-desc"
-                className="font-mondwest normal-case text-xs text-muted-foreground leading-relaxed"
-              >
-                {description}
-              </p>
-            )}
-          </div>
-        </div>
-
-        <div className="flex items-center justify-end gap-2 p-3">
-          <Button
-            type="button"
-            outlined
-            onClick={onCancel}
-            disabled={loading}
-          >
-            {cancelLabel}
-          </Button>
-          <Button
-            data-confirm
-            type="button"
-            destructive={destructive}
-            onClick={onConfirm}
-            disabled={loading}
-          >
-            {loading ? "…" : confirmLabel}
-          </Button>
-        </div>
-      </div>
-    </div>,
-    document.body,
-  );
-}
-
-interface ConfirmDialogProps {
-  cancelLabel?: string;
-  confirmLabel?: string;
-  description?: string;
-  destructive?: boolean;
-  loading?: boolean;
-  onCancel: () => void;
-  onConfirm: () => void;
-  open: boolean;
-  title: string;
-}
diff --git a/apps/dashboard/src/components/ui/input.tsx b/apps/dashboard/src/components/ui/input.tsx
deleted file mode 100644
index 1e1199e6478..00000000000
--- a/apps/dashboard/src/components/ui/input.tsx
+++ /dev/null
@@ -1,16 +0,0 @@
-import { cn } from "@/lib/utils";
-
-export function Input({ className, ...props }: React.InputHTMLAttributes<HTMLInputElement>) {
-  return (
-    <input
-      className={cn(
-        "flex h-9 w-full border border-border bg-background/40 px-3 py-1 font-courier text-sm transition-colors",
-        "placeholder:text-muted-foreground",
-        "focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-foreground/30 focus-visible:border-foreground/25",
-        "disabled:cursor-not-allowed disabled:opacity-50",
-        className,
-      )}
-      {...props}
-    />
-  );
-}
diff --git a/apps/dashboard/src/components/ui/label.tsx b/apps/dashboard/src/components/ui/label.tsx
deleted file mode 100644
index a5807e4bd4f..00000000000
--- a/apps/dashboard/src/components/ui/label.tsx
+++ /dev/null
@@ -1,13 +0,0 @@
-import { cn } from "@/lib/utils";
-
-export function Label({ className, ...props }: React.LabelHTMLAttributes<HTMLLabelElement>) {
-  return (
-    <label
-      className={cn(
-        "font-mondwest text-xs tracking-[0.1em] uppercase leading-none peer-disabled:cursor-not-allowed peer-disabled:opacity-70",
-        className,
-      )}
-      {...props}
-    />
-  );
-}
diff --git a/apps/dashboard/src/components/ui/separator.tsx b/apps/dashboard/src/components/ui/separator.tsx
deleted file mode 100644
index f432df73050..00000000000
--- a/apps/dashboard/src/components/ui/separator.tsx
+++ /dev/null
@@ -1,19 +0,0 @@
-import { cn } from "@/lib/utils";
-
-export function Separator({
-  className,
-  orientation = "horizontal",
-  ...props
-}: React.HTMLAttributes<HTMLDivElement> & { orientation?: "horizontal" | "vertical" }) {
-  return (
-    <div
-      role="separator"
-      className={cn(
-        "shrink-0 bg-border",
-        orientation === "horizontal" ? "h-px w-full" : "h-full w-px",
-        className,
-      )}
-      {...props}
-    />
-  );
-}
diff --git a/apps/dashboard/src/hooks/useBelowBreakpoint.ts b/apps/dashboard/src/hooks/useBelowBreakpoint.ts
deleted file mode 100644
index 67d7dea97ee..00000000000
--- a/apps/dashboard/src/hooks/useBelowBreakpoint.ts
+++ /dev/null
@@ -1,19 +0,0 @@
-import { useEffect, useState } from "react";
-
-/** True when viewport width is strictly below `px` (matches Tailwind `min-width: px`). */
-export function useBelowBreakpoint(px: number) {
-  const query = `(max-width: ${px - 1}px)`;
-  const [matches, setMatches] = useState(() =>
-    typeof window !== "undefined" ? window.matchMedia(query).matches : false,
-  );
-
-  useEffect(() => {
-    const mql = window.matchMedia(query);
-    const sync = () => setMatches(mql.matches);
-    sync();
-    mql.addEventListener("change", sync);
-    return () => mql.removeEventListener("change", sync);
-  }, [query]);
-
-  return matches;
-}
diff --git a/apps/dashboard/src/hooks/useConfirmDelete.ts b/apps/dashboard/src/hooks/useConfirmDelete.ts
deleted file mode 100644
index 563255343c4..00000000000
--- a/apps/dashboard/src/hooks/useConfirmDelete.ts
+++ /dev/null
@@ -1,41 +0,0 @@
-import { useCallback, useState } from "react";
-
-export function useConfirmDelete<TId>({
-  onDelete,
-}: {
-  onDelete: (id: TId) => Promise<void>;
-}) {
-  const [pendingId, setPendingId] = useState<TId | null>(null);
-  const [isDeleting, setIsDeleting] = useState(false);
-
-  const requestDelete = useCallback((id: TId) => {
-    setPendingId(id);
-  }, []);
-
-  const cancel = useCallback(() => {
-    if (!isDeleting) setPendingId(null);
-  }, [isDeleting]);
-
-  const confirm = useCallback(async () => {
-    if (pendingId === null) return;
-    const id = pendingId;
-    setIsDeleting(true);
-    try {
-      await onDelete(id);
-      setPendingId(null);
-    } catch {
-      // Dialog stays open; caller can surface errors in onDelete before rethrowing
-    } finally {
-      setIsDeleting(false);
-    }
-  }, [pendingId, onDelete]);
-
-  return {
-    cancel,
-    confirm,
-    isDeleting,
-    isOpen: pendingId !== null,
-    pendingId,
-    requestDelete,
-  } as const;
-}
diff --git a/apps/dashboard/src/hooks/useToast.ts b/apps/dashboard/src/hooks/useToast.ts
deleted file mode 100644
index ce82372f4ba..00000000000
--- a/apps/dashboard/src/hooks/useToast.ts
+++ /dev/null
@@ -1,15 +0,0 @@
-import { useCallback, useState } from "react";
-
-export function useToast(duration = 3000) {
-  const [toast, setToast] = useState<{ message: string; type: "success" | "error" } | null>(null);
-
-  const showToast = useCallback(
-    (message: string, type: "success" | "error") => {
-      setToast({ message, type });
-      setTimeout(() => setToast(null), duration);
-    },
-    [duration],
-  );
-
-  return { toast, showToast };
-}
diff --git a/apps/dashboard/src/lib/gatewayClient.ts b/apps/dashboard/src/lib/gatewayClient.ts
deleted file mode 100644
index 3c8cdd76035..00000000000
--- a/apps/dashboard/src/lib/gatewayClient.ts
+++ /dev/null
@@ -1,38 +0,0 @@
-import {
-  JsonRpcGatewayClient,
-  type ConnectionState,
-  type GatewayEvent,
-  type GatewayEventName,
-} from "@hermes/shared";
-
-import { HERMES_BASE_PATH } from "@/lib/api";
-
-export type { ConnectionState, GatewayEvent, GatewayEventName };
-
-/**
- * Browser wrapper for the shared tui_gateway JSON-RPC client.
- *
- * Dashboard resolves its token and host from the served page. Desktop uses the
- * same shared protocol client, but supplies an absolute wsUrl from Electron.
- */
-export class GatewayClient extends JsonRpcGatewayClient {
-  async connect(token?: string): Promise<void> {
-    const resolved = token ?? window.__HERMES_SESSION_TOKEN__ ?? "";
-    if (!resolved) {
-      throw new Error(
-        "Session token not available — page must be served by the Hermes dashboard",
-      );
-    }
-
-    const scheme = location.protocol === "https:" ? "wss:" : "ws:";
-    await super.connect(
-      `${scheme}//${location.host}${HERMES_BASE_PATH}/api/ws?token=${encodeURIComponent(resolved)}`,
-    );
-  }
-}
-
-declare global {
-  interface Window {
-    __HERMES_SESSION_TOKEN__?: string;
-  }
-}
diff --git a/apps/desktop/README.md b/apps/desktop/README.md
index 3d757779b12..dbdfdd75768 100644
--- a/apps/desktop/README.md
+++ b/apps/desktop/README.md
@@ -4,7 +4,7 @@ Native Electron shell for Hermes. It packages the desktop renderer, a bundled He
 
 ## Setup
 
-Install workspace dependencies from the repo root so `apps/desktop`, `apps/dashboard`, and `apps/shared` stay linked:
+Install workspace dependencies from the repo root so `apps/desktop`, `web`, and `apps/shared` stay linked:
 
 ```bash
 npm install
@@ -80,7 +80,7 @@ hermes dashboard --tui --no-open
 For dashboard HMR, start Vite in another terminal:
 
 ```bash
-cd apps/dashboard
+cd web
 npm run dev
 ```
 
diff --git a/apps/desktop/electron/main.cjs b/apps/desktop/electron/main.cjs
index 4bc746812c3..344691b4bce 100644
--- a/apps/desktop/electron/main.cjs
+++ b/apps/desktop/electron/main.cjs
@@ -926,6 +926,35 @@ function getVenvPython(venvRoot) {
   return path.join(venvRoot, IS_WINDOWS ? path.join('Scripts', 'python.exe') : path.join('bin', 'python'))
 }
 
+// resolveGitBinary — locate git.exe on Windows. A fresh installer-driven
+// install only has PortableGit under %LOCALAPPDATA%\hermes\git (never on
+// PATH), so a bare spawn('git') ENOENTs and self-update checks fail with
+// "Couldn't check for updates". Mirror findGitBash: PortableGit first, then
+// standard Git-for-Windows locations, then PATH. Cached after first probe.
+let _gitBinaryCache = null
+function resolveGitBinary() {
+  if (_gitBinaryCache) return _gitBinaryCache
+  if (!IS_WINDOWS) {
+    _gitBinaryCache = findOnPath('git') || 'git'
+    return _gitBinaryCache
+  }
+
+  const localAppData = process.env.LOCALAPPDATA || ''
+  const candidates = []
+  if (localAppData) {
+    candidates.push(path.join(localAppData, 'hermes', 'git', 'cmd', 'git.exe'))
+    candidates.push(path.join(localAppData, 'hermes', 'git', 'bin', 'git.exe'))
+  }
+  candidates.push(path.join(process.env['ProgramFiles'] || 'C:\\Program Files', 'Git', 'cmd', 'git.exe'))
+  candidates.push(path.join(process.env['ProgramFiles(x86)'] || 'C:\\Program Files (x86)', 'Git', 'cmd', 'git.exe'))
+  if (localAppData) {
+    candidates.push(path.join(localAppData, 'Programs', 'Git', 'cmd', 'git.exe'))
+  }
+
+  _gitBinaryCache = candidates.find(fileExists) || findOnPath('git') || 'git'
+  return _gitBinaryCache
+}
+
 function recentHermesLog() {
   return hermesLog.slice(-20).join('\n')
 }
@@ -962,7 +991,7 @@ function resolveUpdateRoot() {
 
 function runGit(args, options = {}) {
   return new Promise((resolve, reject) => {
-    const child = spawn('git', IS_WINDOWS ? ['-c', 'windows.appendAtomically=false', ...args] : args, {
+    const child = spawn(resolveGitBinary(), IS_WINDOWS ? ['-c', 'windows.appendAtomically=false', ...args] : args, {
       cwd: options.cwd,
       env: { ...process.env, ...(options.env || {}), GIT_TERMINAL_PROMPT: '0' },
       stdio: ['ignore', 'pipe', 'pipe']
diff --git a/apps/desktop/src/app/page-search-shell.tsx b/apps/desktop/src/app/page-search-shell.tsx
index d2f8ddaabf0..6e7d3432be1 100644
--- a/apps/desktop/src/app/page-search-shell.tsx
+++ b/apps/desktop/src/app/page-search-shell.tsx
@@ -29,12 +29,22 @@ export function PageSearchShell({
       className={cn('flex h-full min-w-0 flex-col overflow-hidden bg-(--ui-chat-surface-background)', className)}
     >
       <div className="relative z-10 grid gap-2 border-b border-(--ui-stroke-tertiary) px-3 py-2.5">
-        <PageSearchInput
-          onChange={onSearchChange}
-          placeholder={searchPlaceholder}
-          trailingAction={searchTrailingAction}
-          value={searchValue}
-        />
+        {/* Reserve the top-right titlebar tools + native window-controls
+            footprint so the full-width search input never slides under them
+            (this header sits in the titlebar row at the window top). */}
+        <div
+          style={{
+            paddingRight:
+              'max(0px, calc(var(--titlebar-tools-right, 0px) + var(--titlebar-tools-width, 0px) - 0.75rem))'
+          }}
+        >
+          <PageSearchInput
+            onChange={onSearchChange}
+            placeholder={searchPlaceholder}
+            trailingAction={searchTrailingAction}
+            value={searchValue}
+          />
+        </div>
         {filters ? <div className="flex flex-wrap items-center justify-center gap-1.5">{filters}</div> : null}
       </div>
       <div className="min-h-0 flex-1 overflow-hidden bg-(--ui-chat-surface-background)">{children}</div>
diff --git a/apps/desktop/src/components/assistant-ui/thread.tsx b/apps/desktop/src/components/assistant-ui/thread.tsx
index ecc350d53d9..0c90d29903d 100644
--- a/apps/desktop/src/components/assistant-ui/thread.tsx
+++ b/apps/desktop/src/components/assistant-ui/thread.tsx
@@ -517,11 +517,17 @@ const AssistantActionBar: FC<MessageActionProps> = ({ messageId, messageText, on
     <div className="relative flex w-full shrink-0 justify-end">
       <ActionBarPrimitive.Root
         className={cn(
+          // NOTE: intentionally NOT `hideWhenRunning`. That prop unmounts the
+          // bar while the thread streams, which collapses every completed
+          // assistant message's footer by this bar's height and shifts the
+          // whole conversation when the turn resolves. The bar is already
+          // invisible by default (opacity-0 + pointer-events-none, reveals on
+          // hover), so keeping it mounted reserves stable layout height with
+          // no visual change during streaming.
           'relative flex flex-row items-center justify-end gap-2 py-1.5 opacity-0 pointer-events-none group-hover:pointer-events-auto group-hover:opacity-100 focus-within:pointer-events-auto focus-within:opacity-100',
           menuOpen && 'pointer-events-auto opacity-100 [&_button]:opacity-100'
         )}
         data-slot="aui_msg-actions"
-        hideWhenRunning
       >
         <CopyButton appearance="icon" buttonSize="icon" disabled={!messageText} label="Copy" text={messageText} />
         <ActionBarPrimitive.Reload asChild>
diff --git a/apps/desktop/src/components/desktop-install-overlay.tsx b/apps/desktop/src/components/desktop-install-overlay.tsx
index af6078d8835..1864c6840ca 100644
--- a/apps/desktop/src/components/desktop-install-overlay.tsx
+++ b/apps/desktop/src/components/desktop-install-overlay.tsx
@@ -46,6 +46,7 @@ interface StageRowProps {
   descriptor: DesktopBootstrapStageDescriptor
   result: DesktopBootstrapStageResult | undefined
   isCurrent: boolean
+  now: number
 }
 
 const STATE_LABEL: Record<DesktopBootstrapStageState, string> = {
@@ -77,8 +78,18 @@ function formatDuration(ms: number | null | undefined): string {
   return `${m}m ${rs}s`
 }
 
-function StageRow({ descriptor, result, isCurrent }: StageRowProps) {
+// Live elapsed for a running stage, as m:ss (or s for sub-minute).
+function formatElapsed(ms: number): string {
+  const s = Math.max(0, Math.floor(ms / 1000))
+  if (s < 60) return `${s}s`
+  const m = Math.floor(s / 60)
+  return `${m}:${String(s - m * 60).padStart(2, '0')}`
+}
+
+function StageRow({ descriptor, result, isCurrent, now }: StageRowProps) {
   const state: DesktopBootstrapStageState = result?.state || 'pending'
+  const elapsed =
+    state === 'running' && typeof result?.startedAt === 'number' ? formatElapsed(now - result.startedAt) : ''
   const icon = useMemo(() => {
     switch (state) {
       case 'running':
@@ -119,7 +130,7 @@ function StageRow({ descriptor, result, isCurrent }: StageRowProps) {
             {formatStageName(descriptor.name)}
           </span>
           <span className="flex-shrink-0 text-xs tabular-nums text-muted-foreground">
-            {state === 'running' ? STATE_LABEL[state] : null}
+            {state === 'running' ? (elapsed ? `${STATE_LABEL[state]} · ${elapsed}` : STATE_LABEL[state]) : null}
             {state === 'succeeded' || state === 'skipped' ? formatDuration(result?.durationMs) : null}
             {state === 'failed' ? STATE_LABEL[state] : null}
           </span>
@@ -147,7 +158,7 @@ function applyEvent(state: DesktopBootstrapState, ev: DesktopBootstrapEvent): De
   if (ev.type === 'manifest') {
     const stages: Record<string, DesktopBootstrapStageResult> = {}
     for (const stage of ev.stages) {
-      stages[stage.name] = { state: 'pending', durationMs: null, json: null, error: null }
+      stages[stage.name] = { state: 'pending', durationMs: null, startedAt: null, json: null, error: null }
     }
     return {
       ...state,
@@ -159,6 +170,7 @@ function applyEvent(state: DesktopBootstrapState, ev: DesktopBootstrapEvent): De
     }
   }
   if (ev.type === 'stage') {
+    const prev = state.stages[ev.name]
     return {
       ...state,
       stages: {
@@ -166,6 +178,9 @@ function applyEvent(state: DesktopBootstrapState, ev: DesktopBootstrapEvent): De
         [ev.name]: {
           state: ev.state,
           durationMs: ev.durationMs ?? null,
+          // Stamp the start time on the running transition so the UI can show
+          // a live elapsed timer; preserve it across repeated running events.
+          startedAt: ev.state === 'running' ? prev?.startedAt ?? Date.now() : prev?.startedAt ?? null,
           json: ev.json ?? null,
           error: ev.error ?? null
         }
@@ -202,8 +217,17 @@ export function DesktopInstallOverlay({ enabled = true }: DesktopInstallOverlayP
   const [state, setState] = useState<DesktopBootstrapState>(EMPTY_STATE)
   const [logOpen, setLogOpen] = useState(false)
   const [copied, setCopied] = useState(false)
+  const [now, setNow] = useState(() => Date.now())
   const logEndRef = useRef<HTMLDivElement | null>(null)
 
+  // Tick once a second while a bootstrap is in flight so running steps show a
+  // live elapsed timer. Stops when nothing is active to avoid idle renders.
+  useEffect(() => {
+    if (!state.active) return
+    const id = window.setInterval(() => setNow(Date.now()), 1000)
+    return () => window.clearInterval(id)
+  }, [state.active])
+
   // Subscribe to bootstrap events + load initial snapshot
   useEffect(() => {
     if (!enabled) return
@@ -325,6 +349,8 @@ export function DesktopInstallOverlay({ enabled = true }: DesktopInstallOverlayP
   const totalCount = stages.length
   const failed = Boolean(state.error)
   const progressPct = totalCount > 0 ? Math.round((completedCount / totalCount) * 100) : 0
+  const currentStartedAt = currentStage ? state.stages[currentStage]?.startedAt : null
+  const currentElapsed = typeof currentStartedAt === 'number' ? formatElapsed(now - currentStartedAt) : ''
 
   return (
     <div className="fixed inset-0 z-[1400] flex items-center justify-center bg-background/90 backdrop-blur-md p-4">
@@ -350,6 +376,7 @@ export function DesktopInstallOverlay({ enabled = true }: DesktopInstallOverlayP
                 <span>
                   {completedCount} of {totalCount} steps complete
                   {currentStage && ` -- now: ${formatStageName(currentStage)}`}
+                  {currentElapsed && ` (${currentElapsed})`}
                 </span>
                 <span className="tabular-nums">{progressPct}%</span>
               </div>
@@ -390,6 +417,7 @@ export function DesktopInstallOverlay({ enabled = true }: DesktopInstallOverlayP
                   descriptor={stage}
                   result={state.stages[stage.name]}
                   isCurrent={stage.name === currentStage}
+                  now={now}
                 />
               ))}
             </ol>
diff --git a/apps/desktop/src/global.d.ts b/apps/desktop/src/global.d.ts
index f285fb07765..dca38387aad 100644
--- a/apps/desktop/src/global.d.ts
+++ b/apps/desktop/src/global.d.ts
@@ -201,6 +201,7 @@ export type DesktopBootstrapStageState =
 export interface DesktopBootstrapStageResult {
   state: DesktopBootstrapStageState
   durationMs: number | null
+  startedAt: number | null
   json: { ok: boolean; skipped?: boolean; reason?: string | null; stage: string } | null
   error: string | null
 }
diff --git a/apps/desktop/src/styles.css b/apps/desktop/src/styles.css
index 33090db769d..1f1e7f47945 100644
--- a/apps/desktop/src/styles.css
+++ b/apps/desktop/src/styles.css
@@ -962,6 +962,11 @@ canvas {
   width: 0.6em;
   height: 1em;
   margin-left: 0.18em;
+  /* Net-zero the caret's inline advance so it paints at the text end without
+     consuming layout width. Otherwise its ~0.78em footprint can wrap the last
+     line mid-stream, and removing the caret on completion un-wraps it — the
+     visible "layout shift after the cursor goes away". */
+  margin-right: calc(-0.6em - 0.18em);
   vertical-align: middle;
   border-radius: 0.09375rem;
   background: repeating-conic-gradient(currentColor 0% 25%, transparent 0% 50%) 0 0 / 0.125rem 0.125rem;
diff --git a/cli-config.yaml.example b/cli-config.yaml.example
index 355b6bb7569..fb6912642ae 100644
--- a/cli-config.yaml.example
+++ b/cli-config.yaml.example
@@ -163,7 +163,7 @@ model:
 # -----------------------------------------------------------------------------
 # Working directory behavior:
 #   - CLI (`hermes` command): Uses "." (current directory where you run hermes)
-#   - Messaging (Telegram/Discord): Uses MESSAGING_CWD from .env (default: home)
+#   - Gateway/messaging/cron: Uses terminal.cwd here; legacy .env cwd values are deprecated
 terminal:
   backend: "local"
   cwd: "."  # For local backend: "." = current directory. Ignored for remote backends unless a backend documents otherwise.
diff --git a/cli.py b/cli.py
index d0baf268ace..95d14f7a996 100644
--- a/cli.py
+++ b/cli.py
@@ -74,10 +74,15 @@ except (ImportError, AttributeError):
     _STEADY_CURSOR = None
 
 try:
-    from hermes_cli.pt_input_extras import install_shift_enter_alias, install_ctrl_enter_alias
+    from hermes_cli.pt_input_extras import (
+        install_ctrl_enter_alias,
+        install_ignored_terminal_sequences,
+        install_shift_enter_alias,
+    )
     install_shift_enter_alias()
     install_ctrl_enter_alias()
-    del install_shift_enter_alias, install_ctrl_enter_alias
+    install_ignored_terminal_sequences()
+    del install_shift_enter_alias, install_ctrl_enter_alias, install_ignored_terminal_sequences
 except Exception:
     pass
 import threading
@@ -168,7 +173,7 @@ from hermes_cli.browser_connect import (
     try_launch_chrome_debug,
 )
 from hermes_cli.env_loader import load_hermes_dotenv
-from utils import base_url_host_matches, is_truthy_value
+from utils import base_url_host_matches
 
 _hermes_home = get_hermes_home()
 _project_env = Path(__file__).parent / '.env'
@@ -382,6 +387,10 @@ def load_cli_config() -> Dict[str, Any]:
             "inactivity_timeout": 120,  # Auto-cleanup inactive browser sessions after 2 min
             "record_sessions": False,  # Auto-record browser sessions as WebM videos
             "engine": "auto",  # Browser engine: auto (Chrome), lightpanda, chrome
+            "camofox": {
+                "rewrite_loopback_urls": False,
+                "loopback_host_alias": "host.docker.internal",
+            },
         },
         "compression": {
             "enabled": True,      # Auto-compress when approaching context limit
@@ -576,6 +585,8 @@ def load_cli_config() -> Dict[str, Any]:
         "docker_env": "TERMINAL_DOCKER_ENV",
         "docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE",
         "docker_run_as_host_user": "TERMINAL_DOCKER_RUN_AS_HOST_USER",
+        "docker_persist_across_processes": "TERMINAL_DOCKER_PERSIST_ACROSS_PROCESSES",
+        "docker_orphan_reaper": "TERMINAL_DOCKER_ORPHAN_REAPER",
         "sandbox_dir": "TERMINAL_SANDBOX_DIR",
         # Persistent shell (non-local backends)
         "persistent_shell": "TERMINAL_PERSISTENT_SHELL",
@@ -2475,8 +2486,9 @@ _TERMINAL_INPUT_MODE_RESET_SEQ = (
 def _preserve_ctrl_enter_newline() -> bool:
     """Detect environments where Ctrl+Enter must produce a newline, not submit.
 
-    Native Windows, WSL, SSH sessions, and Windows Terminal all send Ctrl+Enter
-    as bare LF (c-j). On those terminals c-j must NOT be bound to submit;
+    Windows Terminal, WSL, SSH sessions, Ghostty, and some modern terminals
+    deliver Ctrl+Enter/Ctrl+J as bare LF (c-j). On those terminals c-j must
+    NOT be bound to submit;
     binding it to submit makes Ctrl+Enter (intended as 'newline like Alt+Enter')
     submit instead. Local POSIX TTYs that deliver Enter as LF (docker exec,
     some thin PTYs without SSH) still need c-j bound to submit, so we keep
@@ -2490,6 +2502,12 @@ def _preserve_ctrl_enter_newline() -> bool:
         return True
     if os.environ.get("WT_SESSION"):
         return True
+    if os.environ.get("GHOSTTY_RESOURCES_DIR") or os.environ.get("GHOSTTY_BIN_DIR"):
+        return True
+    if os.environ.get("TERM", "").lower() == "xterm-ghostty":
+        return True
+    if os.environ.get("TERM_PROGRAM", "").lower() == "ghostty":
+        return True
     if "microsoft" in os.environ.get("WSL_DISTRO_NAME", "").lower():
         return True
     # WSL detection — env vars can be scrubbed under sudo, also peek /proc.
@@ -2510,7 +2528,7 @@ def _bind_prompt_submit_keys(kb, handler) -> None:
     some thin PTYs (docker exec, certain SSH flavors) deliver Enter as LF
     instead of CR — without this, Enter appears dead on those terminals.
 
-    Exception: on Windows, WSL, SSH sessions, and Windows Terminal,
+    Exception: on Windows, WSL, SSH sessions, Windows Terminal, and Ghostty,
     c-j is the wire encoding of Ctrl+Enter (a distinct keystroke from
     plain Enter / c-m). We leave c-j unbound there so the c-j newline
     handler registered separately can fire — giving the user an
@@ -3230,6 +3248,12 @@ class HermesCLI:
         self._slash_confirm_state = None
         self._slash_confirm_deadline = 0
         self._model_picker_state = None
+        # Armed when a bare `/resume` prints the recent-sessions list so the
+        # very next bare numeric input (e.g. `3`) resolves to that session.
+        # Holds the exact list used for index resolution; one-shot (cleared on
+        # the next submitted input, whether it's the selection or anything
+        # else). See #34584.
+        self._pending_resume_sessions = None
         self._secret_state = None
         self._secret_deadline = 0
         self._spinner_text: str = ""  # thinking spinner text for TUI
@@ -3748,7 +3772,7 @@ class HermesCLI:
             percent_label = f"{percent}%" if percent is not None else "--"
             duration_label = snapshot["duration"]
 
-            yolo_active = bool(os.getenv("HERMES_YOLO_MODE"))
+            yolo_active = self._is_session_yolo_active()
             if width < 52:
                 text = f"⚕ {snapshot['model_short']} · {duration_label}"
                 if yolo_active:
@@ -3809,7 +3833,7 @@ class HermesCLI:
             # line and produce duplicated status bar rows over long sessions.
             width = self._get_tui_terminal_width()
             duration_label = snapshot["duration"]
-            yolo_active = bool(os.getenv("HERMES_YOLO_MODE"))
+            yolo_active = self._is_session_yolo_active()
 
             if width < 52:
                 frags = [
@@ -6658,10 +6682,21 @@ class HermesCLI:
         if not target:
             _cprint("  Usage: /resume <number|session_id_or_title>")
             if self._show_recent_sessions(reason="resume"):
+                # Arm a one-shot pending-resume selection so the user can type
+                # just the number (`3`) on the next line instead of having to
+                # retype `/resume 3`. The list here must match the one shown by
+                # _show_recent_sessions and used for index resolution below —
+                # all three go through _list_recent_sessions(limit=10). See
+                # #34584.
+                self._pending_resume_sessions = self._list_recent_sessions(limit=10)
                 return
             _cprint("  Tip:   Use /history or `hermes sessions list` to find sessions.")
             return
 
+        # Any explicit /resume <target> supersedes a previously-armed bare
+        # numbered prompt.
+        self._pending_resume_sessions = None
+
         if not self._session_db:
             from hermes_state import format_session_db_unavailable
             _cprint(f"  {format_session_db_unavailable()}")
@@ -6775,6 +6810,44 @@ class HermesCLI:
         else:
             _cprint(f"  ↻ Resumed session {target_id}{title_part} — no messages, starting fresh.")
 
+    def _consume_pending_resume_selection(self, text: str) -> bool:
+        """Resolve a bare numeric reply that follows a bare ``/resume`` prompt.
+
+        After ``/resume`` (no args) prints the recent-sessions list it arms
+        ``self._pending_resume_sessions``. The next submitted input is given
+        one chance to be a bare session number (``3``); if so we resume that
+        session here. Anything else (another command, free text, blank) simply
+        disarms the prompt and is handled normally by the caller.
+
+        Returns True if the input was consumed as a resume selection (caller
+        must not treat it as chat); False otherwise. The pending state is
+        always one-shot: it is cleared on the first submitted input regardless
+        of outcome. See #34584.
+        """
+        pending = self._pending_resume_sessions
+        if not pending:
+            return False
+        # One-shot: disarm now so a non-matching input can't leave the prompt
+        # armed and hijack a later number the user meant as chat.
+        self._pending_resume_sessions = None
+
+        if not isinstance(text, str):
+            return False
+        stripped = text.strip()
+        # Only a pure number selects; let "/resume 3", titles, or any other
+        # text fall through to normal handling.
+        if not stripped.isdigit():
+            return False
+
+        index = int(stripped)
+        if index < 1 or index > len(pending):
+            _cprint(f"  Resume index {index} is out of range.")
+            _cprint("  Use /resume with no arguments to see available sessions.")
+            return True
+
+        self._handle_resume_command(f"/resume {index}")
+        return True
+
     def _handle_sessions_command(self, cmd_original: str) -> None:
         """Handle /sessions [list|<id_or_title>] — browse or resume previous sessions.
 
@@ -6890,6 +6963,7 @@ class HermesCLI:
             pass
 
         # Switch to the new session
+        self._transfer_session_yolo(self.session_id, new_session_id)
         self.session_id = new_session_id
         self.session_start = now
         self._pending_title = None
@@ -7569,8 +7643,19 @@ class HermesCLI:
         parts = cmd_original.split(None, 1)  # split off '/model'
         raw_args = parts[1].strip() if len(parts) > 1 else ""
 
-        # Parse --provider and --global flags
-        model_input, explicit_provider, persist_global = parse_model_flags(raw_args)
+        # Parse --provider, --global, and --refresh flags
+        model_input, explicit_provider, persist_global, force_refresh = parse_model_flags(raw_args)
+
+        # --refresh: wipe the on-disk picker cache before building the
+        # provider list. Forces a live re-fetch of every authed provider's
+        # /v1/models endpoint on this open.
+        if force_refresh:
+            try:
+                from hermes_cli.models import clear_provider_models_cache
+                clear_provider_models_cache()
+                _cprint("  Cleared model picker cache. Refreshing...")
+            except Exception:
+                pass
 
         # Single inventory context — replaces the inline config-slice the
         # dashboard / TUI used to duplicate. Overlay live session state
@@ -7609,6 +7694,7 @@ class HermesCLI:
                 _cprint("")
                 _cprint("  /model <name>                        switch model")
                 _cprint("  /model --provider <slug>             switch provider")
+                _cprint("  /model --refresh                     re-fetch live model lists")
                 return
 
             self._open_model_picker(
@@ -8285,7 +8371,14 @@ class HermesCLI:
         _base_word = cmd_lower.split()[0].lstrip("/")
         _cmd_def = _resolve_cmd(_base_word)
         canonical = _cmd_def.name if _cmd_def else _base_word
-        
+
+        # A bare `/resume` prompt is one-shot: any command other than the
+        # resume/sessions handlers (which manage the pending state themselves)
+        # disarms it so a later number isn't swallowed as a stale selection.
+        # See #34584.
+        if canonical not in {"resume", "sessions"}:
+            self._pending_resume_sessions = None
+
         if canonical in {"quit", "exit"}:
             # Parse --delete flag: /exit --delete also removes the current
             # session's transcripts + SQLite history. Ported from
@@ -9590,20 +9683,92 @@ class HermesCLI:
         }
         _cprint(labels.get(self.tool_progress_mode, ""))
 
-    def _toggle_yolo(self):
-        """Toggle YOLO mode — skip all dangerous command approval prompts."""
-        import os
-        from hermes_cli.colors import Colors as _Colors
+    def _transfer_session_yolo(self, old_session_id: str, new_session_id: str) -> None:
+        """Move YOLO bypass state from an old session key to a new one.
 
-        current = is_truthy_value(os.environ.get("HERMES_YOLO_MODE"))
-        if current:
-            os.environ.pop("HERMES_YOLO_MODE", None)
+        Called whenever ``self.session_id`` is reassigned mid-run — ``/branch``
+        forks into a new session, and auto-compression rotates the agent's
+        session id into a fresh continuation session. Without this transfer
+        the user's ``/yolo ON`` toggle would silently revert on the very next
+        turn (the same UX failure mode that motivated this entire fix), since
+        ``_session_yolo`` is keyed by session id.
+
+        Mirrors ``tui_gateway/server.py`` (~line 1297-1305) which performs the
+        same transfer for the TUI's session-rename path. No-op when YOLO
+        wasn't enabled or when the ids match.
+        """
+        if not old_session_id or not new_session_id or old_session_id == new_session_id:
+            return
+        try:
+            from tools.approval import (
+                disable_session_yolo,
+                enable_session_yolo,
+                is_session_yolo_enabled,
+            )
+        except Exception:
+            return
+        if is_session_yolo_enabled(old_session_id):
+            enable_session_yolo(new_session_id)
+            disable_session_yolo(old_session_id)
+
+    def _is_session_yolo_active(self) -> bool:
+        """Whether YOLO bypass is currently enabled for this CLI session.
+
+        Reads from ``tools.approval._session_yolo`` (the same set that
+        ``enable_session_yolo`` / ``disable_session_yolo`` write to) so the
+        status bar reflects the actual bypass state instead of a stale env
+        var. Also honors the process-start ``--yolo`` flag, which freezes
+        ``HERMES_YOLO_MODE`` into ``_YOLO_MODE_FROZEN`` before tool imports
+        happen.
+        """
+        try:
+            from tools.approval import (
+                _YOLO_MODE_FROZEN,
+                is_session_yolo_enabled,
+            )
+        except Exception:
+            return False
+        if _YOLO_MODE_FROZEN:
+            return True
+        # Use ``getattr`` so test fixtures that build a CLI via ``__new__``
+        # (skipping ``__init__``) don't trip an AttributeError here; the
+        # status-bar builders swallow exceptions silently but lose every
+        # field after the failure.
+        session_key = getattr(self, "session_id", None) or "default"
+        return is_session_yolo_enabled(session_key)
+
+    def _toggle_yolo(self):
+        """Toggle YOLO mode — skip all dangerous command approval prompts.
+
+        Per-session toggle that mirrors the gateway and TUI ``/yolo`` handlers
+        (see ``gateway/run.py:_handle_yolo_command`` and
+        ``tui_gateway/server.py`` key=="yolo"). We deliberately do NOT mutate
+        ``HERMES_YOLO_MODE`` here — that env var is read once at module import
+        time into ``tools.approval._YOLO_MODE_FROZEN`` to keep prompt-injected
+        skills from flipping the bypass mid-session, so setting it after CLI
+        startup is a silent no-op. Routing through ``enable_session_yolo`` /
+        ``disable_session_yolo`` gives the same auditable, per-session bypass
+        the other surfaces have. ``run_conversation`` binds
+        ``self.session_id`` as the active approval session key via
+        ``set_current_session_key`` so the bypass takes effect on the very
+        next dangerous command in this run.
+        """
+        from hermes_cli.colors import Colors as _Colors
+        from tools.approval import (
+            disable_session_yolo,
+            enable_session_yolo,
+            is_session_yolo_enabled,
+        )
+
+        session_key = self.session_id or "default"
+        if is_session_yolo_enabled(session_key):
+            disable_session_yolo(session_key)
             _cprint(
                 f"  ⚠ YOLO mode {_Colors.BOLD}{_Colors.RED}OFF{_Colors.RESET}"
                 " — dangerous commands will require approval."
             )
         else:
-            os.environ["HERMES_YOLO_MODE"] = "1"
+            enable_session_yolo(session_key)
             _cprint(
                 f"  ⚡ YOLO mode {_Colors.BOLD}{_Colors.GREEN}ON{_Colors.RESET}"
                 " — all commands auto-approved. Use with caution."
@@ -9765,10 +9930,20 @@ class HermesCLI:
     def _manual_compress(self, cmd_original: str = ""):
         """Manually trigger context compression on the current conversation.
 
-        Accepts an optional focus topic: ``/compress <focus>`` guides the
-        summariser to preserve information related to *focus* while being
-        more aggressive about discarding everything else.  Inspired by
-        Claude Code's ``/compact <focus>`` feature.
+        Two modes:
+
+        * ``/compress [<focus>]`` — compress the *whole* history. An
+          optional focus topic guides the summariser to preserve
+          information related to *focus* while being more aggressive
+          about discarding everything else.  Inspired by Claude Code's
+          ``/compact <focus>`` feature.
+        * ``/compress here [N]`` — boundary-aware compression. Summarize
+          everything *except* the most recent ``N`` exchanges (default
+          2), which are preserved verbatim. Inspired by Claude Code's
+          Rewind "Summarize up to here" action (v2.1.139, May 2026,
+          https://code.claude.com/docs/en/whats-new/2026-w20). Lets the
+          user pick the compression boundary instead of leaving it to
+          the automatic token-budget heuristic.
         """
         if not self.conversation_history or len(self.conversation_history) < 4:
             print("(._.) Not enough conversation to compress (need at least 4 messages).")
@@ -9782,12 +9957,21 @@ class HermesCLI:
             print("(._.) Compression is disabled in config.")
             return
 
-        # Extract optional focus topic from the command (e.g. "/compress database schema")
-        focus_topic = ""
+        from hermes_cli.partial_compress import (
+            parse_partial_compress_args,
+            rejoin_compressed_head_and_tail,
+            split_history_for_partial_compress,
+        )
+
+        # Args after the command word (e.g. "/compress here 3" -> "here 3").
+        raw_args = ""
         if cmd_original:
-            parts = cmd_original.strip().split(None, 1)
-            if len(parts) > 1:
-                focus_topic = parts[1].strip()
+            _parts = cmd_original.strip().split(None, 1)
+            if len(_parts) > 1:
+                raw_args = _parts[1].strip()
+
+        partial, keep_last, focus_topic = parse_partial_compress_args(raw_args)
+        focus_topic = focus_topic or ""
 
         original_count = len(self.conversation_history)
         with self._busy_command("Compressing context..."):
@@ -9795,6 +9979,22 @@ class HermesCLI:
                 from agent.model_metadata import estimate_request_tokens_rough
                 from agent.manual_compression_feedback import summarize_manual_compression
                 original_history = list(self.conversation_history)
+
+                # Boundary-aware split: only the head is summarized; the
+                # most recent `keep_last` exchanges ride along verbatim.
+                tail: list = []
+                head = original_history
+                if partial:
+                    head, tail = split_history_for_partial_compress(
+                        original_history, keep_last
+                    )
+                    if not tail:
+                        # Split degenerated (everything would be kept, or
+                        # no head left to compress). Fall back to full
+                        # compression so the user still gets an action.
+                        partial = False
+                        head = original_history
+
                 # Include system prompt + tool schemas in the estimate —
                 # a transcript-only number understates real request pressure
                 # and can even appear to grow after compression because a
@@ -9806,7 +10006,11 @@ class HermesCLI:
                     system_prompt=_sys_prompt,
                     tools=_tools,
                 )
-                if focus_topic:
+                if partial:
+                    print(f"🗜️  Summarizing up to here: compressing {len(head)} of "
+                          f"{original_count} messages (~{approx_tokens:,} tokens), "
+                          f"keeping last {keep_last} exchange(s) verbatim...")
+                elif focus_topic:
                     print(f"🗜️  Compressing {original_count} messages (~{approx_tokens:,} tokens), "
                           f"focus: \"{focus_topic}\"...")
                 else:
@@ -9819,12 +10023,21 @@ class HermesCLI:
                 # which already contain the agent identity — resulting in the
                 # identity block appearing twice (issue #15281).
                 compressed, _ = self.agent._compress_context(
-                    original_history,
+                    head,
                     None,
                     approx_tokens=approx_tokens,
                     focus_topic=focus_topic or None,
                     force=True,
                 )
+                # Re-append the verbatim tail after the compressed head.
+                # The split guarantees `tail` begins on a user turn, so the
+                # compressed-head -> tail boundary is normally valid
+                # (the head's compressed output ends on assistant/tool).
+                # rejoin_compressed_head_and_tail() additionally guards the
+                # seam against any illegal user->user / assistant->assistant
+                # adjacency, defending provider role-alternation rules.
+                if partial and tail:
+                    compressed = rejoin_compressed_head_and_tail(compressed, tail)
                 self.conversation_history = compressed
                 # _compress_context ends the old session and creates a new child
                 # session on the agent (run_agent.py::_compress_context). Sync the
@@ -10650,7 +10863,8 @@ class HermesCLI:
         if not reqs.get("stt_available", reqs.get("stt_key_set")):
             raise RuntimeError(
                 "Voice mode requires an STT provider for transcription.\n"
-                "Option 1: pip install faster-whisper  (free, local)\n"
+                "Option 1: uv pip install faster-whisper  "
+                "(free, local; `pip install faster-whisper` also works if pip is on PATH)\n"
                 "Option 2: Set GROQ_API_KEY (free tier)\n"
                 "Option 3: Set VOICE_TOOLS_OPENAI_KEY (paid)"
             )
@@ -11739,6 +11953,23 @@ class HermesCLI:
                     set_secret_capture_callback(self._secret_capture_callback)
                 except Exception:
                     pass
+                # Bind this turn's approval session key into the contextvar so
+                # ``tools.approval.is_current_session_yolo_enabled()`` resolves
+                # against the same key that ``/yolo`` toggles under (see
+                # ``_toggle_yolo`` → ``enable_session_yolo(self.session_id)``).
+                # Mirrors ``tui_gateway/server.py`` and ``gateway/run.py`` which
+                # bind the same contextvar before invoking the agent.
+                try:
+                    from tools.approval import (
+                        reset_current_session_key,
+                        set_current_session_key,
+                    )
+                    _approval_session_token = set_current_session_key(
+                        self.session_id or "default"
+                    )
+                except Exception:
+                    reset_current_session_key = None  # type: ignore[assignment]
+                    _approval_session_token = None
                 agent_message = _voice_prefix + message if _voice_prefix else message
                 # Prepend pending model switch note so the model knows about the switch
                 _msn = getattr(self, '_pending_model_switch_note', None)
@@ -11780,6 +12011,15 @@ class HermesCLI:
                         set_secret_capture_callback(None)
                     except Exception:
                         pass
+                    # Release the per-turn approval session key. ``_session_yolo``
+                    # state itself is preserved across turns (so /yolo persists
+                    # for the whole CLI run); we just unbind the contextvar so a
+                    # reused thread doesn't see stale identity on its next run.
+                    if _approval_session_token is not None and reset_current_session_key is not None:
+                        try:
+                            reset_current_session_key(_approval_session_token)
+                        except Exception:
+                            pass
 
             # Start agent in background thread (daemon so it cannot keep the
             # process alive when the user closes the terminal tab — SIGHUP
@@ -11910,6 +12150,7 @@ class HermesCLI:
                 and getattr(self.agent, "session_id", None)
                 and self.agent.session_id != self.session_id
             ):
+                self._transfer_session_yolo(self.session_id, self.agent.session_id)
                 self.session_id = self.agent.session_id
                 self._pending_title = None
 
@@ -12574,7 +12815,21 @@ class HermesCLI:
         
         # Key bindings for the input area
         kb = KeyBindings()
-        
+
+        from prompt_toolkit.keys import Keys as _IgnoreKeys
+
+        @kb.add(_IgnoreKeys.Ignore, eager=True)
+        def handle_ignored_terminal_sequence(event):
+            """Consume parser-level ignored terminal sequences before self-insert.
+
+            install_ignored_terminal_sequences() in hermes_cli.pt_input_extras
+            registers focus reports (CSI I / CSI O) as Keys.Ignore at the
+            VT100 parser level. Without this no-op binding the default
+            self-insert path would still fire and the bytes would land in
+            the buffer.
+            """
+            return None
+
         def handle_enter(event):
             """Handle Enter key - submit input.
             
@@ -13817,7 +14072,12 @@ class HermesCLI:
             reserved_below = 6
 
             available = max(0, term_rows - reserved_below)
-            mandatory_full = chrome_full + len(choice_wrapped) + len(other_wrapped)
+            # The compact decision must reserve room for at least one question
+            # row on top of the choices, otherwise full chrome (3 blank
+            # separators) gets kept when there is no room for it and the panel
+            # overflows the viewport — HSplit then clips the panel's tail,
+            # silently dropping the choices (the reported bug).
+            mandatory_full = chrome_full + 1 + len(choice_wrapped) + len(other_wrapped)
 
             use_compact_chrome = mandatory_full > available
             chrome_rows = chrome_tight if use_compact_chrome else chrome_full
@@ -13825,9 +14085,24 @@ class HermesCLI:
             max_question_rows = max(1, available - chrome_rows - len(choice_wrapped) - len(other_wrapped))
             max_question_rows = min(max_question_rows, 12)  # soft cap on huge terminals
 
+            # When the choices alone (plus compact chrome) already exceed the
+            # viewport, drop the question entirely — the choices are the only
+            # thing the user must see to make a selection. Without this the
+            # question would still claim its 1-row floor above and push the
+            # tail of the choices off-screen (HSplit clips the overflow).
+            choices_overflow = chrome_rows + len(choice_wrapped) + len(other_wrapped) >= available
+            if choices_overflow:
+                max_question_rows = 0
+
             question_wrapped = _wrap_panel_text(question, inner_text_width)
-            if len(question_wrapped) > max_question_rows:
-                keep = max(1, max_question_rows - 1)
+            if max_question_rows <= 0:
+                question_wrapped = []
+            elif len(question_wrapped) > max_question_rows:
+                # The truncation marker is itself a row, so it must count
+                # against the budget. With a 1-row budget there is no room for
+                # both a question line and the marker — show the marker alone
+                # so the rendered question never exceeds max_question_rows.
+                keep = max(0, max_question_rows - 1)
                 question_wrapped = question_wrapped[:keep] + ["… (question truncated)"]
 
             lines = []
@@ -14361,6 +14636,17 @@ class HermesCLI:
                                 + (f"\n{_remainder}" if _remainder else "")
                             )
 
+                    # A bare number right after a bare `/resume` prompt selects
+                    # that session (see #34584). Checked before chat routing so
+                    # the digit isn't sent to the agent as a message.
+                    if (
+                        not _file_drop
+                        and self._pending_resume_sessions
+                        and isinstance(user_input, str)
+                        and self._consume_pending_resume_selection(user_input)
+                    ):
+                        continue
+
                     if not _file_drop and isinstance(user_input, str) and _looks_like_slash_command(user_input):
                         _cprint(f"\n⚙️  {user_input}")
                         try:
@@ -14950,6 +15236,39 @@ def main(
                     time.sleep(_grace)
         except Exception:
             pass  # never block signal handling
+        # Kanban worker exit path (#28181): SIGTERM hits a dispatcher-spawned
+        # worker that's likely in a non-daemon thread waiting on a child
+        # subprocess in _wait_for_process. Raising KeyboardInterrupt only
+        # unwinds the main thread; the worker thread keeps running, the
+        # process gets reparented to init, and the dispatcher's _pid_alive
+        # check returns True forever — task stuck in 'running' indefinitely.
+        # Skip the controlled-unwind dance and call os._exit(0) so the kernel
+        # reclaims the PID immediately and detect_crashed_workers can reclaim
+        # the stale claim on the next tick. Flush logging + stdout/stderr
+        # first so the final debug trace isn't lost; SIGALRM deadman guards
+        # the flush against any rare blocking-I/O case (the reporter measured
+        # flush in <1ms; the alarm is a failsafe, not the common path).
+        if os.environ.get("HERMES_KANBAN_TASK"):
+            try:
+                import signal as _sig_mod
+                if hasattr(_sig_mod, "SIGALRM"):
+                    # Cancel any pre-existing alarm to avoid colliding with
+                    # caller-installed timers.
+                    _sig_mod.signal(_sig_mod.SIGALRM, lambda *_: os._exit(0))
+                    _sig_mod.alarm(2)
+            except Exception:
+                pass
+            try:
+                import logging as _lg
+                _lg.shutdown()
+            except Exception:
+                pass
+            for _stream in (sys.stdout, sys.stderr):
+                try:
+                    _stream.flush()
+                except Exception:
+                    pass
+            os._exit(0)
         raise KeyboardInterrupt()
     try:
         import signal as _signal
@@ -14962,13 +15281,50 @@ def main(
     # Handle single query mode
     if query or image:
         query, single_query_images = _collect_query_images(query, image)
+        # Kanban workers spawn with ``hermes chat -q "work kanban task <id>"``;
+        # the actual task description lives in the task body. Mirror the
+        # gateway/CLI behaviour for inbound images by scanning the body for
+        # local image paths and http(s) image URLs and attaching them to the
+        # worker's first turn. Without this, users who paste a screenshot
+        # path or URL into a kanban task body never get it routed to the
+        # model's vision input.
+        single_query_image_urls: list[str] = []
+        _kanban_task_id = os.environ.get("HERMES_KANBAN_TASK", "").strip()
+        if _kanban_task_id:
+            try:
+                from hermes_cli import kanban_db as _kb
+                from agent.image_routing import extract_image_refs as _extract_refs
+
+                _conn = _kb.connect()
+                try:
+                    _task = _kb.get_task(_conn, _kanban_task_id)
+                finally:
+                    try:
+                        _conn.close()
+                    except Exception:
+                        pass
+                _body = getattr(_task, "body", "") if _task is not None else ""
+                if _body:
+                    _kb_paths, _kb_urls = _extract_refs(_body)
+                    if _kb_paths:
+                        # Dedupe against any --image the user already passed.
+                        _seen = {str(p) for p in single_query_images}
+                        for _p in _kb_paths:
+                            if _p not in _seen:
+                                _seen.add(_p)
+                                single_query_images.append(Path(_p))
+                    if _kb_urls:
+                        single_query_image_urls.extend(_kb_urls)
+            except Exception as _exc:
+                # Best-effort enrichment; never block worker startup on it.
+                logger.debug("kanban image-ref extraction failed: %s", _exc)
         if quiet:
             # Quiet mode: suppress banner, spinner, tool previews.
             # Only print the final response and parseable session info.
             cli.tool_progress_mode = "off"
             if cli._ensure_runtime_credentials():
                 effective_query: Any = query
-                if single_query_images:
+                if single_query_images or single_query_image_urls:
                     # Honour the same image-routing decision used by the
                     # interactive path. With a vision-capable model (incl.
                     # custom-provider models declared via
@@ -14997,19 +15353,26 @@ def main(
                             _parts, _skipped = _build_parts(
                                 query if isinstance(query, str) else "",
                                 [str(p) for p in single_query_images],
+                                image_urls=list(single_query_image_urls) or None,
                             )
                             if any(p.get("type") == "image_url" for p in _parts):
                                 effective_query = _parts
                             else:
                                 # All images unreadable — text fallback.
+                                # ``_preprocess_images_with_vision`` only knows
+                                # about local files; URLs would be lost there,
+                                # so keep the original query text intact when
+                                # only URLs were supplied.
+                                if single_query_images:
+                                    effective_query = cli._preprocess_images_with_vision(
+                                        query, single_query_images, announce=False,
+                                    )
+                        except Exception:
+                            if single_query_images:
                                 effective_query = cli._preprocess_images_with_vision(
                                     query, single_query_images, announce=False,
                                 )
-                        except Exception:
-                            effective_query = cli._preprocess_images_with_vision(
-                                query, single_query_images, announce=False,
-                            )
-                    else:
+                    elif single_query_images:
                         effective_query = cli._preprocess_images_with_vision(
                             query,
                             single_query_images,
diff --git a/docker/hermes-exec-shim.sh b/docker/hermes-exec-shim.sh
new file mode 100644
index 00000000000..7f4c5c3c0a0
--- /dev/null
+++ b/docker/hermes-exec-shim.sh
@@ -0,0 +1,87 @@
+#!/bin/sh
+# shellcheck shell=sh
+# /opt/hermes/bin/hermes — `docker exec` privilege-drop shim.
+#
+# Background
+# ----------
+# The s6 image runs the supervised gateway/main process as the unprivileged
+# `hermes` user (UID 10000). When an operator runs `docker exec <c> hermes ...`
+# the default UID is root (0), and any file the command writes under
+# $HERMES_HOME — auth.json, .env, config.yaml — ends up root-owned and
+# unreadable to the supervised gateway. The most common manifestation: the
+# user runs `docker exec <c> hermes login`, this writes
+# /opt/data/auth.json as root:root mode 0600, and from then on the gateway
+# returns "Provider authentication failed: Hermes is not logged into Nous
+# Portal" on every incoming message — even though `docker exec <c> hermes
+# chat -q ping` (also running as root) succeeds because root happens to be
+# able to read its own root-owned file. See systematic-debugging skill
+# notes attached to this fix.
+#
+# Fix
+# ---
+# This shim sits at /opt/hermes/bin/hermes and is placed earliest on PATH.
+# When invoked as root, it drops to the hermes user (via s6-setuidgid)
+# before exec'ing the real venv binary, so anything that writes under
+# $HERMES_HOME is uid-aligned with the supervised processes. When invoked
+# as any non-root UID — including the supervised processes themselves,
+# `docker exec --user hermes`, kanban subagents, etc. — it short-circuits
+# straight to the venv binary with no privilege change. Net: one extra
+# fork on the docker-exec-as-root path, zero behavioral change on every
+# other path.
+#
+# Recursion safety: the shim exec's the venv binary by *absolute path*
+# (/opt/hermes/.venv/bin/hermes), so the second hop cannot re-enter this
+# shim regardless of PATH state. No sentinel env var needed.
+#
+# Opt-out: set HERMES_DOCKER_EXEC_AS_ROOT=1 (1/true/yes, case-insensitive)
+# to keep running as root. Reserved for diagnostic sessions where the
+# operator deliberately wants root semantics — e.g. inspecting root-only
+# state via the hermes CLI. Default is to drop.
+
+set -e
+
+REAL=/opt/hermes/.venv/bin/hermes
+
+# Defensive: if the venv binary is missing (corrupted image, partial
+# install), fail loudly rather than silently masking it.
+if [ ! -x "$REAL" ]; then
+    echo "hermes-shim: $REAL not found or not executable" >&2
+    exit 127
+fi
+
+# Already non-root? Just exec the real binary. This is the hot path for
+# supervised processes (uid 10000) and for `docker exec --user hermes`.
+if [ "$(id -u)" != "0" ]; then
+    exec "$REAL" "$@"
+fi
+
+# Root, with opt-out set? Honor it.
+case "${HERMES_DOCKER_EXEC_AS_ROOT:-}" in
+    1|true|TRUE|True|yes|YES|Yes)
+        exec "$REAL" "$@"
+        ;;
+esac
+
+# Root, no opt-out. Drop to the hermes user.
+#
+# s6-setuidgid lives under /command/ which is NOT on `docker exec`'s PATH
+# (s6-overlay only puts /command/ on PATH for supervision-tree children).
+# Reference it by absolute path so the drop is robust against PATH
+# manipulation.
+S6_SUID=/command/s6-setuidgid
+if [ ! -x "$S6_SUID" ]; then
+    # Non-s6 image (someone stripped s6-overlay, or a hand-built variant).
+    # Fail loud rather than silently re-execing as root and leaking the
+    # bug this shim exists to prevent.
+    echo "hermes-shim: $S6_SUID not found; refusing to silently run as root." >&2
+    echo "hermes-shim: re-run with --user hermes or set HERMES_DOCKER_EXEC_AS_ROOT=1." >&2
+    exit 126
+fi
+
+# Reset HOME to the hermes user's home before dropping privileges. Without
+# this, $HOME stays /root and any library that resolves paths off $HOME
+# (XDG caches, lockfiles, .config writes) will try to write to /root and
+# fail with EACCES. Mirrors main-wrapper.sh.
+export HOME=/opt/data
+
+exec "$S6_SUID" hermes "$REAL" "$@"
diff --git a/docker/s6-rc.d/dashboard/run b/docker/s6-rc.d/dashboard/run
index a48e8995dfc..d6cfa3f0940 100755
--- a/docker/s6-rc.d/dashboard/run
+++ b/docker/s6-rc.d/dashboard/run
@@ -19,6 +19,10 @@ case "${HERMES_DASHBOARD:-}" in
         ;;
 esac
 
+# with-contenv repopulates HOME from /init as /root. Reset it before
+# dropping privileges so HOME-anchored state lands under /opt/data.
+export HOME=/opt/data
+
 cd /opt/data
 # shellcheck disable=SC1091
 . /opt/hermes/.venv/bin/activate
@@ -26,13 +30,21 @@ cd /opt/data
 dash_host="${HERMES_DASHBOARD_HOST:-0.0.0.0}"
 dash_port="${HERMES_DASHBOARD_PORT:-9119}"
 
-# Binding to anything other than localhost requires --insecure — the
-# dashboard refuses otherwise because it exposes API keys. Inside a
-# container this is the expected deployment.
+# `--insecure` is opt-in via HERMES_DASHBOARD_INSECURE. The dashboard's
+# OAuth auth gate engages automatically on non-loopback binds when a
+# DashboardAuthProvider is registered (e.g. the bundled dashboard_auth/nous
+# provider, which auto-registers when HERMES_DASHBOARD_OAUTH_CLIENT_ID is
+# set). If no provider is registered, start_server fails closed with a
+# specific operator-facing error.
+#
+# This used to derive --insecure from the bind host ("anything non-loopback
+# implies insecure"), but that predates the OAuth gate and silently
+# disabled it on every container-deployed dashboard. The gate is now the
+# authority; operators on trusted LANs / behind a reverse proxy without
+# the OAuth contract opt in explicitly.
 insecure=""
-case "$dash_host" in
-    127.0.0.1|localhost) ;;
-    *) insecure="--insecure" ;;
+case "${HERMES_DASHBOARD_INSECURE:-}" in
+    1|true|TRUE|True|yes|YES|Yes) insecure="--insecure" ;;
 esac
 
 # shellcheck disable=SC2086  # word-splitting of $insecure is intentional
diff --git a/docker/stage2-hook.sh b/docker/stage2-hook.sh
index 1e8af197de9..4bfbd56af8a 100755
--- a/docker/stage2-hook.sh
+++ b/docker/stage2-hook.sh
@@ -33,6 +33,15 @@ INSTALL_DIR="/opt/hermes"
 mkdir -p "$HERMES_HOME"
 
 # --- UID/GID remap ---
+# Accept PUID/PGID as aliases for HERMES_UID/HERMES_GID.  NAS users (UGOS,
+# Synology, unRAID) expect the LinuxServer.io PUID/PGID convention and
+# bind-mount /opt/data from a host directory owned by their own UID; without
+# this alias those vars are silently ignored and the s6-setuidgid drop to
+# UID 10000 leaves the runtime unable to read the volume.  HERMES_UID/
+# HERMES_GID still win when both are set.  See #15290, salvages #25872.
+HERMES_UID="${HERMES_UID:-${PUID:-}}"
+HERMES_GID="${HERMES_GID:-${PGID:-}}"
+
 if [ -n "${HERMES_UID:-}" ] && [ "$HERMES_UID" != "$(id -u hermes)" ]; then
     echo "[stage2] Changing hermes UID to $HERMES_UID"
     usermod -u "$HERMES_UID" hermes
@@ -44,6 +53,62 @@ if [ -n "${HERMES_GID:-}" ] && [ "$HERMES_GID" != "$(id -g hermes)" ]; then
     groupmod -o -g "$HERMES_GID" hermes 2>/dev/null || true
 fi
 
+# --- Docker socket group membership (docker-in-docker / DooD) ---
+# When the user bind-mounts the host Docker daemon socket
+# (`-v /var/run/docker.sock:/var/run/docker.sock`) to use the `docker`
+# terminal backend from inside the container, the socket is owned by the
+# host's `docker` group (or root). The supervised hermes user (UID 10000)
+# is not a member of any group that matches the socket's GID, so every
+# `docker` invocation EACCES'es and `check_terminal_requirements()` fails.
+# See #16703.
+#
+# Granting the supp group via `docker run --group-add <gid>` alone is
+# NOT sufficient with our s6-setuidgid privilege drop: s6-setuidgid (and
+# gosu, the older shim) calls initgroups() for the target user, which
+# rebuilds the supplementary group list from /etc/group. Without an
+# /etc/group entry whose GID matches the socket, the kernel-granted
+# supp group is silently wiped between PID 1 and the dropped process.
+# Confirmed empirically: `--group-add 998` alone leaves the dropped
+# hermes process with `Groups: 10000` (998 gone); after this hook adds
+# the entry, the dropped process has `Groups: 998 10000` as expected.
+#
+# Fix: detect the socket's GID at boot and ensure /etc/group has a
+# matching entry that includes hermes. Idempotent across container
+# restarts. Skipped silently when no socket is bind-mounted.
+#
+# Handles the awkward corner cases:
+#   - socket owned by GID 0 (root) — some Podman setups; usermod -aG root
+#   - socket GID already used by a known container group (e.g. tty=5):
+#     reuse that group's name rather than creating a duplicate
+#   - hermes is already a member of the right group (idempotent restart)
+#   - chown/groupadd failures under rootless containers — non-fatal
+for sock in /var/run/docker.sock /run/docker.sock; do
+    [ -S "$sock" ] || continue
+    sock_gid=$(stat -c '%g' "$sock" 2>/dev/null) || continue
+    [ -n "$sock_gid" ] || continue
+    # Already a member? Nothing to do.
+    if id -G hermes 2>/dev/null | tr ' ' '\n' | grep -qx "$sock_gid"; then
+        echo "[stage2] hermes already in group $sock_gid for $sock"
+        break
+    fi
+    # Resolve or create a group name for this GID.
+    sock_group=$(getent group "$sock_gid" 2>/dev/null | cut -d: -f1)
+    if [ -z "$sock_group" ]; then
+        sock_group="hostdocker"
+        if ! groupadd -g "$sock_gid" "$sock_group" 2>/dev/null; then
+            echo "[stage2] Warning: groupadd -g $sock_gid $sock_group failed; skipping docker socket group setup"
+            break
+        fi
+        echo "[stage2] Created group $sock_group (GID $sock_gid) for Docker socket"
+    fi
+    if usermod -aG "$sock_group" hermes 2>/dev/null; then
+        echo "[stage2] Added hermes to group $sock_group (GID $sock_gid) for $sock"
+    else
+        echo "[stage2] Warning: usermod -aG $sock_group hermes failed; docker backend may fail with EACCES"
+    fi
+    break
+done
+
 # --- Fix ownership of data volume ---
 # When HERMES_UID is remapped or the top-level $HERMES_HOME isn't owned by
 # the runtime hermes UID, restore ownership to hermes — but ONLY for the
diff --git a/docs/security/network-egress-isolation.md b/docs/security/network-egress-isolation.md
new file mode 100644
index 00000000000..46cde2fd747
--- /dev/null
+++ b/docs/security/network-egress-isolation.md
@@ -0,0 +1,195 @@
+# Network Egress Isolation for Docker Deployments
+
+When running Hermes inside Docker, the default `network_mode: host` gives the
+agent process unrestricted outbound network access. This guide shows how to
+segment traffic so the agent core can only reach the services it needs, while
+blocking arbitrary outbound connections.
+
+This is primarily a defense against prompt injection attacks that attempt to
+exfiltrate data via `curl`, `wget`, or raw HTTP from tool-generated shell
+commands.
+
+## Threat Model
+
+The Hermes [SECURITY.md](../../SECURITY.md) §2 defines the trust model. The
+terminal backend is the primary execution boundary. However, when running with
+`network_mode: host`, any command the agent executes can reach any endpoint on
+the network, including external ones.
+
+Network egress isolation adds a second layer: even if a malicious command
+executes inside the container, it cannot reach endpoints outside the
+explicitly allowlisted set.
+
+## Architecture
+
+```
+┌─────────────────────────────────────────────┐
+│  Docker Network: internal (no internet)     │
+│                                             │
+│   ┌──────────────┐   ┌──────────────────┐   │
+│   │ hermes-agent │   │ hermes-dashboard │   │
+│   └──────┬───────┘   └────────┬─────────┘   │
+│          │                    │              │
+│          ▼                    │              │
+│   ┌──────────────┐            │              │
+│   │ hermes-gtw   │◄───────────┘              │
+│   └──────┬───────┘                           │
+│          │                                   │
+└──────────┼───────────────────────────────────┘
+           │
+┌──────────┼───────────────────────────────────┐
+│  Docker Network: egress (internet-capable)   │
+│          │                                   │
+│          ▼                                   │
+│   ┌─────────────────┐                        │
+│   │ egress-proxy     │──► allowlisted hosts  │
+│   │ (squid / envoy)  │                       │
+│   └─────────────────┘                        │
+└──────────────────────────────────────────────┘
+```
+
+Two Docker networks:
+
+- **`internal`** — no default route, no internet access. The agent, dashboard,
+  and gateway run here.
+- **`egress`** — has internet access. Only services that need to reach external
+  APIs are attached to this network.
+
+The gateway service is dual-homed (attached to both networks) so it can
+receive inbound messages from Telegram/Slack/etc. and forward them to the
+agent on the internal network.
+
+## Compose Configuration
+
+Override the default `docker-compose.yml` with a
+`docker-compose.override.yml`:
+
+```yaml
+# docker-compose.override.yml
+# Network egress isolation for production deployments.
+#
+# Usage:
+#   HERMES_UID=$(id -u) HERMES_GID=$(id -g) docker compose up -d
+#
+# This overrides network_mode: host with isolated Docker networks.
+
+networks:
+  internal:
+    driver: bridge
+    internal: true          # no default route, no internet
+  egress:
+    driver: bridge
+
+services:
+  gateway:
+    network_mode: ""        # clear the host-mode default
+    networks:
+      - internal
+      - egress              # needs outbound for Telegram, LLM APIs
+    ports:
+      - "127.0.0.1:9119:9119"   # dashboard proxy, localhost only
+
+  dashboard:
+    network_mode: ""
+    networks:
+      - internal            # internal only, no egress needed
+```
+
+### With an Egress Proxy (Recommended)
+
+For tighter control, route all outbound traffic through an HTTP proxy with
+an explicit allowlist:
+
+```yaml
+# docker-compose.override.yml (with egress proxy)
+
+networks:
+  internal:
+    driver: bridge
+    internal: true
+  egress:
+    driver: bridge
+
+services:
+  gateway:
+    network_mode: ""
+    networks:
+      - internal
+      - egress
+    environment:
+      - HTTP_PROXY=http://egress-proxy:3128
+      - HTTPS_PROXY=http://egress-proxy:3128
+      - NO_PROXY=hermes,hermes-dashboard,localhost
+
+  dashboard:
+    network_mode: ""
+    networks:
+      - internal
+
+  egress-proxy:
+    image: ubuntu/squid:6.10-24.04_edge
+    networks:
+      - egress
+    volumes:
+      - ./config/squid-allowlist.conf:/etc/squid/conf.d/allowlist.conf:ro
+    restart: unless-stopped
+```
+
+Example `config/squid-allowlist.conf`:
+
+```
+# Only allow HTTPS CONNECT to these hosts
+acl allowed_hosts dstdomain api.openai.com
+acl allowed_hosts dstdomain api.anthropic.com
+acl allowed_hosts dstdomain openrouter.ai
+acl allowed_hosts dstdomain generativelanguage.googleapis.com
+acl allowed_hosts dstdomain api.telegram.org
+acl allowed_hosts dstdomain api.github.com
+acl allowed_hosts dstdomain discord.com
+
+http_access allow CONNECT allowed_hosts
+http_access deny all
+```
+
+Adjust the allowlist to match your LLM provider and messaging platform.
+
+## Validating the Setup
+
+After bringing up the stack, verify isolation:
+
+```bash
+# From the agent container: this should FAIL (no egress)
+docker compose exec gateway \
+  curl -sf --max-time 5 https://example.com && echo "FAIL: egress not blocked" || echo "OK: egress blocked"
+
+# From the agent container: this should SUCCEED (internal network)
+docker compose exec gateway \
+  curl -sf --max-time 5 http://hermes-dashboard:9119/health && echo "OK: internal reachable" || echo "FAIL"
+
+# If using egress proxy: this should SUCCEED (allowlisted)
+docker compose exec gateway \
+  curl -sf --max-time 5 --proxy http://egress-proxy:3128 https://api.openai.com/v1/models && echo "OK" || echo "FAIL"
+```
+
+## Limitations
+
+- **DNS resolution:** The `internal` network can still resolve external DNS
+  names unless you also run a local DNS resolver that blocks external queries.
+  For most threat models this is acceptable since DNS resolution alone does not
+  exfiltrate meaningful data.
+
+- **Not a substitute for sandbox backends:** This guide isolates the agent
+  *container's* network. If you use the default local terminal backend, tool
+  commands execute inside the same container. For stronger isolation, combine
+  network segmentation with a sandboxed terminal backend (Docker, Modal,
+  Daytona).
+
+- **Platform adapters need egress:** The gateway service needs outbound access
+  to reach messaging platform APIs. If you add new platform adapters, add their
+  API endpoints to the proxy allowlist.
+
+## Related
+
+- [SECURITY.md](../../SECURITY.md) — Hermes trust model and vulnerability reporting
+- [Terminal backends](../../README.md) — sandboxed execution targets
+- [docker-compose.yml](../../docker-compose.yml) — default compose configuration
diff --git a/gateway/platforms/api_server.py b/gateway/platforms/api_server.py
index 7d8afa64625..1c4cb6df728 100644
--- a/gateway/platforms/api_server.py
+++ b/gateway/platforms/api_server.py
@@ -24,7 +24,8 @@ Exposes an HTTP server with endpoints:
 
 Any OpenAI-compatible frontend (Open WebUI, LobeChat, LibreChat,
 AnythingLLM, NextChat, ChatBox, etc.) can connect to hermes-agent
-through this adapter by pointing at http://localhost:8642/v1.
+through this adapter by pointing at http://localhost:8642/v1 and
+authenticating with API_SERVER_KEY.
 
 Requires:
 - aiohttp (already available in the gateway)
@@ -844,11 +845,11 @@ class APIServerAdapter(BasePlatformAdapter):
         Validate Bearer token from Authorization header.
 
         Returns None if auth is OK, or a 401 web.Response on failure.
-        If no API key is configured, all requests are allowed (only when API
-        server is local).
+        connect() refuses to start the API server without API_SERVER_KEY, so
+        the no-key branch only exists for tests or unsupported manual wiring.
         """
         if not self._api_key:
-            return None  # No key configured — allow all (local-only use)
+            return None
 
         auth_header = request.headers.get("Authorization", "")
         if auth_header.startswith("Bearer "):
@@ -1604,6 +1605,7 @@ class APIServerAdapter(BasePlatformAdapter):
                 )
                 final_response = result.get("final_response", "") if isinstance(result, dict) else ""
                 effective_session_id = result.get("session_id", session_id) if isinstance(result, dict) else session_id
+                turn_messages = self._turn_transcript_messages(history, user_message, result) if isinstance(result, dict) else []
                 await queue.put(_event_payload("assistant.completed", {
                     "session_id": effective_session_id,
                     "message_id": message_id,
@@ -1616,6 +1618,7 @@ class APIServerAdapter(BasePlatformAdapter):
                     "session_id": effective_session_id,
                     "message_id": message_id,
                     "completed": True,
+                    "messages": turn_messages,
                     "usage": usage,
                 }))
             except Exception as exc:
@@ -3328,6 +3331,44 @@ class APIServerAdapter(BasePlatformAdapter):
             return len(prior)
         return 0
 
+    @classmethod
+    def _turn_transcript_messages(
+        cls,
+        conversation_history: List[Dict[str, Any]],
+        user_message: Any,
+        result: Dict[str, Any],
+    ) -> List[Dict[str, Any]]:
+        """Return this turn's assistant/tool messages in client-safe shape.
+
+        The streaming SSE contract delivers all assistant text as
+        ``assistant.delta`` events under one ``message_id`` interleaved with
+        ``tool.*`` events, and a single ``assistant.completed`` carrying only
+        the final reply.  A client that accumulates deltas into one buffer
+        cannot reconstruct *intermediate* assistant text segments that preceded
+        tool calls — so when the page is re-opened mid/post-stream those
+        segments appear lost, even though state.db persisted them correctly.
+
+        Emitting the authoritative per-turn transcript on ``run.completed`` lets
+        any SSE consumer reconcile its live view against ground truth without a
+        separate ``GET /messages`` round-trip.  Purely additive: clients that
+        ignore the field are unaffected.  Refs #34703.
+        """
+        agent_messages = result.get("messages") if isinstance(result, dict) else None
+        if not isinstance(agent_messages, list) or not agent_messages:
+            return []
+        start = cls._response_messages_turn_start_index(
+            conversation_history, user_message, result
+        )
+        turn = agent_messages[start:]
+        out: List[Dict[str, Any]] = []
+        for msg in turn:
+            if not isinstance(msg, dict):
+                continue
+            if msg.get("role") not in {"assistant", "tool"}:
+                continue
+            out.append(cls._message_response(msg))
+        return out
+
     @staticmethod
     def _extract_output_items(result: Dict[str, Any], start_index: int = 0) -> List[Dict[str, Any]]:
         """
@@ -4099,11 +4140,13 @@ class APIServerAdapter(BasePlatformAdapter):
             if hasattr(sweep_task, "add_done_callback"):
                 sweep_task.add_done_callback(self._background_tasks.discard)
 
-            # Refuse to start network-accessible without authentication
-            if is_network_accessible(self._host) and not self._api_key:
+            # Refuse to start without authentication. The API server can
+            # dispatch terminal-capable agent work, so every deployment needs
+            # an explicit API_SERVER_KEY regardless of bind address.
+            if not self._api_key:
                 logger.error(
-                    "[%s] Refusing to start: binding to %s requires API_SERVER_KEY. "
-                    "Set API_SERVER_KEY or use the default 127.0.0.1.",
+                    "[%s] Refusing to start: API_SERVER_KEY is required for the API server, "
+                    "including loopback-only binds on %s.",
                     self.name, self._host,
                 )
                 return False
@@ -4141,14 +4184,6 @@ class APIServerAdapter(BasePlatformAdapter):
             await self._site.start()
 
             self._mark_connected()
-            if not self._api_key:
-                logger.warning(
-                    "[%s] ⚠️  No API key configured (API_SERVER_KEY / platforms.api_server.key). "
-                    "All requests will be accepted without authentication. "
-                    "Set an API key for production deployments to prevent "
-                    "unauthorized access to sessions, responses, and cron jobs.",
-                    self.name,
-                )
             logger.info(
                 "[%s] API server listening on http://%s:%d (model: %s)",
                 self.name, self._host, self._port, self._model_name,
diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py
index d3960154688..6979a869148 100644
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@@ -472,6 +472,7 @@ def is_host_excluded_by_no_proxy(hostname: str, no_proxy_value: str | None = Non
     return False
 
 
+import dataclasses
 from dataclasses import dataclass, field
 from datetime import datetime
 from pathlib import Path
@@ -829,6 +830,13 @@ _HERMES_HOME = get_hermes_home()
 MEDIA_DELIVERY_ALLOW_DIRS_ENV = "HERMES_MEDIA_ALLOW_DIRS"
 MEDIA_DELIVERY_TRUST_RECENT_ENV = "HERMES_MEDIA_TRUST_RECENT_FILES"
 MEDIA_DELIVERY_TRUST_RECENT_SECONDS_ENV = "HERMES_MEDIA_TRUST_RECENT_SECONDS"
+# Strict mode toggles the original allowlist+recency path-validation behavior.
+# Off by default — symmetric with inbound (we accept any document type the
+# user uploads), and with the denylist still blocking obvious credential /
+# system paths. Operators running public-facing gateways where prompt
+# injection from one user could exfiltrate the host's secrets to that same
+# user should set this to true.
+MEDIA_DELIVERY_STRICT_ENV = "HERMES_MEDIA_DELIVERY_STRICT"
 MEDIA_DELIVERY_SAFE_ROOTS = (
     IMAGE_CACHE_DIR,
     AUDIO_CACHE_DIR,
@@ -840,6 +848,13 @@ MEDIA_DELIVERY_SAFE_ROOTS = (
     _HERMES_HOME / "video_cache",
     _HERMES_HOME / "document_cache",
     _HERMES_HOME / "browser_screenshots",
+    # Canonical cache layout — listed alongside the legacy *_cache dirs so
+    # generated artifacts deliver on installs that have both (#31733).
+    _HERMES_HOME / "cache" / "images",
+    _HERMES_HOME / "cache" / "audio",
+    _HERMES_HOME / "cache" / "videos",
+    _HERMES_HOME / "cache" / "documents",
+    _HERMES_HOME / "cache" / "screenshots",
 )
 
 # Default recency window for trusting freshly-produced files (seconds).
@@ -918,6 +933,21 @@ def _media_delivery_recency_seconds() -> float:
     return float(_MEDIA_DELIVERY_TRUST_RECENT_DEFAULT_SECONDS)
 
 
+def _media_delivery_strict_mode() -> bool:
+    """Return True when path validation should require allowlist/recency match.
+
+    Off by default. In non-strict mode, ``validate_media_delivery_path``
+    accepts any existing regular file that isn't under the credential /
+    system-path denylist — restoring the pre-#29523 behavior for the
+    single-user case. Strict mode preserves the original
+    allowlist+recency-window logic for operators running public-facing
+    gateways where prompt injection from one user shouldn't be able to
+    exfiltrate the host's secrets to that same user.
+    """
+    raw = os.environ.get(MEDIA_DELIVERY_STRICT_ENV, "0").strip().lower()
+    return raw in ("1", "true", "yes", "on")
+
+
 def _media_delivery_denied_paths() -> List[Path]:
     """Return absolute denylist paths under which delivery is never allowed."""
     denied = [Path(p) for p in _MEDIA_DELIVERY_DENIED_PREFIXES]
@@ -972,10 +1002,22 @@ def _path_is_within(path: Path, root: Path) -> bool:
 def validate_media_delivery_path(path: str) -> Optional[str]:
     """Return a safe absolute file path for native media delivery, else None.
 
-    MEDIA tags and bare local paths in model output are untrusted text. Only
-    existing regular files under Hermes-managed media caches, or roots the
-    operator explicitly allowlists, may be uploaded as native attachments.
-    Symlinks are resolved before the containment check.
+    Default mode (single-user / private gateway): accept any existing regular
+    file that isn't under the credential / system-path denylist
+    (``_MEDIA_DELIVERY_DENIED_PREFIXES`` + ``~/.ssh``, ``~/.aws``, etc.).
+    This matches the symmetry of inbound delivery — Telegram/Discord/Slack
+    will hand the agent any file the user uploads, and the agent can hand
+    back any file that isn't a credential.
+
+    Strict mode (opt-in via ``gateway.strict`` in ``config.yaml`` or
+    ``HERMES_MEDIA_DELIVERY_STRICT=1``): the file MUST live under a
+    Hermes-managed cache, under an operator-allowlisted root
+    (``HERMES_MEDIA_ALLOW_DIRS``), or be freshly produced inside the
+    configured recency window. Suitable for public-facing bots where
+    prompt injection from one user shouldn't be able to exfiltrate the
+    host's secrets to that same user.
+
+    Symlinks are resolved before any containment / denylist check.
     """
     if not path:
         return None
@@ -987,7 +1029,11 @@ def validate_media_delivery_path(path: str) -> Optional[str]:
     if not candidate:
         return None
 
-    expanded = Path(os.path.expanduser(candidate))
+    try:
+        expanded = Path(os.path.expanduser(candidate))
+    except (OSError, RuntimeError, ValueError):
+        # expanduser raises ValueError("embedded null byte") for a ~\x00 path.
+        return None
     if not expanded.is_absolute():
         return None
 
@@ -999,6 +1045,8 @@ def validate_media_delivery_path(path: str) -> Optional[str]:
     if not resolved.is_file():
         return None
 
+    # Cache / operator allowlist is always honored — these are unconditionally
+    # trusted regardless of mode.
     for root in _media_delivery_allowed_roots():
         try:
             resolved_root = root.expanduser().resolve(strict=False)
@@ -1007,9 +1055,18 @@ def validate_media_delivery_path(path: str) -> Optional[str]:
         if _path_is_within(resolved, resolved_root):
             return str(resolved)
 
-    # Outside the cache/operator allowlist: fall back to recency-based trust
-    # for files the agent has just produced (e.g. ``pandoc -o /tmp/report.pdf``
-    # or ``write_file("/home/user/report.pdf", ...)``). System paths and
+    # Non-strict mode (default): accept anything not on the denylist.
+    # The denylist still blocks /etc, /proc, ~/.ssh, ~/.aws, ~/.hermes/.env,
+    # ~/.hermes/auth.json, etc. — so the obvious prompt-injection sites
+    # (``MEDIA:/etc/passwd``, ``MEDIA:~/.ssh/id_rsa``) remain rejected.
+    if not _media_delivery_strict_mode():
+        if _path_under_denied_prefix(resolved):
+            return None
+        return str(resolved)
+
+    # Strict mode: fall back to recency-based trust for freshly-produced
+    # files (e.g. ``pandoc -o /tmp/report.pdf`` or
+    # ``write_file("/home/user/report.pdf", ...)``). System paths and
     # credential locations remain blocked even when "recent" — see
     # ``_MEDIA_DELIVERY_DENIED_PREFIXES`` for the denylist.
     window = _media_delivery_recency_seconds()
@@ -1020,6 +1077,17 @@ def validate_media_delivery_path(path: str) -> Optional[str]:
     return None
 
 
+# Neutralise control chars and the Unicode line separators (NEL, LS, PS) that
+# str.splitlines() / log aggregators treat as breaks, so a model-emitted path
+# can't forge a second log line. Truncated to keep records bounded.
+_LOG_UNSAFE_CHARS = re.compile(r"[\x00-\x1f\x7f\x85\u2028\u2029]")
+
+
+def _log_safe_path(path: str) -> str:
+    """Return a single-line, length-bounded path for log output."""
+    return _LOG_UNSAFE_CHARS.sub("?", str(path))[:200]
+
+
 SUPPORTED_DOCUMENT_TYPES = {
     ".pdf": "application/pdf",
     ".md": "text/markdown",
@@ -1063,6 +1131,75 @@ SUPPORTED_IMAGE_DOCUMENT_TYPES = {
 }
 
 
+# ---------------------------------------------------------------------------
+# Media-delivery extension allowlist — SINGLE SOURCE OF TRUTH
+#
+# Both extractors that turn response text into native attachments derive their
+# extension set from this tuple:
+#   * ``extract_media()``       — explicit ``MEDIA:<path>`` tags
+#   * ``extract_local_files()`` — bare absolute/home paths the agent mentions
+#
+# Historically these two carried independently-maintained extension lists.
+# ``extract_media`` had a narrow list (no .md/.json/.yaml/.xml/.html/...) while
+# ``extract_local_files`` had a broad one. Combined with the unconditional
+# ``MEDIA:\\s*\\S+`` cleanup at the dispatch sites, that mismatch created a
+# silent black hole: a ``MEDIA:/report.md`` tag failed the narrow extract_media
+# match, got stripped from the body by the loose cleanup regex, and was then
+# invisible to extract_local_files — the file was never delivered (issue
+# #34517). Keeping one list eliminates the drift; building the cleanup regexes
+# from the same set means a tag is only stripped when its extension is one we
+# can actually deliver, so an unknown-extension path survives in the body
+# instead of vanishing.
+#
+# Covers images (inline), video (inline where supported), audio (voice/audio),
+# documents/spreadsheets/presentations (send_document), archives, and rendered
+# web output. The dispatch partition (image vs video vs document) lives in
+# ``gateway/run.py``.
+# ---------------------------------------------------------------------------
+
+MEDIA_DELIVERY_EXTS: Tuple[str, ...] = (
+    # Images (embed inline)
+    ".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".tiff", ".svg",
+    # Video (embed inline where supported)
+    ".mp4", ".mov", ".avi", ".mkv", ".webm",
+    # Audio (delivered as voice/audio where supported)
+    ".mp3", ".wav", ".ogg", ".opus", ".m4a", ".flac",
+    # Documents (uploaded as file attachments)
+    ".pdf", ".docx", ".doc", ".odt", ".rtf", ".txt", ".md", ".epub",
+    # Spreadsheets / data
+    ".xlsx", ".xls", ".ods", ".csv", ".tsv", ".json", ".xml", ".yaml", ".yml",
+    # Presentations
+    ".pptx", ".ppt", ".odp", ".key",
+    # Archives
+    ".zip", ".tar", ".gz", ".tgz", ".bz2", ".xz", ".7z", ".rar", ".apk", ".ipa",
+    # Web / rendered output
+    ".html", ".htm",
+)
+
+# Regex alternation fragment of bare extensions (no leading dot), e.g.
+# ``png|jpe?g|...``. ``jpe?g`` collapses jpg/jpeg into one branch. Sorted
+# longest-first so the alternation never matches a shorter ext as a prefix of
+# a longer one (e.g. ``.tar`` before ``.tar.gz`` components).
+_MEDIA_EXT_ALTERNATION = "|".join(
+    sorted((e.lstrip(".") for e in MEDIA_DELIVERY_EXTS), key=len, reverse=True)
+)
+
+# Anchored ``MEDIA:<path>`` cleanup pattern. Unlike the old loose
+# ``MEDIA:\\s*\\S+``, this only strips a tag whose path ends in a known
+# deliverable extension (optionally quoted/backticked). A ``MEDIA:`` tag with
+# an unknown extension is left in the text so it can still be picked up by the
+# bare-path detector (extract_local_files) downstream rather than silently
+# deleted. Shared by the non-streaming dispatch path and the streaming
+# consumer so both behave identically.
+MEDIA_TAG_CLEANUP_RE = re.compile(
+    r'''[`"']?MEDIA:\s*'''
+    r'''(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|'''
+    r'''(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:''' + _MEDIA_EXT_ALTERNATION + r'''))'''
+    r'''(?=[\s`"',;:)\]}]|$)[`"']?''',
+    re.IGNORECASE,
+)
+
+
 def get_document_cache_dir() -> Path:
     """Return the document cache directory, creating it if it doesn't exist."""
     DOCUMENT_CACHE_DIR.mkdir(parents=True, exist_ok=True)
@@ -1516,6 +1653,10 @@ class BasePlatformAdapter(ABC):
         self.config = config
         self.platform = platform
         self._message_handler: Optional[MessageHandler] = None
+        # Optional hook (e.g. Telegram DM topic recovery) that rewrites
+        # ``event.source.thread_id`` before session keying. Returns the
+        # corrected thread_id or None to leave the source untouched.
+        self._topic_recovery_fn: Optional[Callable[[Any], Optional[str]]] = None
         self._running = False
         self._fatal_error_code: Optional[str] = None
         self._fatal_error_message: Optional[str] = None
@@ -1583,6 +1724,29 @@ class BasePlatformAdapter(ABC):
         """
         return len
 
+    @property
+    def enforces_own_access_policy(self) -> bool:
+        """Whether this adapter gates inbound access before dispatch.
+
+        Some adapters (WeCom, Weixin, Yuanbao, QQBot) implement a documented
+        config-driven access surface — ``dm_policy`` / ``group_policy`` /
+        ``allow_from`` / ``group_allow_from`` in ``PlatformConfig.extra`` — and
+        enforce it at intake: a message is dropped inside the adapter and never
+        reaches the gateway unless it already passed that policy.
+
+        The gateway's env-based allowlist check runs *after* the adapter, so for
+        these platforms a message arriving at ``_is_user_authorized`` has, by
+        definition, already been authorized by the adapter. Without this flag the
+        gateway would then deny it again (no env allowlist → default deny),
+        silently breaking ``dm_policy: open`` and config-only allowlists.
+
+        Adapters that own their access policy override this to return ``True``.
+        The gateway treats that as "already authorized at intake" and skips the
+        env-allowlist default-deny. Adapters that delegate access control to the
+        gateway leave it ``False`` (the default).
+        """
+        return False
+
     def supports_draft_streaming(
         self,
         chat_type: Optional[str] = None,
@@ -1771,6 +1935,40 @@ class BasePlatformAdapter(ABC):
         """
         self._message_handler = handler
 
+    def set_topic_recovery_fn(
+        self,
+        fn: Optional[Callable[[Any], Optional[str]]],
+    ) -> None:
+        """Install a thread_id-recovery hook (Telegram DM topic mode).
+
+        The hook is called with ``event.source`` before session keying;
+        a non-None return value replaces ``source.thread_id``. Pass
+        ``None`` to clear the hook.
+        """
+        # Guard against subclasses that initialize via ``object.__new__`` in
+        # tests and never run ``BasePlatformAdapter.__init__``.
+        self._topic_recovery_fn = fn  # type: ignore[attr-defined]
+
+    def _apply_topic_recovery(self, event: MessageEvent) -> None:
+        """Rewrite ``event.source.thread_id`` in place if the hook returns one."""
+        recover = getattr(self, "_topic_recovery_fn", None)
+        if recover is None:
+            return
+        source = getattr(event, "source", None)
+        if source is None:
+            return
+        try:
+            recovered = recover(source)
+        except Exception:
+            logger.debug("topic recovery hook failed", exc_info=True)
+            return
+        if recovered is None or str(recovered) == str(source.thread_id or ""):
+            return
+        try:
+            event.source = dataclasses.replace(source, thread_id=str(recovered))
+        except Exception:
+            logger.debug("topic recovery rewrite failed", exc_info=True)
+
     def set_busy_session_handler(self, handler: Optional[Callable[[MessageEvent, str], Awaitable[bool]]]) -> None:
         """Set an optional handler for messages arriving during active sessions."""
         self._busy_session_handler = handler
@@ -2354,11 +2552,12 @@ class BasePlatformAdapter(ABC):
         """Drop unsafe MEDIA paths and normalize accepted paths."""
         safe_media: List[Tuple[str, bool]] = []
         for media_path, is_voice in media_files or []:
-            safe_path = validate_media_delivery_path(str(media_path))
+            raw = str(media_path)
+            safe_path = validate_media_delivery_path(raw)
             if safe_path:
                 safe_media.append((safe_path, bool(is_voice)))
             else:
-                logger.warning("Skipping unsafe MEDIA directive path outside allowed roots")
+                logger.warning("Skipping unsafe MEDIA directive path: %s", _log_safe_path(raw))
         return safe_media
 
     @staticmethod
@@ -2366,11 +2565,12 @@ class BasePlatformAdapter(ABC):
         """Drop unsafe bare local file paths and normalize accepted paths."""
         safe_paths: List[str] = []
         for file_path in file_paths or []:
-            safe_path = validate_media_delivery_path(str(file_path))
+            raw = str(file_path)
+            safe_path = validate_media_delivery_path(raw)
             if safe_path:
                 safe_paths.append(safe_path)
             else:
-                logger.warning("Skipping unsafe local file path outside allowed roots")
+                logger.warning("Skipping unsafe local file path: %s", _log_safe_path(raw))
         return safe_paths
 
     @staticmethod
@@ -2411,17 +2611,22 @@ class BasePlatformAdapter(ABC):
         cleaned = cleaned.replace("[[as_document]]", "")
         
         # Extract MEDIA:<path> tags, allowing optional whitespace after the colon
-        # and quoted/backticked paths for LLM-formatted outputs.
-        media_pattern = re.compile(
-            r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|flac|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|txt|csv|apk|ipa)(?=[\s`"',;:)\]}]|$))[`"']?'''
-        )
+        # and quoted/backticked paths for LLM-formatted outputs. The extension
+        # set is the shared MEDIA_DELIVERY_EXTS source of truth (built once into
+        # MEDIA_TAG_CLEANUP_RE) so it can never drift from extract_local_files.
+        media_pattern = MEDIA_TAG_CLEANUP_RE
         for match in media_pattern.finditer(content):
             path = match.group("path").strip()
             if len(path) >= 2 and path[0] == path[-1] and path[0] in "`\"'":
                 path = path[1:-1].strip()
             path = path.lstrip("`\"'").rstrip("`\"',.;:)}]")
             if path:
-                media.append((os.path.expanduser(path), has_voice_tag))
+                try:
+                    media.append((os.path.expanduser(path), has_voice_tag))
+                except (OSError, RuntimeError, ValueError):
+                    # Skip a crafted ~\x00 path rather than aborting extraction
+                    # and dropping every other attachment in the response.
+                    continue
 
         # Remove MEDIA tags from content (including surrounding quote/backtick wrappers)
         if media:
@@ -2455,24 +2660,7 @@ class BasePlatformAdapter(ABC):
             Tuple of (list of expanded file paths, cleaned text with the
             raw path strings removed).
         """
-        _LOCAL_MEDIA_EXTS = (
-            # Images (embed inline)
-            '.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp', '.tiff', '.svg',
-            # Video (embed inline where supported)
-            '.mp4', '.mov', '.avi', '.mkv', '.webm',
-            # Audio (delivered as voice/audio where supported)
-            '.mp3', '.wav', '.ogg', '.m4a', '.flac',
-            # Documents (uploaded as file attachments)
-            '.pdf', '.docx', '.doc', '.odt', '.rtf', '.txt', '.md',
-            # Spreadsheets / data
-            '.xlsx', '.xls', '.ods', '.csv', '.tsv', '.json', '.xml', '.yaml', '.yml',
-            # Presentations
-            '.pptx', '.ppt', '.odp', '.key',
-            # Archives
-            '.zip', '.tar', '.gz', '.tgz', '.bz2', '.xz', '.7z', '.rar',
-            # Web / rendered output
-            '.html', '.htm',
-        )
+        _LOCAL_MEDIA_EXTS = MEDIA_DELIVERY_EXTS
         ext_part = '|'.join(e.lstrip('.') for e in _LOCAL_MEDIA_EXTS)
 
         # (?<![/:\w.]) prevents matching inside URLs (e.g. https://…/img.png)
@@ -3287,7 +3475,12 @@ class BasePlatformAdapter(ABC):
             return
 
         coerce_plaintext_gateway_command(event)
-        
+
+        # Rewrite ``event.source.thread_id`` via the installed recovery hook
+        # (Telegram DM topic mode) so the session key, guard checks, and
+        # downstream delivery all agree on the same lane.
+        self._apply_topic_recovery(event)
+
         session_key = build_session_key(
             event.source,
             group_sessions_per_user=self.config.extra.get("group_sessions_per_user", True),
@@ -3588,7 +3781,12 @@ class BasePlatformAdapter(ABC):
                 # Strip any remaining internal directives from message body (fixes #1561)
                 text_content = text_content.replace("[[audio_as_voice]]", "").strip()
                 text_content = text_content.replace("[[as_document]]", "").strip()
-                text_content = re.sub(r"MEDIA:\s*\S+", "", text_content).strip()
+                # Strip only MEDIA: tags whose path has a deliverable extension
+                # (shared MEDIA_TAG_CLEANUP_RE). A MEDIA: tag with an unknown
+                # extension is intentionally left in the body so extract_local_files
+                # below can still pick up the bare path — otherwise the file would
+                # be silently dropped (issue #34517).
+                text_content = MEDIA_TAG_CLEANUP_RE.sub("", text_content).strip()
                 if images:
                     logger.info("[%s] extract_images found %d image(s) in response (%d chars)", self.name, len(images), len(response))
 
diff --git a/gateway/platforms/feishu.py b/gateway/platforms/feishu.py
index 2831476b5ba..10ddbb17d21 100644
--- a/gateway/platforms/feishu.py
+++ b/gateway/platforms/feishu.py
@@ -48,6 +48,7 @@ user is seen through different apps in the future.
 from __future__ import annotations
 
 import asyncio
+import collections
 import hashlib
 import hmac
 import itertools
@@ -1408,6 +1409,8 @@ class FeishuAdapter(BasePlatformAdapter):
     """Feishu/Lark bot adapter."""
 
     MAX_MESSAGE_LENGTH = 8000
+    # Max distinct chat IDs retained in _chat_locks before LRU eviction kicks in.
+    CHAT_LOCK_MAX_SIZE: int = 1000
     # Threshold for detecting Feishu client-side message splits.
     # When a chunk is near the ~4096-char practical limit, a continuation
     # is almost certain.
@@ -1445,7 +1448,7 @@ class FeishuAdapter(BasePlatformAdapter):
         self._pending_inbound_lock = threading.Lock()
         self._pending_drain_scheduled = False
         self._pending_inbound_max_depth = 1000  # cap queue; drop oldest beyond
-        self._chat_locks: Dict[str, asyncio.Lock] = {}  # chat_id → lock (per-chat serial processing)
+        self._chat_locks: "collections.OrderedDict[str, asyncio.Lock]" = collections.OrderedDict()  # chat_id → lock (per-chat serial processing, LRU-bounded)
         self._sent_message_ids_to_chat: Dict[str, str] = {}  # message_id → chat_id (for reaction routing)
         self._sent_message_id_order: List[str] = []  # LRU order for _sent_message_ids_to_chat
         self._chat_info_cache: Dict[str, Dict[str, Any]] = {}
@@ -2835,11 +2838,28 @@ class FeishuAdapter(BasePlatformAdapter):
     # =========================================================================
 
     def _get_chat_lock(self, chat_id: str) -> asyncio.Lock:
-        """Return (creating if needed) the per-chat asyncio.Lock for serial message processing."""
+        """Return (creating if needed) the per-chat asyncio.Lock for serial message processing.
+
+        Bounded with LRU eviction so a long-running gateway that sees many
+        distinct chats does not grow ``_chat_locks`` without limit. Locks that
+        are currently held are never evicted; if every entry is locked we fall
+        back to dropping the least-recently-used one.
+        """
         lock = self._chat_locks.get(chat_id)
-        if lock is None:
-            lock = asyncio.Lock()
-            self._chat_locks[chat_id] = lock
+        if lock is not None:
+            self._chat_locks.move_to_end(chat_id)
+            return lock
+        if len(self._chat_locks) >= self.CHAT_LOCK_MAX_SIZE:
+            evicted = False
+            for key in list(self._chat_locks):
+                if not self._chat_locks[key].locked():
+                    self._chat_locks.pop(key)
+                    evicted = True
+                    break
+            if not evicted:
+                self._chat_locks.pop(next(iter(self._chat_locks)))
+        lock = asyncio.Lock()
+        self._chat_locks[chat_id] = lock
         return lock
 
     async def _handle_message_with_guards(self, event: MessageEvent) -> None:
diff --git a/gateway/platforms/matrix.py b/gateway/platforms/matrix.py
index f7837a1f7d6..5c1cb9a182e 100644
--- a/gateway/platforms/matrix.py
+++ b/gateway/platforms/matrix.py
@@ -2236,7 +2236,8 @@ class MatrixAdapter(BasePlatformAdapter):
             if prompt and not prompt.resolved:
                 if room_id != prompt.chat_id:
                     return
-                if self._allowed_user_ids and sender not in self._allowed_user_ids:
+                _allow_all = os.getenv("GATEWAY_ALLOW_ALL_USERS", "").lower() in {"true", "1", "yes"}
+                if not _allow_all and not (self._allowed_user_ids and sender in self._allowed_user_ids):
                     logger.info(
                         "Matrix: ignoring approval reaction from unauthorized user %s on %s",
                         sender, reacts_to,
diff --git a/gateway/platforms/msgraph_webhook.py b/gateway/platforms/msgraph_webhook.py
index b7045c801a6..d1d48996d73 100644
--- a/gateway/platforms/msgraph_webhook.py
+++ b/gateway/platforms/msgraph_webhook.py
@@ -25,6 +25,7 @@ from gateway.platforms.base import (
     MessageEvent,
     MessageType,
     SendResult,
+    is_network_accessible,
 )
 
 logger = logging.getLogger(__name__)
@@ -132,12 +133,24 @@ class MSGraphWebhookAdapter(BasePlatformAdapter):
     def set_notification_scheduler(self, scheduler: Optional[NotificationScheduler]) -> None:
         self._notification_scheduler = scheduler
 
+    def _source_allowlist_required_but_missing(self) -> bool:
+        return is_network_accessible(self._host) and not self._allowed_source_networks
+
     async def connect(self) -> bool:
         if self._client_state is None:
             logger.error(
                 "[msgraph_webhook] Refusing to start without extra.client_state configured"
             )
             return False
+        if self._source_allowlist_required_but_missing():
+            logger.error(
+                "[msgraph_webhook] Refusing to start: binding to %s requires "
+                "extra.allowed_source_cidrs. Configure the Microsoft Graph "
+                "source CIDRs or bind to loopback (127.0.0.1/::1) behind a "
+                "tunnel or reverse proxy.",
+                self._host,
+            )
+            return False
 
         app = web.Application()
         app.router.add_get(self._health_path, self._handle_health)
@@ -177,6 +190,8 @@ class MSGraphWebhookAdapter(BasePlatformAdapter):
         return {"name": chat_id, "type": "webhook"}
 
     async def _handle_health(self, request: "web.Request") -> "web.Response":
+        if not self._source_ip_allowed(request):
+            return web.Response(status=403)
         return web.json_response(
             {
                 "status": "ok",
@@ -271,9 +286,12 @@ class MSGraphWebhookAdapter(BasePlatformAdapter):
     def _source_ip_allowed(self, request: "web.Request") -> bool:
         """Return True if the request's source IP is in the configured allowlist.
 
-        When ``allowed_source_cidrs`` is empty (the default), everything is
-        allowed — preserves behavior for dev tunnels / localhost setups.
+        Loopback-only binds may omit ``allowed_source_cidrs`` for local reverse
+        proxies and dev tunnels. Network-accessible binds fail closed until an
+        explicit CIDR allowlist is configured.
         """
+        if self._source_allowlist_required_but_missing():
+            return False
         if not self._allowed_source_networks:
             return True
         peer = request.remote or ""
diff --git a/gateway/platforms/qqbot/adapter.py b/gateway/platforms/qqbot/adapter.py
index 7569884760e..5b4a396ed2f 100644
--- a/gateway/platforms/qqbot/adapter.py
+++ b/gateway/platforms/qqbot/adapter.py
@@ -126,7 +126,6 @@ from gateway.platforms.qqbot.chunked_upload import (
 )
 from gateway.platforms.qqbot.keyboards import (
     ApprovalRequest,
-    ApprovalSender,
     InlineKeyboard,
     InteractionEvent,
     build_approval_keyboard,
@@ -270,6 +269,11 @@ class QQAdapter(BasePlatformAdapter):
     def name(self) -> str:
         return "QQBot"
 
+    @property
+    def enforces_own_access_policy(self) -> bool:
+        """QQBot gates DM/group access at intake via dm_policy/group_policy."""
+        return True
+
     # ------------------------------------------------------------------
     # Connection lifecycle
     # ------------------------------------------------------------------
diff --git a/gateway/platforms/qqbot/chunked_upload.py b/gateway/platforms/qqbot/chunked_upload.py
index 416dfc52a98..6979bd4cb7c 100644
--- a/gateway/platforms/qqbot/chunked_upload.py
+++ b/gateway/platforms/qqbot/chunked_upload.py
@@ -37,7 +37,7 @@ import asyncio
 import functools
 import hashlib
 import logging
-from dataclasses import dataclass, field
+from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, Awaitable, Callable, Dict, List, Optional
 
diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py
index 16977bbcff1..afb5726f330 100644
--- a/gateway/platforms/telegram.py
+++ b/gateway/platforms/telegram.py
@@ -1690,7 +1690,6 @@ class TelegramAdapter(BasePlatformAdapter):
                     BotCommandScopeAllPrivateChats,
                     BotCommandScopeAllGroupChats,
                     BotCommandScopeDefault,
-                    BotCommandScopeChat,
                 )
                 from hermes_cli.commands import telegram_menu_commands
                 # Telegram allows up to 100 commands but has an undocumented
@@ -5027,8 +5026,14 @@ class TelegramAdapter(BasePlatformAdapter):
     # ------------------------------------------------------------------
 
     def _text_batch_key(self, event: MessageEvent) -> str:
-        """Session-scoped key for text message batching."""
+        """Session-scoped key for text message batching.
+
+        Applies the installed topic-recovery hook first so DM-topic batches
+        coalesce on (and dispatch to) the recovered lane rather than the
+        raw inbound ``message_thread_id`` Telegram may have attached.
+        """
         from gateway.session import build_session_key
+        self._apply_topic_recovery(event)
         return build_session_key(
             event.source,
             group_sessions_per_user=self.config.extra.get("group_sessions_per_user", True),
diff --git a/gateway/platforms/wecom.py b/gateway/platforms/wecom.py
index 1569d5faf52..c1175643019 100644
--- a/gateway/platforms/wecom.py
+++ b/gateway/platforms/wecom.py
@@ -847,6 +847,11 @@ class WeComAdapter(BasePlatformAdapter):
     # Policy helpers
     # ------------------------------------------------------------------
 
+    @property
+    def enforces_own_access_policy(self) -> bool:
+        """WeCom gates DM/group access at intake via dm_policy/group_policy."""
+        return True
+
     def _is_dm_allowed(self, sender_id: str) -> bool:
         if self._dm_policy == "disabled":
             return False
diff --git a/gateway/platforms/weixin.py b/gateway/platforms/weixin.py
index 613c8283b1c..025bf052cce 100644
--- a/gateway/platforms/weixin.py
+++ b/gateway/platforms/weixin.py
@@ -658,52 +658,6 @@ def _split_table_row(line: str) -> List[str]:
     return [cell.strip() for cell in row.split("|")]
 
 
-def _rewrite_headers_for_weixin(line: str) -> str:
-    match = _HEADER_RE.match(line)
-    if not match:
-        return line.rstrip()
-    level = len(match.group(1))
-    title = match.group(2).strip()
-    if level == 1:
-        return f"【{title}】"
-    return f"**{title}**"
-
-
-def _rewrite_table_block_for_weixin(lines: List[str]) -> str:
-    if len(lines) < 2:
-        return "\n".join(lines)
-    headers = _split_table_row(lines[0])
-    body_rows = [_split_table_row(line) for line in lines[2:] if line.strip()]
-    if not headers or not body_rows:
-        return "\n".join(lines)
-
-    formatted_rows: List[str] = []
-    for row in body_rows:
-        pairs = []
-        for idx, header in enumerate(headers):
-            if idx >= len(row):
-                break
-            label = header or f"Column {idx + 1}"
-            value = row[idx].strip()
-            if value:
-                pairs.append((label, value))
-        if not pairs:
-            continue
-        if len(pairs) == 1:
-            label, value = pairs[0]
-            formatted_rows.append(f"- {label}: {value}")
-            continue
-        if len(pairs) == 2:
-            label, value = pairs[0]
-            other_label, other_value = pairs[1]
-            formatted_rows.append(f"- {label}: {value}")
-            formatted_rows.append(f"  {other_label}: {other_value}")
-            continue
-        summary = " | ".join(f"{label}: {value}" for label, value in pairs)
-        formatted_rows.append(f"- {summary}")
-    return "\n".join(formatted_rows) if formatted_rows else "\n".join(lines)
-
-
 def _normalize_markdown_blocks(content: str) -> str:
     lines = content.splitlines()
     result: List[str] = []
@@ -1443,6 +1397,11 @@ class WeixinAdapter(BasePlatformAdapter):
         logger.info("[%s] inbound from=%s type=%s media=%d", self.name, _safe_id(sender_id), source.chat_type, len(media_paths))
         await self.handle_message(event)
 
+    @property
+    def enforces_own_access_policy(self) -> bool:
+        """Weixin gates DM/group access at intake via dm_policy/group_policy."""
+        return True
+
     def _is_dm_allowed(self, sender_id: str) -> bool:
         if self._dm_policy == "disabled":
             return False
diff --git a/gateway/platforms/yuanbao.py b/gateway/platforms/yuanbao.py
index 18d0787c978..6dc54dbcd50 100644
--- a/gateway/platforms/yuanbao.py
+++ b/gateway/platforms/yuanbao.py
@@ -2230,6 +2230,45 @@ class MediaResolveMiddleware(InboundMiddleware):
 
     name = "media-resolve"
 
+    # --- Resource download cache (keyed by resourceId) ---
+    # Avoids redundant downloads of the same resource within the TTL window.
+    # The same resourceId can be referenced multiple times in a session (own
+    # attachment, then quoted again, then observed in a group backfill); each
+    # reference otherwise triggers a fresh token exchange + download.
+    _resource_cache: ClassVar[Dict[str, Tuple[str, str, float]]] = {}  # rid -> (local_path, mime, ts)
+    _RESOURCE_CACHE_TTL_S: ClassVar[int] = 24 * 60 * 60  # 24 hours
+    _RESOURCE_CACHE_MAX_SIZE: ClassVar[int] = 256
+
+    @classmethod
+    def _get_cached_resource(cls, resource_id: str) -> Optional[Tuple[str, str]]:
+        """Return cached ``(local_path, mime)`` if still valid and file exists, else None."""
+        if not resource_id:
+            return None
+        entry = cls._resource_cache.get(resource_id)
+        if entry is None:
+            return None
+        local_path, mime, ts = entry
+        if time.time() - ts > cls._RESOURCE_CACHE_TTL_S:
+            cls._resource_cache.pop(resource_id, None)
+            return None
+        # Verify the cached file still exists on disk (cache dir may be swept).
+        if not os.path.isfile(local_path):
+            cls._resource_cache.pop(resource_id, None)
+            return None
+        return local_path, mime
+
+    @classmethod
+    def _put_cached_resource(cls, resource_id: str, local_path: str, mime: str) -> None:
+        """Store download result in cache. Evicts oldest entries when over capacity."""
+        if not resource_id:
+            return
+        if len(cls._resource_cache) >= cls._RESOURCE_CACHE_MAX_SIZE:
+            # Drop the oldest 25% of entries by timestamp.
+            sorted_keys = sorted(cls._resource_cache, key=lambda k: cls._resource_cache[k][2])
+            for k in sorted_keys[: cls._RESOURCE_CACHE_MAX_SIZE // 4]:
+                cls._resource_cache.pop(k, None)
+        cls._resource_cache[resource_id] = (local_path, mime, time.time())
+
     @staticmethod
     def _guess_image_ext_from_url(url: str) -> str:
         """Guess image extension from URL path."""
@@ -2327,8 +2366,23 @@ class MediaResolveMiddleware(InboundMiddleware):
     async def _download_and_cache(
         cls, adapter, *, fetch_url: str, kind: str,
         file_name: Optional[str] = None, log_tag: str = "",
+        resource_id: str = "",
     ) -> Optional[Tuple[str, str]]:
-        """Download a Yuanbao resource and cache locally. Returns ``(local_path, mime)`` or ``None``."""
+        """Download a Yuanbao resource and cache locally. Returns ``(local_path, mime)`` or ``None``.
+
+        When *resource_id* is provided, an in-memory cache keyed by resourceId
+        is consulted first to skip redundant downloads of the same resource
+        within the TTL window.
+        """
+        if resource_id:
+            hit = cls._get_cached_resource(resource_id)
+            if hit is not None:
+                logger.debug(
+                    "[%s] resource cache hit: rid=%s path=%s",
+                    adapter.name, resource_id, hit[0],
+                )
+                return hit
+
         try:
             file_bytes, content_type = await media_download_url(
                 fetch_url, max_size_mb=adapter.MEDIA_MAX_SIZE_MB,
@@ -2353,6 +2407,7 @@ class MediaResolveMiddleware(InboundMiddleware):
             mime = guess_mime_type(f"image{ext}")
             if not mime.startswith("image/"):
                 mime = content_type if content_type.startswith("image/") else "image/jpeg"
+            cls._put_cached_resource(resource_id, local_path, mime)
             return local_path, mime
 
         # kind == "file"
@@ -2368,6 +2423,7 @@ class MediaResolveMiddleware(InboundMiddleware):
             )
             return None
         mime = guess_mime_type(file_name) or content_type or "application/octet-stream"
+        cls._put_cached_resource(resource_id, local_path, mime)
         return local_path, mime
 
     @classmethod
@@ -2393,6 +2449,9 @@ class MediaResolveMiddleware(InboundMiddleware):
             if kind not in _RESOLVABLE_MEDIA_KINDS or not url:
                 continue
 
+            # Extract resourceId from the placeholder URL for cache dedup.
+            rid = ExtractContentMiddleware._parse_resource_id(url)
+
             try:
                 fetch_url = await cls._resolve_download_url(adapter, url)
             except Exception as exc:
@@ -2408,6 +2467,7 @@ class MediaResolveMiddleware(InboundMiddleware):
                 kind=kind,
                 file_name=str(ref.get("name") or "").strip() or None,
                 log_tag=f"placeholder_url={url[:80]}",
+                resource_id=rid,
             )
             if cached is None:
                 continue
@@ -2480,6 +2540,7 @@ class MediaResolveMiddleware(InboundMiddleware):
                 kind=kind,
                 file_name=filename or None,
                 log_tag=f"rid={rid}",
+                resource_id=rid,
             )
             if cached is None:
                 continue
@@ -2563,6 +2624,7 @@ class DispatchMiddleware(InboundMiddleware):
                         kind=kind,
                         file_name=filename or None,
                         log_tag=f"quote rid={rid}",
+                        resource_id=rid,
                     )
                     if cached is None:
                         continue
@@ -4629,6 +4691,11 @@ class YuanbaoAdapter(BasePlatformAdapter):
     # Abstract method implementations
     # ------------------------------------------------------------------
 
+    @property
+    def enforces_own_access_policy(self) -> bool:
+        """Yuanbao gates DM/group access at intake via dm_policy/group_policy."""
+        return True
+
     async def connect(self) -> bool:
         """Connect to Yuanbao WS gateway and authenticate.
 
diff --git a/gateway/run.py b/gateway/run.py
index 71b5e85f677..570ccf7e31b 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -751,7 +751,7 @@ _hermes_home = get_hermes_home()
 
 # Load environment variables from ~/.hermes/.env first.
 # User-managed env files should override stale shell exports on restart.
-from dotenv import load_dotenv  # backward-compat for tests that monkeypatch this symbol
+from dotenv import load_dotenv  # noqa: F401  # backward-compat for tests that monkeypatch this symbol
 from hermes_cli.env_loader import load_hermes_dotenv
 _env_path = _hermes_home / '.env'
 load_hermes_dotenv(hermes_home=_hermes_home, project_env=Path(__file__).resolve().parents[1] / '.env')
@@ -831,6 +831,8 @@ if _config_path.exists():
                 "docker_env": "TERMINAL_DOCKER_ENV",
                 "docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE",
                 "docker_run_as_host_user": "TERMINAL_DOCKER_RUN_AS_HOST_USER",
+                "docker_persist_across_processes": "TERMINAL_DOCKER_PERSIST_ACROSS_PROCESSES",
+                "docker_orphan_reaper": "TERMINAL_DOCKER_ORPHAN_REAPER",
                 "sandbox_dir": "TERMINAL_SANDBOX_DIR",
                 "persistent_shell": "TERMINAL_PERSISTENT_SHELL",
             }
@@ -932,9 +934,14 @@ if _config_path.exists():
             _redact = _security_cfg.get("redact_secrets")
             if _redact is not None:
                 os.environ["HERMES_REDACT_SECRETS"] = str(_redact).lower()
-        # Gateway settings (media delivery allowlist + recency trust)
+        # Gateway settings (media delivery allowlist + recency trust + strict mode)
         _gateway_cfg = _cfg.get("gateway", {})
         if isinstance(_gateway_cfg, dict):
+            _strict = _gateway_cfg.get("strict")
+            if _strict is not None:
+                os.environ["HERMES_MEDIA_DELIVERY_STRICT"] = (
+                    "1" if _strict else "0"
+                )
             _allow_dirs = _gateway_cfg.get("media_delivery_allow_dirs")
             if _allow_dirs:
                 if isinstance(_allow_dirs, str):
@@ -1800,7 +1807,34 @@ class GatewayRunner:
             ensure_installed(log_failures=False)
         except Exception:
             pass  # Non-fatal — fail-open at scan time if unavailable
-        
+
+        # Startup heads-up (#30882): a gateway in manual approval mode with no
+        # automated risk assessor (tirith disabled AND no auxiliary.approval
+        # model) can only gate dangerous commands / execute_code scripts via
+        # live in-chat approval. With approval routing fixed, those actions now
+        # fail closed (block) rather than silently auto-running — surface that
+        # so operators knowingly enable tirith or configure auxiliary.approval
+        # for unattended gateways.
+        try:
+            from hermes_cli.config import load_config as _load_full_config
+            _appr_cfg = _load_full_config()
+            _appr_mode = str(
+                cfg_get(_appr_cfg, "approvals", "mode", default="manual") or "manual"
+            ).strip().lower()
+            _tirith_on = bool(cfg_get(_appr_cfg, "security", "tirith_enabled", default=True))
+            _aux_approval = cfg_get(_appr_cfg, "auxiliary", "approval", default=None)
+            if _appr_mode == "manual" and not _tirith_on and not _aux_approval:
+                logger.warning(
+                    "Gateway approvals.mode=manual with no automated risk "
+                    "assessor (security.tirith_enabled is false and "
+                    "auxiliary.approval is unset): dangerous commands and "
+                    "execute_code scripts will BLOCK until a human approves "
+                    "them in chat. Enable security.tirith_enabled or configure "
+                    "auxiliary.approval for unattended operation."
+                )
+        except Exception:
+            logger.debug("approvals.mode startup check skipped", exc_info=True)
+
         # Initialize session database for session_search tool support
         self._session_db = None
         try:
@@ -2296,6 +2330,32 @@ class GatewayRunner:
             session_id=session_entry.session_id,
         )
 
+    def _sync_telegram_topic_binding(
+        self,
+        source: SessionSource,
+        session_entry,
+        *,
+        reason: str,
+    ) -> None:
+        """Update the topic binding to point at ``session_entry.session_id``.
+
+        Telegram topic lanes persist a (chat_id, thread_id) -> session_id row
+        so reopening a topic in a fresh process resumes the right Hermes
+        session. When compression rotates ``session_entry.session_id`` mid-turn,
+        the binding goes stale and the next inbound message in that topic
+        reloads the oversized parent transcript instead of the compressed
+        child, retriggering preflight compression — sometimes in a loop
+        (#20470, #29712, #33414).
+        """
+        if not self._is_telegram_topic_lane(source):
+            return
+        try:
+            self._record_telegram_topic_binding(source, session_entry)
+        except Exception:
+            logger.debug(
+                "telegram topic binding refresh failed (%s)", reason, exc_info=True,
+            )
+
     def _recover_telegram_topic_thread_id(
         self,
         source: SessionSource,
@@ -4152,6 +4212,7 @@ class GatewayRunner:
             adapter.set_fatal_error_handler(self._handle_adapter_fatal_error)
             adapter.set_session_store(self.session_store)
             adapter.set_busy_session_handler(self._handle_active_session_busy_message)
+            adapter.set_topic_recovery_fn(self._recover_telegram_topic_thread_id)
             adapter._busy_text_mode = self._busy_text_mode
             
             # Try to connect
@@ -5413,6 +5474,49 @@ class GatewayRunner:
             )
             stale_timeout_seconds = 0
 
+        # Read kanban.default_assignee — fallback profile for tasks
+        # created without an explicit assignee (e.g. via the dashboard).
+        # When set, the dispatcher applies it to unassigned ready tasks
+        # instead of skipping them indefinitely (#27145). Empty string
+        # (the schema default) means "no fallback, keep skipping" —
+        # backward-compatible with existing installs.
+        default_assignee = (kanban_cfg.get("default_assignee") or "").strip() or None
+        if default_assignee:
+            logger.info(
+                "kanban dispatcher: default_assignee=%r (unassigned ready tasks "
+                "will route to this profile)",
+                default_assignee,
+            )
+
+        # Read kanban.max_in_progress_per_profile — per-profile concurrency
+        # cap (#21582). When set, no single profile gets more than N
+        # workers running at once, even if the global max_in_progress
+        # would allow it. Prevents one profile's local model / API quota
+        # / browser pool from being overwhelmed by a fan-out.
+        raw_per_profile = kanban_cfg.get("max_in_progress_per_profile", None)
+        max_in_progress_per_profile = None
+        if raw_per_profile is not None:
+            try:
+                max_in_progress_per_profile = int(raw_per_profile)
+            except (TypeError, ValueError):
+                logger.warning(
+                    "kanban dispatcher: invalid kanban.max_in_progress_per_profile=%r; ignoring",
+                    raw_per_profile,
+                )
+                max_in_progress_per_profile = None
+            else:
+                if max_in_progress_per_profile < 1:
+                    logger.warning(
+                        "kanban dispatcher: kanban.max_in_progress_per_profile=%r is below 1; ignoring",
+                        raw_per_profile,
+                    )
+                    max_in_progress_per_profile = None
+                else:
+                    logger.info(
+                        "kanban dispatcher: max_in_progress_per_profile=%d",
+                        max_in_progress_per_profile,
+                    )
+
         # Initial delay so the gateway finishes wiring adapters before the
         # dispatcher spawns workers (those workers may hit gateway notify
         # subscriptions etc.). Matches the notifier watcher's delay.
@@ -5504,6 +5608,8 @@ class GatewayRunner:
                     max_in_progress=max_in_progress,
                     failure_limit=failure_limit,
                     stale_timeout_seconds=stale_timeout_seconds,
+                    default_assignee=default_assignee,
+                    max_in_progress_per_profile=max_in_progress_per_profile,
                 )
             except sqlite3.DatabaseError as exc:
                 if _is_corrupt_board_db_error(exc):
@@ -5812,6 +5918,7 @@ class GatewayRunner:
                     adapter.set_fatal_error_handler(self._handle_adapter_fatal_error)
                     adapter.set_session_store(self.session_store)
                     adapter.set_busy_session_handler(self._handle_active_session_busy_message)
+                    adapter.set_topic_recovery_fn(self._recover_telegram_topic_thread_id)
                     adapter._busy_text_mode = self._busy_text_mode
 
                     success = await self._connect_adapter_with_timeout(adapter, platform)
@@ -6435,6 +6542,31 @@ class GatewayRunner:
             return YuanbaoAdapter(config)
 
         return None
+
+    def _adapter_enforces_own_access_policy(self, platform: Optional[Platform]) -> bool:
+        """Whether the adapter for *platform* gates access at intake itself.
+
+        Mirrors ``BasePlatformAdapter.enforces_own_access_policy``. Adapters
+        such as WeCom, Weixin, Yuanbao, and QQBot evaluate their documented
+        ``dm_policy`` / ``group_policy`` / ``allow_from`` config before a
+        message is dispatched to the gateway, so a message that reaches
+        ``_is_user_authorized`` has already been authorized by the adapter.
+        Defaults to ``False`` when the adapter is unknown or doesn't expose
+        the flag.
+        """
+        if not platform:
+            return False
+        # Some test helpers build a bare GatewayRunner via object.__new__ and
+        # never set ``adapters``; treat a missing/empty map as "no adapter"
+        # rather than raising (see pitfalls.md #17).
+        adapters = getattr(self, "adapters", None)
+        if not adapters:
+            return False
+        adapter = adapters.get(platform)
+        if adapter is None:
+            return False
+        return bool(getattr(adapter, "enforces_own_access_policy", False))
+
     def _is_user_authorized(self, source: SessionSource) -> bool:
         """
         Check if a user is authorized to use the bot.
@@ -6574,6 +6706,15 @@ class GatewayRunner:
         global_allowlist = os.getenv("GATEWAY_ALLOWED_USERS", "").strip()
 
         if not platform_allowlist and not group_user_allowlist and not group_chat_allowlist and not global_allowlist:
+            # No env allowlists configured. Adapters that own their own
+            # config-driven access policy (dm_policy / group_policy /
+            # allow_from / group_allow_from) already gated this message at
+            # intake — it would not have reached the gateway otherwise — so
+            # honor that decision instead of falling through to the
+            # env-only default-deny below, which would silently break
+            # `dm_policy: open` and config-only allowlists. (#34515)
+            if self._adapter_enforces_own_access_policy(source.platform):
+                return True
             # No allowlists configured -- check global allow-all flag
             return os.getenv("GATEWAY_ALLOW_ALL_USERS", "").lower() in {"true", "1", "yes"}
 
@@ -6681,6 +6822,20 @@ class GatewayRunner:
             if config.unauthorized_dm_behavior != "pair":  # non-default → explicit override
                 return config.unauthorized_dm_behavior
 
+        # Config-driven dm_policy (WeCom / Weixin / Yuanbao / QQBot). An
+        # allowlist or disabled DM policy means the operator restricted access,
+        # so unauthorized DMs should be dropped silently rather than answered
+        # with a pairing code. An explicit pairing policy opts back into codes.
+        if platform and config and hasattr(config, "platforms"):
+            platform_cfg = config.platforms.get(platform)
+            extra = getattr(platform_cfg, "extra", None) if platform_cfg else None
+            if isinstance(extra, dict):
+                dm_policy = str(extra.get("dm_policy") or "").strip().lower()
+                if dm_policy == "pairing":
+                    return "pair"
+                if dm_policy in {"allowlist", "disabled"}:
+                    return "ignore"
+
         # No explicit override.  Fall back to allowlist-aware default:
         # if any allowlist is configured for this platform, silently drop
         # unauthorized messages instead of sending pairing codes.
@@ -8017,7 +8172,8 @@ class GatewayRunner:
                                 "🎤 I received your voice message but can't transcribe it — "
                                 "no speech-to-text provider is configured.\n\n"
                                 "To enable voice: install faster-whisper "
-                                "(`pip install faster-whisper` in the Hermes venv) "
+                                "(`uv pip install faster-whisper` in the Hermes venv; "
+                                "`pip install faster-whisper` also works if pip is on PATH) "
                                 "and set `stt.enabled: true` in config.yaml, "
                                 "then /restart the gateway."
                             )
@@ -8224,6 +8380,28 @@ class GatewayRunner:
                 binding = None
             if binding:
                 bound_session_id = str(binding.get("session_id") or "")
+                # Heal bindings that point at a pre-compression parent: walk
+                # the compression-continuation chain forward to its tip so the
+                # next message resumes the compressed child instead of
+                # reloading the oversized parent transcript (#20470/#29712/
+                # #33414). Returns the input unchanged when the session isn't
+                # a compression parent, so this is cheap and safe.
+                if bound_session_id and self._session_db is not None:
+                    try:
+                        canonical_session_id = self._session_db.get_compression_tip(
+                            bound_session_id,
+                        )
+                    except Exception:
+                        logger.debug(
+                            "compression-tip lookup failed for %s",
+                            bound_session_id, exc_info=True,
+                        )
+                        canonical_session_id = bound_session_id
+                    if (
+                        canonical_session_id
+                        and canonical_session_id != bound_session_id
+                    ):
+                        bound_session_id = canonical_session_id
                 if bound_session_id and bound_session_id != session_entry.session_id:
                     # Route the override through SessionStore so the session_key
                     # → session_id mapping is persisted to disk and the previous
@@ -8233,6 +8411,15 @@ class GatewayRunner:
                     switched = self.session_store.switch_session(session_key, bound_session_id)
                     if switched is not None:
                         session_entry = switched
+                # If the stored binding pointed at a parent, rewrite it to the
+                # canonical descendant now that we've followed the chain.
+                if (
+                    bound_session_id
+                    and bound_session_id != str(binding.get("session_id") or "")
+                ):
+                    self._sync_telegram_topic_binding(
+                        source, session_entry, reason="compression-tip-walk",
+                    )
             else:
                 try:
                     self._record_telegram_topic_binding(source, session_entry)
@@ -8609,6 +8796,10 @@ class GatewayRunner:
                                     if _hyg_new_sid != session_entry.session_id:
                                         session_entry.session_id = _hyg_new_sid
                                         self.session_store._save()
+                                        self._sync_telegram_topic_binding(
+                                            source, session_entry,
+                                            reason="hygiene-compression",
+                                        )
 
                                     self.session_store.rewrite_transcript(
                                         session_entry.session_id, _compressed
@@ -8874,6 +9065,9 @@ class GatewayRunner:
             if agent_result.get("session_id") and agent_result["session_id"] != session_entry.session_id:
                 session_entry.session_id = agent_result["session_id"]
                 self.session_store._save()
+                self._sync_telegram_topic_binding(
+                    source, session_entry, reason="agent-result-compression",
+                )
 
             # Prepend reasoning/thinking if display is enabled (per-platform)
             try:
@@ -10222,8 +10416,16 @@ class GatewayRunner:
 
         raw_args = event.get_command_args().strip()
 
-        # Parse --provider and --global flags
-        model_input, explicit_provider, persist_global = parse_model_flags(raw_args)
+        # Parse --provider, --global, and --refresh flags
+        model_input, explicit_provider, persist_global, force_refresh = parse_model_flags(raw_args)
+
+        # --refresh: bust the disk cache so the picker shows live data.
+        if force_refresh:
+            try:
+                from hermes_cli.models import clear_provider_models_cache
+                clear_provider_models_cache()
+            except Exception:
+                pass
 
         # Read current model/provider from config
         current_model = ""
@@ -11523,9 +11725,16 @@ class GatewayRunner:
 
             from gateway.platforms.base import BasePlatformAdapter, should_send_media_as_audio
 
-            media_files, _ = adapter.extract_media(response)
+            media_files, cleaned = adapter.extract_media(response)
             media_files = BasePlatformAdapter.filter_media_delivery_paths(media_files)
-            _, cleaned = adapter.extract_images(response)
+            # Chain the cleaned text through each extractor (extract_media →
+            # extract_images → extract_local_files) so MEDIA: tags and image URLs
+            # are removed before the bare-path auto-detect runs. Previously the
+            # cleaned text from extract_media was dropped (``_``) and
+            # extract_local_files scanned text that still contained MEDIA: tags,
+            # producing false-positive bare-path matches with the MEDIA: prefix
+            # glued on. This matches the chain order in gateway/platforms/base.py.
+            _, cleaned = adapter.extract_images(cleaned)
             local_files, _ = adapter.extract_local_files(cleaned)
             local_files = BasePlatformAdapter.filter_local_delivery_paths(local_files)
 
@@ -12222,6 +12431,12 @@ class GatewayRunner:
         Accepts an optional focus topic: ``/compress <focus>`` guides the
         summariser to preserve information related to *focus* while being
         more aggressive about discarding everything else.
+
+        Also accepts the boundary-aware form ``/compress here [N]``:
+        summarize everything except the most recent ``N`` exchanges
+        (default 2), kept verbatim. Inspired by Claude Code's Rewind
+        "Summarize up to here" action (v2.1.139, May 2026,
+        https://code.claude.com/docs/en/whats-new/2026-w20).
         """
         source = event.source
         session_entry = self.session_store.get_or_create_session(source)
@@ -12230,8 +12445,15 @@ class GatewayRunner:
         if not history or len(history) < 4:
             return t("gateway.compress.not_enough")
 
-        # Extract optional focus topic from command args
-        focus_topic = (event.get_command_args() or "").strip() or None
+        # Parse args: either a focus topic (full compress) or the
+        # boundary-aware "here [N]" form (partial compress).
+        from hermes_cli.partial_compress import (
+            parse_partial_compress_args,
+            rejoin_compressed_head_and_tail,
+            split_history_for_partial_compress,
+        )
+        _raw_args = (event.get_command_args() or "").strip()
+        partial, keep_last, focus_topic = parse_partial_compress_args(_raw_args)
 
         try:
             from run_agent import AIAgent
@@ -12252,6 +12474,19 @@ class GatewayRunner:
                 if m.get("role") in {"user", "assistant"} and m.get("content")
             ]
 
+            # Boundary-aware split: only the head is summarized; the most
+            # recent `keep_last` exchanges are preserved verbatim. The
+            # split snaps the tail to a user-turn start so the rejoined
+            # transcript keeps role alternation valid.
+            tail: list = []
+            head = msgs
+            if partial:
+                head, tail = split_history_for_partial_compress(msgs, keep_last)
+                if not tail:
+                    # Degenerate split — fall back to full compression.
+                    partial = False
+                    head = msgs
+
             tmp_agent = AIAgent(
                 **runtime_kwargs,
                 model=model,
@@ -12275,15 +12510,20 @@ class GatewayRunner:
                 )
 
                 compressor = tmp_agent.context_compressor
-                if not compressor.has_content_to_compress(msgs):
+                if not compressor.has_content_to_compress(head):
                     return t("gateway.compress.nothing_to_do")
 
                 loop = asyncio.get_running_loop()
                 compressed, _ = await loop.run_in_executor(
                     None,
-                    lambda: tmp_agent._compress_context(msgs, "", approx_tokens=approx_tokens, focus_topic=focus_topic, force=True)
+                    lambda: tmp_agent._compress_context(head, "", approx_tokens=approx_tokens, focus_topic=focus_topic, force=True)
                 )
 
+                # Re-append the verbatim tail after the compressed head,
+                # guarding the seam against illegal role adjacency.
+                if partial and tail:
+                    compressed = rejoin_compressed_head_and_tail(compressed, tail)
+
                 # _compress_context already calls end_session() on the old session
                 # (preserving its full transcript in SQLite) and creates a new
                 # session_id for the continuation.  Write the compressed messages
@@ -12292,6 +12532,9 @@ class GatewayRunner:
                 if new_session_id != session_entry.session_id:
                     session_entry.session_id = new_session_id
                     self.session_store._save()
+                    self._sync_telegram_topic_binding(
+                        source, session_entry, reason="compress-command",
+                    )
 
                 self.session_store.rewrite_transcript(new_session_id, compressed)
                 # Reset stored token count — transcript changed, old value is stale
@@ -17255,13 +17498,33 @@ class GatewayRunner:
             # append any that aren't already present in the final response, so the
             # adapter's extract_media() can find and deliver the files exactly once.
             #
-            # Uses path-based deduplication against _history_media_paths (collected
-            # before run_conversation) instead of index slicing. This is safe even
-            # when context compression shrinks the message list. (Fixes #160)
+            # Scope the scan to THIS turn's tool results only. ``agent_history``
+            # was passed into run_conversation as ``conversation_history``, so the
+            # agent's returned ``messages`` list is ``agent_history`` followed by
+            # the messages produced this turn. Slicing at ``len(agent_history)``
+            # isolates the current turn precisely, so a stale MEDIA: path emitted
+            # by a tool several turns earlier (still present in the full message
+            # list) can never leak onto a later text-only reply. (Fixes #34608)
+            #
+            # Path-based deduplication against _history_media_paths (collected
+            # before run_conversation) is retained as a secondary guard. It is
+            # also the sole guard on the fallback branch taken when mid-run
+            # context compression shrinks the message list below the original
+            # history length, preserving the compression-safe behaviour of #160.
             if "MEDIA:" not in final_response:
                 media_tags = []
                 has_voice_directive = False
-                for msg in result.get("messages", []):
+                _all_msgs = result.get("messages", [])
+                _history_len = len(agent_history)
+                # Only trust the slice boundary when the message list still
+                # contains the full history prefix. Mid-run compression can
+                # rewrite/shrink the list; in that case fall back to scanning
+                # everything and rely on _history_media_paths for dedup.
+                if _history_len and len(_all_msgs) >= _history_len:
+                    _scan_msgs = _all_msgs[_history_len:]
+                else:
+                    _scan_msgs = _all_msgs
+                for msg in _scan_msgs:
                     if msg.get("role") in {"tool", "function"}:
                         content = msg.get("content", "")
                         if "MEDIA:" in content:
@@ -18174,6 +18437,95 @@ class GatewayRunner:
         return response
 
 
+def _run_planned_stop_watcher(
+    stop_event: threading.Event,
+    runner,
+    loop: asyncio.AbstractEventLoop,
+    shutdown_handler,
+    *,
+    poll_interval: float = 0.5,
+) -> None:
+    """Poll for the planned-stop marker and trigger graceful shutdown.
+
+    On Windows, ``asyncio.add_signal_handler`` raises NotImplementedError
+    for SIGTERM/SIGINT, so the standard signal-driven shutdown path
+    never runs when ``hermes gateway stop`` signals the gateway. The
+    consequence is that the drain loop is skipped — in-flight agent
+    sessions are killed mid-turn and ``resume_pending`` is never set,
+    so the next gateway boot has no idea those sessions need to be
+    auto-resumed (issue #33778, v0.13.0 session-resume feature broken
+    on native Windows).
+
+    This watcher runs on every platform (cheap, defensive) and bridges
+    the gap on Windows by translating a filesystem marker into the
+    same shutdown-handler invocation a real SIGTERM would have produced
+    on POSIX. The CLI's ``hermes_cli.gateway_windows.stop()`` writes
+    the marker via ``write_planned_stop_marker(pid)`` and then waits
+    for the gateway PID to exit; this watcher is what makes that
+    exit happen cleanly.
+
+    On POSIX this is a no-op safety net — the signal handler always
+    races us to consuming the marker file because it fires synchronously
+    from the kernel's signal delivery.
+
+    Args:
+        stop_event: cleared by start_gateway() during normal shutdown
+            to tell the watcher to exit.
+        runner: the GatewayRunner instance; we check ``_running`` and
+            ``_draining`` to avoid triggering shutdown if the gateway
+            is already in one of those states.
+        loop: the asyncio event loop the shutdown handler must run on.
+        shutdown_handler: same callable that's wired to SIGTERM —
+            tolerates a ``None`` signal argument (planned stop case)
+            and consumes the marker via
+            ``consume_planned_stop_marker_for_self()``.
+        poll_interval: seconds between marker checks. 0.5s gives a
+            responsive shutdown without burning CPU.
+    """
+    from gateway.status import (
+        _get_planned_stop_marker_path,
+        planned_stop_marker_targets_self,
+    )
+    marker_path = _get_planned_stop_marker_path()
+    while not stop_event.is_set():
+        try:
+            if (
+                marker_path.exists()
+                and not getattr(runner, "_draining", False)
+                and getattr(runner, "_running", False)
+            ):
+                # A marker existing is NOT sufficient — it may have been
+                # written for a PREVIOUS gateway instance (different PID)
+                # and left behind because that process exited before the
+                # CLI's stop() could clean it up. Firing the handler on a
+                # stale/foreign marker drives the gateway into shutdown,
+                # then consume_planned_stop_marker_for_self() correctly
+                # reports a PID mismatch — but by then we're already
+                # stopping, so it's logged as an unexpected "UNKNOWN" exit
+                # and the watchdog crash-loops the gateway (issue #34597,
+                # a regression from PR #33798 which added this watcher
+                # without the PID check).
+                #
+                # Only fire when the marker actually targets us. The probe
+                # is non-destructive on a match (the handler does the
+                # authoritative consume on the loop thread) and self-heals
+                # by unlinking stale/malformed markers so they cannot wedge
+                # a freshly booted gateway.
+                if not planned_stop_marker_targets_self():
+                    stop_event.wait(poll_interval)
+                    continue
+                # Drive the same path as a real signal handler.
+                # Pass signal=None — the handler tolerates that and consumes
+                # the marker via consume_planned_stop_marker_for_self,
+                # which also validates target_pid + start_time match us.
+                loop.call_soon_threadsafe(shutdown_handler, None)
+                # Done — the handler will set _draining; we exit on next tick.
+                break
+        except Exception as _e:
+            logger.debug("Planned-stop watcher tick error: %s", _e)
+        stop_event.wait(poll_interval)
+
+
 def _start_cron_ticker(stop_event: threading.Event, adapters=None, loop=None, interval: int = 60):
     """
     Background thread that ticks the cron scheduler at a regular interval.
@@ -18551,7 +18903,28 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
                 pass
     else:
         logger.info("Skipping signal handlers (not running in main thread).")
-    
+
+    # Windows fallback: asyncio.add_signal_handler raises NotImplementedError
+    # on Windows, so `hermes gateway stop`'s SIGTERM (which Python maps to
+    # TerminateProcess on Windows) never invokes shutdown_signal_handler.
+    # That means the drain loop never runs, mark_resume_pending never fires,
+    # and sessions are silently lost across restarts (issue #33778).
+    #
+    # The fix is a marker-polling thread: `hermes gateway stop` writes the
+    # planned-stop marker BEFORE killing, and this thread notices it and
+    # drives the same shutdown path the signal handler would have.  Runs
+    # on every platform (cheap, defensive) so non-signal-bearing
+    # environments (Windows native, sandboxed CI runners that mask
+    # SIGTERM) still get a clean drain.
+    _planned_stop_watcher_stop = threading.Event()
+    _planned_stop_watcher_thread = threading.Thread(
+        target=_run_planned_stop_watcher,
+        args=(_planned_stop_watcher_stop, runner, loop, shutdown_signal_handler),
+        daemon=True,
+        name="planned-stop-watcher",
+    )
+    _planned_stop_watcher_thread.start()
+
     # Claim the PID file BEFORE bringing up any platform adapters.
     # This closes the --replace race window: two concurrent `gateway run
     # --replace` invocations both pass the termination-wait above, but
@@ -18629,6 +19002,10 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
     cron_stop.set()
     cron_thread.join(timeout=5)
 
+    # Stop the planned-stop watcher (daemon=True so this is belt-and-suspenders).
+    _planned_stop_watcher_stop.set()
+    _planned_stop_watcher_thread.join(timeout=2)
+
     # Close MCP server connections
     try:
         from tools.mcp_tool import shutdown_mcp_servers
diff --git a/gateway/runtime_footer.py b/gateway/runtime_footer.py
index 9d3fea2523b..024cf74d681 100644
--- a/gateway/runtime_footer.py
+++ b/gateway/runtime_footer.py
@@ -26,7 +26,6 @@ piecemeal, the footer is sent as a separate trailing message via
 from __future__ import annotations
 
 import os
-from pathlib import Path
 from typing import Any, Iterable, Optional
 
 _DEFAULT_FIELDS: tuple[str, ...] = ("model", "context_pct", "cwd")
diff --git a/gateway/status.py b/gateway/status.py
index 516ea8f385e..935758b904b 100644
--- a/gateway/status.py
+++ b/gateway/status.py
@@ -816,12 +816,24 @@ def _consume_pid_marker_for_self(
 
     our_pid = os.getpid()
     our_start_time = _get_process_start_time(our_pid)
-    matches = (
-        target_pid == our_pid
-        and target_start_time is not None
-        and our_start_time is not None
-        and target_start_time == our_start_time
-    )
+    # Start-time is a PID-reuse guard. It is only meaningful when both
+    # sides actually have it: ``_get_process_start_time`` returns None on
+    # platforms without ``/proc`` (macOS, native Windows — the very
+    # platform the planned-stop watcher exists for). Requiring a non-None
+    # match there would make every consume return False, so a legitimate
+    # ``hermes gateway stop`` on Windows would be misclassified as an
+    # unexpected ``UNKNOWN`` exit (exit 1) and revived by the service
+    # manager. So: when both start_times are known they must match; when
+    # either is unknown, fall back to PID equality alone (bounded by the
+    # marker's short TTL). This mirrors ``planned_stop_marker_targets_self``
+    # so the watcher's non-destructive probe and this authoritative
+    # consume agree on every platform (issue #34597).
+    if target_pid != our_pid:
+        matches = False
+    elif target_start_time is not None and our_start_time is not None:
+        matches = target_start_time == our_start_time
+    else:
+        matches = True
 
     try:
         path.unlink(missing_ok=True)
@@ -914,6 +926,68 @@ def consume_planned_stop_marker_for_self() -> bool:
     )
 
 
+def planned_stop_marker_targets_self() -> bool:
+    """Return True only when a live planned-stop marker names the current process.
+
+    This is a **non-destructive** probe used by the watcher thread
+    (``gateway/run.py:_run_planned_stop_watcher``) to decide whether to
+    trigger shutdown. Unlike :func:`consume_planned_stop_marker_for_self`,
+    it never unlinks a marker that matches us — the shutdown handler does
+    the authoritative consume on its own thread.
+
+    It *does* clean up markers that can never apply to this process:
+    malformed markers and markers older than the TTL are unlinked so a
+    stale file left behind by a previous gateway instance cannot wedge
+    the new one. Markers naming a different PID/start_time are left in
+    place (they may still be consumed legitimately by the process they
+    name) but report False here.
+
+    Returns False (without raising) on any read/parse error.
+    """
+    path = _get_planned_stop_marker_path()
+    record = _read_json_file(path)
+    if not record:
+        return False
+
+    try:
+        target_pid = int(record["target_pid"])
+        target_start_time = record.get("target_start_time")
+        written_at = record.get("written_at") or ""
+    except (KeyError, TypeError, ValueError):
+        # Malformed marker can never match anyone — drop it.
+        try:
+            path.unlink(missing_ok=True)
+        except OSError:
+            pass
+        return False
+
+    if _marker_is_stale(written_at, _PLANNED_STOP_MARKER_TTL_S):
+        # A marker this old is past its useful life regardless of target —
+        # clean it up so it cannot crash-loop a freshly booted gateway.
+        try:
+            path.unlink(missing_ok=True)
+        except OSError:
+            pass
+        return False
+
+    our_pid = os.getpid()
+    if target_pid != our_pid:
+        return False
+
+    # Start-time is a PID-reuse guard. It is only meaningful when both
+    # sides actually have it: ``_get_process_start_time`` returns None on
+    # platforms without ``/proc`` (macOS, native Windows — the very
+    # platform this watcher exists for). Requiring a non-None match there
+    # would make the watcher never fire and re-break the #33778 Windows
+    # session-resume path. So: when both start_times are known they must
+    # match; when either is unknown, fall back to PID equality alone
+    # (the marker is short-lived under a 60s TTL, bounding reuse risk).
+    our_start_time = _get_process_start_time(our_pid)
+    if target_start_time is not None and our_start_time is not None:
+        return target_start_time == our_start_time
+    return True
+
+
 def clear_planned_stop_marker() -> None:
     """Remove the planned-stop marker unconditionally."""
     try:
diff --git a/gateway/stream_consumer.py b/gateway/stream_consumer.py
index 4ba65ddf4c5..a06470cbcff 100644
--- a/gateway/stream_consumer.py
+++ b/gateway/stream_consumer.py
@@ -26,6 +26,7 @@ from typing import Any, Callable, Optional
 
 from gateway.platforms.base import BasePlatformAdapter as _BasePlatformAdapter
 from gateway.platforms.base import _custom_unit_to_cp
+from gateway.platforms.base import MEDIA_TAG_CLEANUP_RE
 from gateway.config import (
     DEFAULT_STREAMING_EDIT_INTERVAL as _DEFAULT_STREAMING_EDIT_INTERVAL,
     DEFAULT_STREAMING_BUFFER_THRESHOLD as _DEFAULT_STREAMING_BUFFER_THRESHOLD,
@@ -552,11 +553,6 @@ class GatewayStreamConsumer:
                     self._last_edit_time = time.monotonic()
 
                 if got_done:
-                    # Record that the final content reached the user even
-                    # if the cosmetic final edit below fails.
-                    if current_update_visible and self._accumulated:
-                        self._final_content_delivered = True
-
                     # Final edit without cursor. If progressive editing failed
                     # mid-stream, send a single continuation/fallback message
                     # here instead of letting the base gateway path send the
@@ -573,6 +569,7 @@ class GatewayStreamConsumer:
                             # final edit — but only for adapters that don't
                             # need an explicit finalize signal.
                             self._final_response_sent = True
+                            self._final_content_delivered = True
                         elif self._message_id:
                             # Either the mid-stream edit didn't run (no
                             # visible update this tick) OR the adapter needs
@@ -580,8 +577,12 @@ class GatewayStreamConsumer:
                             self._final_response_sent = await self._send_or_edit(
                                 self._accumulated, finalize=True,
                             )
+                            if self._final_response_sent:
+                                self._final_content_delivered = True
                         elif not self._already_sent:
                             self._final_response_sent = await self._send_or_edit(self._accumulated)
+                            if self._final_response_sent:
+                                self._final_content_delivered = True
                     return
 
                 if commentary_text is not None:
@@ -641,13 +642,17 @@ class GatewayStreamConsumer:
             # "Let me search…") had been delivered, not the real answer.
             if _best_effort_ok and not self._final_response_sent:
                 self._final_response_sent = True
+                self._final_content_delivered = True
         except Exception as e:
             logger.error("Stream consumer error: %s", e)
 
-    # Pattern to strip MEDIA:<path> tags (including optional surrounding quotes).
-    # Matches the simple cleanup regex used by the non-streaming path in
-    # gateway/platforms/base.py for post-processing.
-    _MEDIA_RE = re.compile(r'''[`"']?MEDIA:\s*\S+[`"']?''')
+    # Strip MEDIA:<path> tags before display. Uses the shared anchored
+    # MEDIA_TAG_CLEANUP_RE from gateway/platforms/base.py — only tags whose
+    # path ends in a deliverable extension are removed, so an unknown-extension
+    # path stays visible instead of being silently dropped (issue #34517).
+    # Streaming and non-streaming paths share the same regex, so a tag is
+    # treated identically whichever path delivered the text.
+    _MEDIA_RE = MEDIA_TAG_CLEANUP_RE
 
     @staticmethod
     def _clean_for_display(text: str) -> str:
@@ -778,6 +783,7 @@ class GatewayStreamConsumer:
                         pass
                 self._already_sent = True
                 self._final_response_sent = True
+                self._final_content_delivered = True
                 return
 
         raw_limit = getattr(self.adapter, "MAX_MESSAGE_LENGTH", 4096)
@@ -814,11 +820,13 @@ class GatewayStreamConsumer:
 
             if not result or not result.success:
                 if sent_any_chunk:
-                    # Some continuation text already reached the user. Suppress
-                    # the base gateway final-send path so we don't resend the
-                    # full response and create another duplicate.
+                    # Some continuation text already reached the user, but not
+                    # the full response. Do NOT set _final_response_sent — the
+                    # base gateway final-send path should still deliver the
+                    # complete response so the user gets the full answer.
+                    # Suppress only _already_sent to avoid a duplicate send
+                    # of the same partial content.
                     self._already_sent = True
-                    self._final_response_sent = True
                     self._message_id = last_message_id
                     self._last_sent_text = last_successful_chunk
                     self._fallback_prefix = ""
@@ -856,6 +864,7 @@ class GatewayStreamConsumer:
         self._message_id = last_message_id
         self._already_sent = True
         self._final_response_sent = True
+        self._final_content_delivered = True
         self._last_sent_text = chunks[-1]
         self._fallback_prefix = ""
 
diff --git a/hermes_cli/__init__.py b/hermes_cli/__init__.py
index 9781c8bc689..5e6a99ac0ef 100644
--- a/hermes_cli/__init__.py
+++ b/hermes_cli/__init__.py
@@ -14,8 +14,8 @@ Provides subcommands for:
 import os
 import sys
 
-__version__ = "0.14.0"
-__release_date__ = "2026.5.16"
+__version__ = "0.15.1"
+__release_date__ = "2026.5.29"
 
 
 def _ensure_utf8():
diff --git a/hermes_cli/_subprocess_compat.py b/hermes_cli/_subprocess_compat.py
index 941728be8ea..4d4ad3f18dc 100644
--- a/hermes_cli/_subprocess_compat.py
+++ b/hermes_cli/_subprocess_compat.py
@@ -27,11 +27,9 @@ guarantee.
 
 from __future__ import annotations
 
-import os
 import shutil
-import subprocess
 import sys
-from typing import Optional, Sequence
+from typing import Sequence
 
 __all__ = [
     "IS_WINDOWS",
diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py
index dd2a17e5f44..4fc59d926da 100644
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
@@ -9,14 +9,11 @@ Architecture:
 - ProviderConfig registry defines known OAuth providers
 - Auth store (auth.json) holds per-provider credential state
 - resolve_provider() picks the active provider via priority chain
-- resolve_*_runtime_credentials() handles token refresh and key minting
+- resolve_*_runtime_credentials() handles token refresh and runtime keys
 - logout_command() is the CLI entry point for clearing auth
 
 Nous authentication paths:
 - Invoke JWT (preferred): use a scoped access_token directly for inference.
-- Legacy session key (fallback): mint an opaque 24h key when JWT auth is
-  unavailable, or when HERMES_AGENT_USE_LEGACY_SESSION_KEYS is set for
-  debugging or rollback.
 """
 
 from __future__ import annotations
@@ -45,7 +42,6 @@ from typing import Any, Callable, Dict, FrozenSet, List, Optional, Tuple
 from urllib.parse import parse_qs, urlencode, urlparse
 
 import httpx
-import yaml
 
 from hermes_cli.config import get_hermes_home, get_config_path, read_raw_config
 from hermes_constants import OPENROUTER_BASE_URL, secure_parent_dir
@@ -74,23 +70,10 @@ AUTH_LOCK_TIMEOUT_SECONDS = 15.0
 DEFAULT_NOUS_PORTAL_URL = "https://portal.nousresearch.com"
 DEFAULT_NOUS_INFERENCE_URL = "https://inference-api.nousresearch.com/v1"
 DEFAULT_NOUS_CLIENT_ID = "hermes-cli"
-NOUS_LEGACY_AGENT_KEY_SCOPE = "inference:mint_agent_key"
 NOUS_INFERENCE_INVOKE_SCOPE = "inference:invoke"
-DEFAULT_NOUS_SCOPE = f"{NOUS_INFERENCE_INVOKE_SCOPE} {NOUS_LEGACY_AGENT_KEY_SCOPE}"
-NOUS_LEGACY_SESSION_KEYS_ENV = "HERMES_AGENT_USE_LEGACY_SESSION_KEYS"
+DEFAULT_NOUS_SCOPE = NOUS_INFERENCE_INVOKE_SCOPE
 NOUS_DEVICE_CODE_SOURCE = "device_code"
-NOUS_INFERENCE_AUTH_MODE_AUTO = "auto"
-NOUS_INFERENCE_AUTH_MODE_FRESH = "fresh"
-NOUS_INFERENCE_AUTH_MODE_LEGACY = "legacy"
-NOUS_INFERENCE_AUTH_MODES = frozenset({
-    NOUS_INFERENCE_AUTH_MODE_AUTO,
-    NOUS_INFERENCE_AUTH_MODE_FRESH,
-    NOUS_INFERENCE_AUTH_MODE_LEGACY,
-})
 NOUS_AUTH_PATH_INVOKE_JWT = "invoke_jwt"
-NOUS_AUTH_PATH_LEGACY_SESSION_KEY_CACHE = "legacy_session_key_cache"
-NOUS_AUTH_PATH_LEGACY_SESSION_KEY_MINT = "legacy_session_key_mint"
-DEFAULT_AGENT_KEY_MIN_TTL_SECONDS = 30 * 60  # 30 minutes
 ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120       # refresh 2 min before expiry
 NOUS_INVOKE_JWT_MIN_TTL_SECONDS = ACCESS_TOKEN_REFRESH_SKEW_SECONDS
 DEVICE_AUTH_POLL_INTERVAL_CAP_SECONDS = 1     # poll at most every 1s
@@ -802,16 +785,18 @@ def format_auth_error(error: Exception) -> str:
         return f"{error} Run `hermes model` to re-authenticate."
 
     if error.code == "subscription_required":
-        return (
-            "No active paid subscription found on Nous Portal. "
-            "Please purchase/activate a subscription, then retry."
-        )
+        if error.provider == "nous":
+            return _format_nous_entitlement_auth_error(error)
+        return "No active paid subscription found. Please purchase/activate a subscription, then retry."
 
     if error.code == "insufficient_credits":
-        return (
-            "Subscription credits are exhausted. "
-            "Top up/renew credits in Nous Portal, then retry."
-        )
+        if error.provider == "nous":
+            return _format_nous_entitlement_auth_error(error)
+        return "Subscription credits are exhausted. Top up/renew credits, then retry."
+
+    if error.code in {"subscription_expired", "no_usable_credits", "account_missing"}:
+        if error.provider == "nous":
+            return _format_nous_entitlement_auth_error(error)
 
     if error.code == "temporarily_unavailable":
         return f"{error} Please retry in a few seconds."
@@ -819,6 +804,25 @@ def format_auth_error(error: Exception) -> str:
     return str(error)
 
 
+def _format_nous_entitlement_auth_error(error: AuthError) -> str:
+    try:
+        from hermes_cli.nous_account import (
+            format_nous_portal_entitlement_message,
+            get_nous_portal_account_info,
+        )
+
+        account_info = get_nous_portal_account_info(force_fresh=True)
+        message = format_nous_portal_entitlement_message(
+            account_info,
+            capability="Nous model access",
+        )
+        if message:
+            return message
+    except Exception:
+        pass
+    return f"{error} Check credits or billing in Nous Portal, then retry."
+
+
 def _token_fingerprint(token: Any) -> Optional[str]:
     """Return a short hash fingerprint for telemetry without leaking token bytes."""
     if not isinstance(token, str):
@@ -1633,12 +1637,11 @@ def _optional_base_url(value: Any) -> Optional[str]:
     return cleaned if cleaned else None
 
 
-# Allowlist of hosts the Nous Portal proxy is willing to forward minted
-# bearer tokens to. The bearer is a long-lived agent_key minted by
-# portal.nousresearch.com — sending it anywhere else would leak it.
+# Allowlist of hosts the Nous Portal proxy is willing to forward inference
+# JWTs to. Sending a bearer anywhere else would leak it.
 #
 # This is consulted only for URLs coming from the NETWORK side (Portal
-# refresh / agent-key-mint responses). User-controlled env-var overrides
+# refresh responses). User-controlled env-var overrides
 # (NOUS_INFERENCE_BASE_URL) bypass validation — that's the documented
 # dev/staging escape hatch and the env source is already trusted (the
 # user set it themselves).
@@ -1656,10 +1659,10 @@ def _validate_nous_inference_url_from_network(url: Optional[str]) -> Optional[st
     unexpected host — letting the caller fall back to the configured
     default rather than persist or forward a poisoned value.
 
-    Defense-in-depth: a compromised refresh / mint response from the
-    Portal API (MITM, malicious response injection) could otherwise
-    redirect every subsequent proxy request — bearing the user's
-    legitimately-minted agent_key — to an attacker-controlled endpoint.
+    Defense-in-depth: a compromised refresh response from the Portal API
+    (MITM, malicious response injection) could otherwise redirect every
+    subsequent proxy request — bearing the user's inference JWT — to an
+    attacker-controlled endpoint.
     Validating scheme + host at the source closes that loop before the
     poisoned URL ever lands in ``auth.json``.
 
@@ -1723,25 +1726,6 @@ def _scope_values(raw_scope: Any) -> set[str]:
     return scopes
 
 
-def _nous_legacy_session_keys_forced() -> bool:
-    return is_truthy_value(os.getenv(NOUS_LEGACY_SESSION_KEYS_ENV), default=False)
-
-
-def _nous_scope_has_invoke(raw_scope: Any) -> bool:
-    return NOUS_INFERENCE_INVOKE_SCOPE in _scope_values(raw_scope)
-
-
-def _normalize_nous_inference_auth_mode(inference_auth_mode: Optional[str]) -> str:
-    mode = str(inference_auth_mode or NOUS_INFERENCE_AUTH_MODE_AUTO).strip().lower()
-    if mode not in NOUS_INFERENCE_AUTH_MODES:
-        allowed = ", ".join(sorted(NOUS_INFERENCE_AUTH_MODES))
-        raise ValueError(
-            "Invalid Nous inference auth mode "
-            f"{inference_auth_mode!r}; expected one of: {allowed}"
-        )
-    return mode
-
-
 def _nous_invoke_jwt_status(
     token: Any,
     *,
@@ -1789,58 +1773,25 @@ def _nous_invoke_jwt_is_usable(
     )
 
 
-def _nous_legacy_session_key_reason(
-    token: Any,
-    *,
-    scope: Any = None,
-    expires_at: Any = None,
-    inference_auth_mode: str = NOUS_INFERENCE_AUTH_MODE_AUTO,
-) -> str:
-    if inference_auth_mode == NOUS_INFERENCE_AUTH_MODE_LEGACY:
-        return "forced_legacy_session_key"
-    if _nous_legacy_session_keys_forced():
-        return "forced_legacy_session_keys"
-    return (
-        _nous_invoke_jwt_status(token, scope=scope, expires_at=expires_at)
-        or "invoke_jwt_unavailable"
-    )
-
-
-def _choose_nous_inference_auth_path(
+def _assert_nous_inference_jwt_usable(
     state: Dict[str, Any],
     *,
     access_token: Any = None,
-    min_key_ttl_seconds: int = DEFAULT_AGENT_KEY_MIN_TTL_SECONDS,
-    inference_auth_mode: str = NOUS_INFERENCE_AUTH_MODE_AUTO,
-) -> Tuple[str, Optional[str]]:
-    inference_auth_mode = _normalize_nous_inference_auth_mode(inference_auth_mode)
+) -> None:
     token = state.get("access_token") if access_token is None else access_token
-    if (
-        not _nous_legacy_session_keys_forced()
-        and inference_auth_mode != NOUS_INFERENCE_AUTH_MODE_LEGACY
-        and _nous_invoke_jwt_is_usable(
-            token,
-            scope=state.get("scope"),
-            expires_at=state.get("expires_at"),
-        )
-    ):
-        return NOUS_AUTH_PATH_INVOKE_JWT, None
-    if (
-        inference_auth_mode == NOUS_INFERENCE_AUTH_MODE_AUTO
-        and _agent_key_is_usable(
-            state,
-            max(60, int(min_key_ttl_seconds)),
-        )
-    ):
-        return NOUS_AUTH_PATH_LEGACY_SESSION_KEY_CACHE, None
-    return (
-        NOUS_AUTH_PATH_LEGACY_SESSION_KEY_MINT,
-        _nous_legacy_session_key_reason(
-            token,
-            scope=state.get("scope"),
-            expires_at=state.get("expires_at"),
-            inference_auth_mode=inference_auth_mode,
-        ),
+    reason = _nous_invoke_jwt_status(
+        token,
+        scope=state.get("scope"),
+        expires_at=state.get("expires_at"),
+    )
+    if reason is None:
+        return
+    raise AuthError(
+        "Nous Portal access token is not a usable inference JWT "
+        f"({reason}). Re-authenticate with: hermes auth add nous",
+        provider="nous",
+        code=reason,
+        relogin_required=True,
     )
 
 
@@ -1857,24 +1808,6 @@ def _log_nous_invoke_jwt_selected(
     )
 
 
-def _log_nous_legacy_session_key_selected(
-    reason: str,
-    *,
-    access_token: Any,
-    sequence_id: Optional[str] = None,
-) -> None:
-    logger.info(
-        "Nous inference auth: using legacy session key path (%s)",
-        reason,
-    )
-    _oauth_trace(
-        "nous_legacy_session_key_selected",
-        sequence_id=sequence_id,
-        reason=reason,
-        access_token_fp=_token_fingerprint(access_token),
-    )
-
-
 def _nous_jwt_expires_at(token: Any, fallback_expires_at: Any = None) -> Optional[str]:
     claims = _decode_jwt_claims(token)
     exp = claims.get("exp")
@@ -3013,7 +2946,7 @@ def login_spotify_command(args) -> None:
 
     _print_loopback_ssh_hint(redirect_uri, docs_url=SPOTIFY_DOCS_URL)
 
-    if open_browser and not _is_remote_session():
+    if open_browser and not _is_remote_session() and _can_open_graphical_browser():
         try:
             opened = webbrowser.open(authorize_url)
         except Exception:
@@ -3094,6 +3027,83 @@ def _is_remote_session() -> bool:
     return False
 
 
+# Console/text-mode browsers that ``webbrowser`` will happily launch INSIDE
+# the terminal.  Opening one of these is worse than not opening anything —
+# it hijacks the user's TTY with an unusable text browser (the xAI OAuth
+# "Account Management" page rendered in w3m, reported May 2026) instead of
+# letting them copy the URL to a real browser.  When the resolved browser is
+# one of these we refuse to auto-open and fall back to the print-the-URL /
+# manual-paste path, same as a remote session.
+_CONSOLE_BROWSER_NAMES: FrozenSet[str] = frozenset(
+    {
+        "w3m",
+        "lynx",
+        "links",
+        "links2",
+        "elinks",
+        "www-browser",
+        "browsh",  # TUI browser — still hijacks the terminal
+    }
+)
+
+
+def _can_open_graphical_browser() -> bool:
+    """Return True only when a *graphical* browser is likely to open.
+
+    ``webbrowser.open()`` resolves to whatever the platform offers, and on a
+    headless / CLI-only Linux box with no GUI browser installed that is often
+    a text-mode browser (w3m/lynx/links) which launches inside the terminal
+    and takes over the user's session.  This guard distinguishes "a real
+    windowed browser will pop up" from "a console browser will hijack the
+    TTY", so callers can fall back to printing the URL instead.
+
+    Heuristics:
+      * Respect ``$BROWSER`` — if it names a known console browser, refuse.
+      * On Linux, require a display server (``$DISPLAY`` / ``$WAYLAND_DISPLAY``)
+        unless ``$BROWSER`` points at something graphical; no display server
+        almost always means no GUI browser.
+      * Ask ``webbrowser.get()`` what it resolved to and refuse when the
+        underlying command is a known console browser.
+      * macOS and Windows always have a usable default GUI browser.
+    """
+    import webbrowser as _webbrowser
+
+    def _names_console_browser(value: str) -> bool:
+        token = value.strip().split()[0] if value.strip() else ""
+        base = os.path.basename(token).lower()
+        return base in _CONSOLE_BROWSER_NAMES
+
+    browser_env = os.environ.get("BROWSER", "")
+    if browser_env and _names_console_browser(browser_env):
+        return False
+
+    if sys.platform.startswith("linux"):
+        has_display = bool(
+            os.environ.get("DISPLAY") or os.environ.get("WAYLAND_DISPLAY")
+        )
+        # An explicit graphical $BROWSER can work without $DISPLAY in odd
+        # setups, but a console $BROWSER already returned False above, so the
+        # only way to reach here with a $BROWSER set is a graphical one.
+        if not has_display and not browser_env:
+            return False
+
+    try:
+        controller = _webbrowser.get()
+    except Exception:
+        # No browser resolvable at all → definitely don't auto-open.
+        return False
+
+    candidate = (
+        getattr(controller, "name", "")
+        or getattr(controller, "basename", "")
+        or ""
+    )
+    if candidate and _names_console_browser(candidate):
+        return False
+
+    return True
+
+
 def _parse_pasted_callback(raw: str) -> dict:
     """Parse a pasted callback URL / query string into the loopback shape.
 
@@ -3160,6 +3170,9 @@ def _prompt_manual_callback_paste(redirect_uri: str) -> dict:
     print("not on your laptop) — that is expected.  Copy the FULL URL")
     print("from your browser's address bar of that failed page and paste")
     print("it below.  A bare '?code=...&state=...' fragment also works.")
+    print("If the consent page shows the authorization code in-page")
+    print("(xAI's current behavior) rather than redirecting, paste the")
+    print("bare code value on its own.")
     print("───────────────────────────────────────────────────────────────")
     try:
         raw = input("Callback URL: ")
@@ -3291,16 +3304,38 @@ def _sync_codex_pool_entries(
     tokens: Dict[str, str],
     last_refresh: Optional[str],
 ) -> None:
-    """Mirror a fresh Codex re-auth into the credential_pool singleton entries.
+    """Mirror a fresh Codex re-auth into the credential_pool OAuth entries.
 
     The runtime selects credentials from ``credential_pool.openai-codex``, not
     from ``providers.openai-codex.tokens``.  A re-auth invalidates the prior
-    OAuth pair server-side, but the pool's ``device_code`` entry keeps holding
-    the now-consumed refresh token plus any stale error markers — so the next
-    request spends a dead token and gets a 401 ``token_invalidated``.  Update
-    the singleton-seeded entries in lockstep with the provider tokens and clear
-    the error state so the fresh credentials take effect immediately.  Manual
-    (``manual:*``) entries are independent credentials and are left untouched.
+    OAuth pair server-side, but pool entries keep holding the now-consumed
+    refresh token plus any stale error markers — so the next request spends a
+    dead token and gets a 401 ``token_invalidated``.
+
+    What gets refreshed:
+
+    * ``device_code`` — the singleton-seeded entry written by the device-code
+      OAuth flow when the user logged in via ``hermes setup`` / the model
+      picker.  Always synced with the fresh tokens.
+    * ``manual:device_code`` — entries created by ``hermes auth add openai-codex``
+      that use the same device-code OAuth mechanism.  An interactive re-auth
+      proves the user owns the ChatGPT account, so it is safe (and expected)
+      to refresh these entries too.  Without this, a user who once ran the
+      ``hermes auth add`` workaround for #33000 would silently leave that
+      manual entry stale on every subsequent re-auth, recreating the issue
+      reported in #33538.
+
+    What does NOT get refreshed:
+
+    * ``manual:api_key`` and any other non-device-code manual sources — those
+      are independent credentials (an explicit API key, a different ChatGPT
+      account, etc.) and must not be overwritten by a single re-auth.
+
+    Error markers (``last_status``, ``last_error_*``) are also cleared on
+    every device-code-backed entry — even those whose tokens we did not
+    rewrite — so that an interactive re-auth gives every relevant pool entry
+    a fresh selection chance instead of leaving them marked unhealthy from a
+    pre-re-auth 401.
     """
     access_token = tokens.get("access_token")
     if not access_token:
@@ -3312,8 +3347,15 @@ def _sync_codex_pool_entries(
     entries = pool.get("openai-codex")
     if not isinstance(entries, list):
         return
+    # Sources whose tokens should be rewritten by a fresh Codex device-code
+    # OAuth re-auth.  ``manual:api_key`` and unknown sources are intentionally
+    # excluded — they represent independent credentials.
+    REFRESHABLE_SOURCES = {"device_code", "manual:device_code"}
     for entry in entries:
-        if not isinstance(entry, dict) or entry.get("source") != "device_code":
+        if not isinstance(entry, dict):
+            continue
+        source = entry.get("source")
+        if source not in REFRESHABLE_SOURCES:
             continue
         entry["access_token"] = access_token
         if refresh_token:
@@ -4175,85 +4217,6 @@ def _request_device_code(
     return data
 
 
-def _is_nous_invoke_scope_refusal(exc: Exception) -> bool:
-    if not isinstance(exc, httpx.HTTPStatusError):
-        return False
-    response = exc.response
-    if response.status_code not in {400, 401, 403}:
-        return False
-    try:
-        payload = response.json()
-    except Exception:
-        payload = {}
-    text = " ".join(
-        str(value)
-        for value in (
-            payload.get("error") if isinstance(payload, dict) else None,
-            payload.get("error_description") if isinstance(payload, dict) else None,
-            response.text,
-        )
-        if value
-    ).lower()
-    if not text:
-        return False
-    return (
-        "invalid_scope" in text
-        or "unsupported_scope" in text
-        or "scope" in text and NOUS_INFERENCE_INVOKE_SCOPE in text
-    )
-
-
-def _nous_device_scope_with_env_override(
-    requested_scope: Optional[str],
-    *,
-    default_scope: str = DEFAULT_NOUS_SCOPE,
-) -> Tuple[str, bool]:
-    explicit_scope = requested_scope is not None
-    scope = requested_scope or default_scope
-    if _nous_legacy_session_keys_forced():
-        scope = NOUS_LEGACY_AGENT_KEY_SCOPE
-    return scope, explicit_scope
-
-
-def _request_nous_device_code_with_scope_fallback(
-    *,
-    client: httpx.Client,
-    portal_base_url: str,
-    client_id: str,
-    scope: str,
-    allow_legacy_fallback: bool,
-) -> Tuple[Dict[str, Any], str]:
-    try:
-        return (
-            _request_device_code(
-                client=client,
-                portal_base_url=portal_base_url,
-                client_id=client_id,
-                scope=scope,
-            ),
-            scope,
-        )
-    except Exception as exc:
-        if (
-            allow_legacy_fallback
-            and _nous_scope_has_invoke(scope)
-            and _is_nous_invoke_scope_refusal(exc)
-        ):
-            logger.info("Nous inference auth: NAS refused invoke scope, retrying legacy scope")
-            _oauth_trace("nous_device_code_invoke_scope_refused")
-            retry_scope = NOUS_LEGACY_AGENT_KEY_SCOPE
-            return (
-                _request_device_code(
-                    client=client,
-                    portal_base_url=portal_base_url,
-                    client_id=client_id,
-                    scope=retry_scope,
-                ),
-                retry_scope,
-            )
-        raise
-
-
 def _poll_for_token(
     client: httpx.Client,
     portal_base_url: str,
@@ -4304,7 +4267,7 @@ def _poll_for_token(
 
 
 # =============================================================================
-# Nous Portal — token refresh, agent key minting, model discovery
+# Nous Portal — token refresh and model discovery
 # =============================================================================
 
 # -----------------------------------------------------------------------------
@@ -4383,9 +4346,9 @@ def _nous_shared_store_lock(timeout_seconds: float = AUTH_LOCK_TIMEOUT_SECONDS):
     to be held, acquire ``_auth_store_lock`` FIRST. All runtime refresh
     paths follow this order. The one exception is
     ``_try_import_shared_nous_state``, which holds this lock alone for
-    the entire refresh+mint cycle so concurrent imports on sibling
-    profiles can't race on the single-use shared refresh token; that
-    helper must NOT be called with ``_auth_store_lock`` already held.
+    the entire refresh cycle so concurrent imports on sibling profiles
+    can't race on the single-use shared refresh token; that helper must
+    NOT be called with ``_auth_store_lock`` already held.
     """
     try:
         lock_path = _nous_shared_store_path().with_suffix(".lock")
@@ -4445,9 +4408,8 @@ def _write_shared_nous_state(state: Dict[str, Any]) -> None:
     is a convenience layer; the per-profile auth.json remains the source
     of truth.
 
-    We deliberately omit the runtime ``agent_key`` compatibility field
-    (either an invoke JWT or legacy opaque session key) — only OAuth tokens
-    are cross-profile useful.
+    We deliberately omit the runtime ``agent_key`` compatibility field;
+    the OAuth tokens are the cross-profile source of truth.
     """
     refresh_token = state.get("refresh_token")
     access_token = state.get("access_token")
@@ -4669,13 +4631,12 @@ def _quarantine_nous_pool_entries(
 def _try_import_shared_nous_state(
     *,
     timeout_seconds: float = 15.0,
-    min_key_ttl_seconds: int = 5 * 60,
 ) -> Optional[Dict[str, Any]]:
     """Attempt to rehydrate Nous OAuth state from the shared store.
 
-    Reads the shared file (if present), runs a forced refresh+mint using
-    the stored refresh_token to produce a fresh access_token + agent_key
-    scoped to this profile, and returns the full auth_state dict ready
+    Reads the shared file (if present), runs a forced refresh using the
+    stored refresh_token to produce a fresh inference JWT scoped to this
+    profile, and returns the full auth_state dict ready
     for ``persist_nous_credentials()``.
 
     Returns ``None`` when no shared state is available or the rehydrate
@@ -4691,7 +4652,7 @@ def _try_import_shared_nous_state(
 
             # Build a full state dict so refresh_nous_oauth_from_state has every
             # field it needs. force_refresh=True gets us a fresh access_token
-            # for this profile; fresh auth mode avoids stale cached legacy keys.
+            # for this profile.
             state: Dict[str, Any] = {
                 "access_token": shared.get("access_token"),
                 "refresh_token": shared.get("refresh_token"),
@@ -4712,10 +4673,8 @@ def _try_import_shared_nous_state(
 
             refreshed = refresh_nous_oauth_from_state(
                 state,
-                min_key_ttl_seconds=min_key_ttl_seconds,
                 timeout_seconds=timeout_seconds,
                 force_refresh=True,
-                inference_auth_mode=NOUS_INFERENCE_AUTH_MODE_FRESH,
                 on_state_update=_persist_shared_refresh,
             )
             _write_shared_nous_state(refreshed)
@@ -4798,39 +4757,6 @@ def _refresh_access_token(
     raise AuthError(description, provider="nous", code=code, relogin_required=relogin)
 
 
-def _mint_agent_key(
-    *,
-    client: httpx.Client,
-    portal_base_url: str,
-    access_token: str,
-    min_ttl_seconds: int,
-) -> Dict[str, Any]:
-    """Mint (or reuse) a short-lived inference API key."""
-    response = client.post(
-        f"{portal_base_url}/api/oauth/agent-key",
-        headers={"Authorization": f"Bearer {access_token}"},
-        json={"min_ttl_seconds": max(60, int(min_ttl_seconds))},
-    )
-
-    if response.status_code == 200:
-        payload = response.json()
-        if "api_key" not in payload:
-            raise AuthError("Mint response missing api_key",
-                            provider="nous", code="server_error")
-        return payload
-
-    try:
-        error_payload = response.json()
-    except Exception as exc:
-        raise AuthError("Agent key mint request failed",
-                        provider="nous", code="server_error") from exc
-
-    code = str(error_payload.get("error", "server_error"))
-    description = str(error_payload.get("error_description") or "Agent key mint request failed")
-    relogin = code in {"invalid_token", "invalid_grant"}
-    raise AuthError(description, provider="nous", code=code, relogin_required=relogin)
-
-
 def fetch_nous_models(
     *,
     inference_base_url: str,
@@ -4892,15 +4818,12 @@ def _agent_key_is_usable(state: Dict[str, Any], min_ttl_seconds: int) -> bool:
     key = state.get("agent_key")
     if not isinstance(key, str) or not key.strip():
         return False
-    if _decode_jwt_claims(key):
-        if _nous_legacy_session_keys_forced():
-            return False
-        return _nous_invoke_jwt_is_usable(
-            key,
-            scope=state.get("scope"),
-            expires_at=state.get("agent_key_expires_at"),
-        )
-    return not _is_expiring(state.get("agent_key_expires_at"), min_ttl_seconds)
+    return _nous_invoke_jwt_is_usable(
+        key,
+        scope=state.get("scope"),
+        expires_at=state.get("agent_key_expires_at"),
+        min_ttl_seconds=max(0, int(min_ttl_seconds)),
+    )
 
 
 def resolve_nous_access_token(
@@ -5021,21 +4944,18 @@ def refresh_nous_oauth_pure(
     expires_at: Optional[str] = None,
     agent_key: Optional[str] = None,
     agent_key_expires_at: Optional[str] = None,
-    min_key_ttl_seconds: int = DEFAULT_AGENT_KEY_MIN_TTL_SECONDS,
     timeout_seconds: float = 15.0,
     insecure: Optional[bool] = None,
     ca_bundle: Optional[str] = None,
     force_refresh: bool = False,
-    inference_auth_mode: str = NOUS_INFERENCE_AUTH_MODE_AUTO,
     on_state_update: Optional[Callable[[Dict[str, Any], str], None]] = None,
 ) -> Dict[str, Any]:
     """Refresh Nous OAuth state without mutating auth.json directly.
 
-    ``on_state_update`` is called after a successful access-token refresh and
-    before any subsequent agent-key mint. Callers that own persistent state can
-    use it to save the newly rotated refresh token before later work can fail.
+    ``on_state_update`` is called after a successful access-token refresh.
+    Callers that own persistent state can use it to save the newly rotated
+    refresh token before later validation can fail.
     """
-    inference_auth_mode = _normalize_nous_inference_auth_mode(inference_auth_mode)
     state: Dict[str, Any] = {
         "access_token": access_token,
         "refresh_token": refresh_token,
@@ -5057,33 +4977,38 @@ def refresh_nous_oauth_pure(
     timeout = httpx.Timeout(timeout_seconds if timeout_seconds else 15.0)
 
     with httpx.Client(timeout=timeout, headers={"Accept": "application/json"}, verify=verify) as client:
-        min_agent_key_ttl = max(60, int(min_key_ttl_seconds))
-        legacy_session_keys = _nous_legacy_session_keys_forced()
-        current_invoke_jwt_usable = (
-            not legacy_session_keys
-            and _nous_invoke_jwt_is_usable(
-                state.get("access_token"),
-                scope=state.get("scope"),
-                expires_at=state.get("expires_at"),
-            )
+        current_invoke_jwt_status = _nous_invoke_jwt_status(
+            state.get("access_token"),
+            scope=state.get("scope"),
+            expires_at=state.get("expires_at"),
         )
-        if (
-            force_refresh
-            or (
-                _is_expiring(state.get("expires_at"), ACCESS_TOKEN_REFRESH_SKEW_SECONDS)
-                and not current_invoke_jwt_usable
-            )
-        ):
+        if force_refresh or current_invoke_jwt_status is not None:
+            refresh_token_value = state.get("refresh_token")
+            if not isinstance(refresh_token_value, str) or not refresh_token_value:
+                if current_invoke_jwt_status is not None:
+                    raise AuthError(
+                        "Nous Portal access token is not a usable inference JWT "
+                        f"({current_invoke_jwt_status}) and no refresh token is available. "
+                        "Re-authenticate with: hermes auth add nous",
+                        provider="nous",
+                        code=current_invoke_jwt_status,
+                        relogin_required=True,
+                    )
+                raise AuthError(
+                    "No refresh token is available for Nous Portal.",
+                    provider="nous",
+                    relogin_required=True,
+                )
             refreshed = _refresh_access_token(
                 client=client,
                 portal_base_url=state["portal_base_url"],
                 client_id=state["client_id"],
-                refresh_token=state["refresh_token"],
+                refresh_token=refresh_token_value,
             )
             now = datetime.now(timezone.utc)
             access_ttl = _coerce_ttl_seconds(refreshed.get("expires_in"))
             state["access_token"] = refreshed["access_token"]
-            state["refresh_token"] = refreshed.get("refresh_token") or state["refresh_token"]
+            state["refresh_token"] = refreshed.get("refresh_token") or refresh_token_value
             state["token_type"] = refreshed.get("token_type") or state.get("token_type") or "Bearer"
             state["scope"] = refreshed.get("scope") or state.get("scope")
             refreshed_url = _validate_nous_inference_url_from_network(refreshed.get("inference_base_url"))
@@ -5097,34 +5022,8 @@ def refresh_nous_oauth_pure(
             if on_state_update is not None:
                 on_state_update(dict(state), "post_refresh_access_token")
 
-        selected_auth_path, fallback_reason = _choose_nous_inference_auth_path(
-            state,
-            min_key_ttl_seconds=min_agent_key_ttl,
-            inference_auth_mode=inference_auth_mode,
-        )
-        if selected_auth_path == NOUS_AUTH_PATH_INVOKE_JWT:
-            _select_nous_invoke_jwt(state)
-        elif selected_auth_path == NOUS_AUTH_PATH_LEGACY_SESSION_KEY_MINT:
-            _log_nous_legacy_session_key_selected(
-                fallback_reason or "legacy_session_key_required",
-                access_token=state.get("access_token"),
-            )
-            mint_payload = _mint_agent_key(
-                client=client,
-                portal_base_url=state["portal_base_url"],
-                access_token=state["access_token"],
-                min_ttl_seconds=min_key_ttl_seconds,
-            )
-            now = datetime.now(timezone.utc)
-            state["agent_key"] = mint_payload.get("api_key")
-            state["agent_key_id"] = mint_payload.get("key_id")
-            state["agent_key_expires_at"] = mint_payload.get("expires_at")
-            state["agent_key_expires_in"] = mint_payload.get("expires_in")
-            state["agent_key_reused"] = bool(mint_payload.get("reused", False))
-            state["agent_key_obtained_at"] = now.isoformat()
-            minted_url = _validate_nous_inference_url_from_network(mint_payload.get("inference_base_url"))
-            if minted_url:
-                state["inference_base_url"] = minted_url
+        _assert_nous_inference_jwt_usable(state)
+        _select_nous_invoke_jwt(state)
 
     return state
 
@@ -5132,10 +5031,8 @@ def refresh_nous_oauth_pure(
 def refresh_nous_oauth_from_state(
     state: Dict[str, Any],
     *,
-    min_key_ttl_seconds: int = DEFAULT_AGENT_KEY_MIN_TTL_SECONDS,
     timeout_seconds: float = 15.0,
     force_refresh: bool = False,
-    inference_auth_mode: str = NOUS_INFERENCE_AUTH_MODE_AUTO,
     on_state_update: Optional[Callable[[Dict[str, Any], str], None]] = None,
 ) -> Dict[str, Any]:
     """Refresh Nous OAuth from a state dict. Thin wrapper around refresh_nous_oauth_pure."""
@@ -5152,12 +5049,10 @@ def refresh_nous_oauth_from_state(
         expires_at=state.get("expires_at"),
         agent_key=state.get("agent_key"),
         agent_key_expires_at=state.get("agent_key_expires_at"),
-        min_key_ttl_seconds=min_key_ttl_seconds,
         timeout_seconds=timeout_seconds,
         insecure=tls.get("insecure"),
         ca_bundle=tls.get("ca_bundle"),
         force_refresh=force_refresh,
-        inference_auth_mode=inference_auth_mode,
         on_state_update=on_state_update,
     )
 
@@ -5167,7 +5062,7 @@ def persist_nous_credentials(
     *,
     label: Optional[str] = None,
 ):
-    """Persist minted Nous OAuth credentials as the singleton provider state
+    """Persist Nous OAuth credentials as the singleton provider state
     and ensure the credential pool is in sync.
 
     Nous credentials are read at runtime from two independent locations:
@@ -5178,7 +5073,7 @@ def persist_nous_credentials(
     - ``credential_pool.nous``: used by the runtime ``pool.select()`` path.
 
     Historically ``hermes auth add nous`` wrote a ``manual:device_code`` pool
-    entry only, skipping ``providers.nous``.  When the 24h agent_key TTL
+    entry only, skipping ``providers.nous``. When the runtime credential
     expired, the recovery path read the empty singleton state and raised
     ``AuthError`` silently (``logger.debug`` at INFO level).
 
@@ -5233,24 +5128,20 @@ def _sync_nous_pool_from_auth_store() -> None:
 
 def resolve_nous_runtime_credentials(
     *,
-    min_key_ttl_seconds: int = DEFAULT_AGENT_KEY_MIN_TTL_SECONDS,
     timeout_seconds: float = 15.0,
     insecure: Optional[bool] = None,
     ca_bundle: Optional[str] = None,
-    inference_auth_mode: str = NOUS_INFERENCE_AUTH_MODE_AUTO,
+    force_refresh: bool = False,
 ) -> Dict[str, Any]:
     """
     Resolve Nous inference credentials for runtime use.
 
-    Ensures access_token is valid (refreshes if needed) and a short-lived
-    inference key is present with minimum TTL (mints/reuses as needed).
-    Concurrent processes coordinate through the auth store file lock.
+    Ensures access_token is a valid inference-scoped JWT, refreshing it when
+    needed. Concurrent processes coordinate through the auth store file lock.
 
     Returns dict with: provider, base_url, api_key, key_id, expires_at,
-    expires_in, source ("invoke_jwt", "cache", or "portal"), and auth_path.
+    expires_in, source ("invoke_jwt"), and auth_path.
     """
-    inference_auth_mode = _normalize_nous_inference_auth_mode(inference_auth_mode)
-    min_key_ttl_seconds = max(60, int(min_key_ttl_seconds))
     sequence_id = uuid.uuid4().hex[:12]
 
     with _auth_store_lock():
@@ -5322,8 +5213,6 @@ def resolve_nous_runtime_credentials(
         _oauth_trace(
             "nous_runtime_credentials_start",
             sequence_id=sequence_id,
-            inference_auth_mode=inference_auth_mode,
-            min_key_ttl_seconds=min_key_ttl_seconds,
             refresh_token_fp=_token_fingerprint(state.get("refresh_token")),
         )
 
@@ -5335,43 +5224,40 @@ def resolve_nous_runtime_credentials(
                 raise AuthError("No access token found for Nous Portal login.",
                                 provider="nous", relogin_required=True)
 
-            # Step 1: refresh access token if expiring. If the access token
-            # is already a valid invoke JWT, trust its own exp claim even when
-            # older auth.json metadata has a stale/missing expires_at.
-            current_invoke_jwt_usable = (
-                not _nous_legacy_session_keys_forced()
-                and _nous_invoke_jwt_is_usable(
-                    access_token,
-                    scope=state.get("scope"),
-                    expires_at=state.get("expires_at"),
-                )
+            invoke_jwt_status = _nous_invoke_jwt_status(
+                access_token,
+                scope=state.get("scope"),
+                expires_at=state.get("expires_at"),
             )
-            if (
-                _is_expiring(state.get("expires_at"), ACCESS_TOKEN_REFRESH_SKEW_SECONDS)
-                and not current_invoke_jwt_usable
-            ):
+            if force_refresh or invoke_jwt_status is not None:
                 with _nous_shared_store_lock(timeout_seconds=max(timeout_seconds + 5.0, AUTH_LOCK_TIMEOUT_SECONDS)):
                     if _merge_shared_nous_oauth_state(state):
                         access_token = state.get("access_token")
                         refresh_token = state.get("refresh_token")
-                        _persist_state("post_shared_merge_access_expiring")
-
-                    if (
-                        _is_expiring(state.get("expires_at"), ACCESS_TOKEN_REFRESH_SKEW_SECONDS)
-                        and not _nous_invoke_jwt_is_usable(
+                        invoke_jwt_status = _nous_invoke_jwt_status(
                             access_token,
                             scope=state.get("scope"),
                             expires_at=state.get("expires_at"),
                         )
-                    ):
-                        if not isinstance(refresh_token, str) or not refresh_token:
-                            raise AuthError("Session expired and no refresh token is available.",
-                                            provider="nous", relogin_required=True)
+                        _persist_state("post_shared_merge_access_unusable")
 
+                    if force_refresh or invoke_jwt_status is not None:
+                        if not isinstance(refresh_token, str) or not refresh_token:
+                            reason = invoke_jwt_status or "force_refresh"
+                            raise AuthError(
+                                "Nous Portal access token is not a usable inference JWT "
+                                f"({reason}) and no refresh token is available. "
+                                "Re-authenticate with: hermes auth add nous",
+                                provider="nous",
+                                code=reason,
+                                relogin_required=True,
+                            )
+
+                        refresh_reason = "force_refresh" if force_refresh else (invoke_jwt_status or "access_unusable")
                         _oauth_trace(
                             "refresh_start",
                             sequence_id=sequence_id,
-                            reason="access_expiring",
+                            reason=refresh_reason,
                             refresh_token_fp=_token_fingerprint(refresh_token),
                         )
                         try:
@@ -5413,166 +5299,24 @@ def resolve_nous_runtime_credentials(
                         _oauth_trace(
                             "refresh_success",
                             sequence_id=sequence_id,
-                            reason="access_expiring",
+                            reason=refresh_reason,
                             previous_refresh_token_fp=_token_fingerprint(previous_refresh_token),
                             new_refresh_token_fp=_token_fingerprint(refresh_token),
                         )
-                        # Persist immediately so downstream mint failures cannot drop rotated refresh tokens.
-                        _persist_state("post_refresh_access_expiring")
+                        # Persist immediately so validation failures cannot drop rotated refresh tokens.
+                        _persist_state("post_refresh_access_token")
 
-            # Step 2: resolve the compatibility ``agent_key`` field. Preferred
-            # path stores the NAS invoke JWT there; legacy path mints/reuses
-            # the opaque session key.
-            used_cached_key = False
-            mint_payload: Optional[Dict[str, Any]] = None
-            selected_auth_path, fallback_reason = _choose_nous_inference_auth_path(
+            _assert_nous_inference_jwt_usable(
                 state,
                 access_token=access_token,
-                min_key_ttl_seconds=min_key_ttl_seconds,
-                inference_auth_mode=inference_auth_mode,
+            )
+            _select_nous_invoke_jwt(
+                state,
+                access_token=access_token,
+                sequence_id=sequence_id,
             )
 
-            if selected_auth_path == NOUS_AUTH_PATH_INVOKE_JWT:
-                _select_nous_invoke_jwt(
-                    state,
-                    access_token=access_token,
-                    sequence_id=sequence_id,
-                )
-            elif selected_auth_path == NOUS_AUTH_PATH_LEGACY_SESSION_KEY_CACHE:
-                used_cached_key = True
-                logger.info("Nous inference auth: using cached agent_key")
-                _oauth_trace("agent_key_reuse", sequence_id=sequence_id)
-            else:
-                _log_nous_legacy_session_key_selected(
-                    fallback_reason or "legacy_session_key_required",
-                    access_token=access_token,
-                    sequence_id=sequence_id,
-                )
-                try:
-                    _oauth_trace(
-                        "mint_start",
-                        sequence_id=sequence_id,
-                        access_token_fp=_token_fingerprint(access_token),
-                    )
-                    mint_payload = _mint_agent_key(
-                        client=client, portal_base_url=portal_base_url,
-                        access_token=access_token, min_ttl_seconds=min_key_ttl_seconds,
-                    )
-                except AuthError as exc:
-                    _oauth_trace(
-                        "mint_error",
-                        sequence_id=sequence_id,
-                        code=exc.code,
-                    )
-                    # Retry path: access token may be stale server-side despite local checks
-                    latest_refresh_token = state.get("refresh_token")
-                    if (
-                        exc.code in {"invalid_token", "invalid_grant"}
-                        and isinstance(latest_refresh_token, str)
-                        and latest_refresh_token
-                    ):
-                        with _nous_shared_store_lock(timeout_seconds=max(timeout_seconds + 5.0, AUTH_LOCK_TIMEOUT_SECONDS)):
-                            if _merge_shared_nous_oauth_state(state):
-                                access_token = state.get("access_token")
-                                latest_refresh_token = state.get("refresh_token")
-                                _persist_state("post_shared_merge_mint_retry")
-                            else:
-                                _oauth_trace(
-                                    "refresh_start",
-                                    sequence_id=sequence_id,
-                                    reason="mint_retry_after_invalid_token",
-                                    refresh_token_fp=_token_fingerprint(latest_refresh_token),
-                                )
-                                try:
-                                    refreshed = _refresh_access_token(
-                                        client=client, portal_base_url=portal_base_url,
-                                        client_id=client_id, refresh_token=latest_refresh_token,
-                                    )
-                                except AuthError as exc:
-                                    if _is_terminal_nous_refresh_error(exc):
-                                        _quarantine_nous_oauth_state(
-                                            state,
-                                            exc,
-                                            reason="runtime_mint_retry_refresh_failure",
-                                        )
-                                        _quarantine_nous_pool_entries(
-                                            auth_store,
-                                            exc,
-                                            reason="runtime_mint_retry_refresh_failure",
-                                        )
-                                        _persist_state("terminal_runtime_mint_retry_refresh_failure")
-                                    raise
-                                now = datetime.now(timezone.utc)
-                                access_ttl = _coerce_ttl_seconds(refreshed.get("expires_in"))
-                                state["access_token"] = refreshed["access_token"]
-                                state["refresh_token"] = refreshed.get("refresh_token") or latest_refresh_token
-                                state["token_type"] = refreshed.get("token_type") or state.get("token_type") or "Bearer"
-                                state["scope"] = refreshed.get("scope") or state.get("scope")
-                                refreshed_url = _validate_nous_inference_url_from_network(refreshed.get("inference_base_url"))
-                                if refreshed_url:
-                                    inference_base_url = refreshed_url
-                                state["obtained_at"] = now.isoformat()
-                                state["expires_in"] = access_ttl
-                                state["expires_at"] = datetime.fromtimestamp(
-                                    now.timestamp() + access_ttl, tz=timezone.utc
-                                ).isoformat()
-                                access_token = state["access_token"]
-                                refresh_token = state["refresh_token"]
-                                _oauth_trace(
-                                    "refresh_success",
-                                    sequence_id=sequence_id,
-                                    reason="mint_retry_after_invalid_token",
-                                    previous_refresh_token_fp=_token_fingerprint(latest_refresh_token),
-                                    new_refresh_token_fp=_token_fingerprint(refresh_token),
-                                )
-                                # Persist retry refresh immediately for crash safety and cross-process visibility.
-                                _persist_state("post_refresh_mint_retry")
-
-                        retry_inference_auth_mode = (
-                            NOUS_INFERENCE_AUTH_MODE_LEGACY
-                            if inference_auth_mode == NOUS_INFERENCE_AUTH_MODE_LEGACY
-                            else NOUS_INFERENCE_AUTH_MODE_FRESH
-                        )
-                        retry_auth_path, _ = _choose_nous_inference_auth_path(
-                            state,
-                            access_token=access_token,
-                            min_key_ttl_seconds=min_key_ttl_seconds,
-                            inference_auth_mode=retry_inference_auth_mode,
-                        )
-                        if retry_auth_path == NOUS_AUTH_PATH_INVOKE_JWT:
-                            mint_payload = None
-                            selected_auth_path = NOUS_AUTH_PATH_INVOKE_JWT
-                            _select_nous_invoke_jwt(
-                                state,
-                                access_token=access_token,
-                                sequence_id=sequence_id,
-                            )
-                        else:
-                            mint_payload = _mint_agent_key(
-                                client=client, portal_base_url=portal_base_url,
-                                access_token=access_token, min_ttl_seconds=min_key_ttl_seconds,
-                            )
-                    else:
-                        raise
-
-            if mint_payload is not None:
-                now = datetime.now(timezone.utc)
-                state["agent_key"] = mint_payload.get("api_key")
-                state["agent_key_id"] = mint_payload.get("key_id")
-                state["agent_key_expires_at"] = mint_payload.get("expires_at")
-                state["agent_key_expires_in"] = mint_payload.get("expires_in")
-                state["agent_key_reused"] = bool(mint_payload.get("reused", False))
-                state["agent_key_obtained_at"] = now.isoformat()
-                minted_url = _validate_nous_inference_url_from_network(mint_payload.get("inference_base_url"))
-                if minted_url:
-                    inference_base_url = minted_url
-                _oauth_trace(
-                    "mint_success",
-                    sequence_id=sequence_id,
-                    reused=bool(mint_payload.get("reused", False)),
-                )
-
-            # Persist routing and TLS metadata for non-interactive refresh/mint
+            # Persist routing and TLS metadata for non-interactive refresh.
             state["portal_base_url"] = portal_base_url
             state["inference_base_url"] = inference_base_url
             state["client_id"] = client_id
@@ -5606,12 +5350,8 @@ def resolve_nous_runtime_credentials(
         "key_id": state.get("agent_key_id"),
         "expires_at": expires_at,
         "expires_in": expires_in,
-        "source": (
-            NOUS_AUTH_PATH_INVOKE_JWT
-            if selected_auth_path == NOUS_AUTH_PATH_INVOKE_JWT
-            else ("cache" if used_cached_key else "portal")
-        ),
-        "auth_path": selected_auth_path,
+        "source": NOUS_AUTH_PATH_INVOKE_JWT,
+        "auth_path": NOUS_AUTH_PATH_INVOKE_JWT,
     }
 
 
@@ -5627,6 +5367,8 @@ def _empty_nous_auth_status() -> Dict[str, Any]:
         "access_expires_at": None,
         "agent_key_expires_at": None,
         "has_refresh_token": False,
+        "inference_credential_present": False,
+        "credential_source": None,
     }
 
 
@@ -5634,8 +5376,7 @@ def _snapshot_nous_pool_status() -> Dict[str, Any]:
     """Best-effort status from the credential pool.
 
     This is a fallback only. The auth-store provider state is the runtime source
-    of truth because it is what ``resolve_nous_runtime_credentials()`` refreshes
-    and mints against.
+    of truth because it is what ``resolve_nous_runtime_credentials()`` refreshes.
     """
     try:
         from agent.credential_pool import load_pool
@@ -5655,24 +5396,36 @@ def _snapshot_nous_pool_status() -> Dict[str, Any]:
             return (agent_exp, access_exp, -priority)
 
         entry = max(entries, key=_entry_sort_key)
-        access_token = (
-            getattr(entry, "access_token", None)
-            or getattr(entry, "runtime_api_key", "")
-        )
-        if not access_token:
+        runtime_key = getattr(entry, "runtime_api_key", None)
+        if not runtime_key:
             return _empty_nous_auth_status()
+        access_token = getattr(entry, "access_token", None)
+        auth_type = str(getattr(entry, "auth_type", "") or "").strip().lower()
+        refresh_token = getattr(entry, "refresh_token", None)
+        is_portal_oauth = bool(access_token) and (
+            auth_type.startswith("oauth") or bool(refresh_token)
+        )
+        label = getattr(entry, "label", "unknown")
+        portal_status_url = None
+        if is_portal_oauth:
+            portal_status_url = (
+                getattr(entry, "portal_base_url", None)
+                or DEFAULT_NOUS_PORTAL_URL
+            )
 
         return {
-            "logged_in": True,
-            "portal_base_url": getattr(entry, "portal_base_url", None)
-            or getattr(entry, "base_url", None),
+            "logged_in": is_portal_oauth,
+            "portal_base_url": portal_status_url,
             "inference_base_url": getattr(entry, "inference_base_url", None)
+            or getattr(entry, "runtime_base_url", None)
             or getattr(entry, "base_url", None),
-            "access_token": access_token,
+            "access_token": access_token if is_portal_oauth else None,
             "access_expires_at": getattr(entry, "expires_at", None),
             "agent_key_expires_at": getattr(entry, "agent_key_expires_at", None),
-            "has_refresh_token": bool(getattr(entry, "refresh_token", None)),
-            "source": f"pool:{getattr(entry, 'label', 'unknown')}",
+            "has_refresh_token": bool(refresh_token),
+            "inference_credential_present": True,
+            "credential_source": f"pool:{label}",
+            "source": f"pool:{label}",
         }
     except Exception:
         return _empty_nous_auth_status()
@@ -5715,7 +5468,7 @@ def get_nous_auth_status() -> Dict[str, Any]:
     """Status snapshot for Nous auth.
 
     Prefer the auth-store provider state, because that is the live source of
-    truth for refresh + mint operations. When provider state exists, validate it
+    truth for refresh operations. When provider state exists, validate it
     by resolving runtime credentials so revoked refresh sessions do not show up
     as a healthy login. If provider state is absent, fall back to the credential
     pool for the just-logged-in / not-yet-promoted case.
@@ -5755,10 +5508,14 @@ def _compute_nous_auth_status() -> Dict[str, Any]:
             "agent_key_expires_at": state.get("agent_key_expires_at"),
             "has_refresh_token": bool(state.get("refresh_token")),
             "access_token": state.get("access_token"),
+            "inference_credential_present": bool(
+                state.get("access_token") or state.get("agent_key")
+            ),
+            "credential_source": "auth_store",
             "source": "auth_store",
         }
         try:
-            creds = resolve_nous_runtime_credentials(min_key_ttl_seconds=60)
+            creds = resolve_nous_runtime_credentials()
             refreshed_state = get_provider_auth_state("nous") or state
             base_status.update(
                 {
@@ -5772,6 +5529,8 @@ def _compute_nous_auth_status() -> Dict[str, Any]:
                     or refreshed_state.get("agent_key_expires_at")
                     or base_status.get("agent_key_expires_at"),
                     "has_refresh_token": bool(refreshed_state.get("refresh_token")),
+                    "inference_credential_present": True,
+                    "credential_source": "auth_store",
                     "source": f"runtime:{creds.get('source', 'portal')}",
                     "key_id": creds.get("key_id"),
                 }
@@ -6283,6 +6042,7 @@ def _prompt_model_selection(
     pricing: Optional[Dict[str, Dict[str, str]]] = None,
     unavailable_models: Optional[List[str]] = None,
     portal_url: str = "",
+    unavailable_message: str = "",
 ) -> Optional[str]:
     """Interactive model selection. Puts current_model first with a marker. Returns chosen model ID or None.
 
@@ -6374,18 +6134,22 @@ def _prompt_model_selection(
         choices.append("  Enter custom model name")
         choices.append("  Skip (keep current)")
 
+        _upgrade_url = (portal_url or DEFAULT_NOUS_PORTAL_URL).rstrip("/")
+        unavailable_footer = unavailable_message.strip()
+        if not unavailable_footer and _unavailable:
+            unavailable_footer = f"Upgrade at {_upgrade_url} for paid models"
+
         # Print the unavailable block BEFORE the menu via regular print().
         # simple_term_menu pads title lines to terminal width (causes wrapping),
         # so we keep the title minimal and use stdout for the static block.
         # clear_screen=False means our printed output stays visible above.
-        _upgrade_url = (portal_url or DEFAULT_NOUS_PORTAL_URL).rstrip("/")
         if _unavailable:
             print(menu_title)
             print()
             for mid in _unavailable:
                 print(f"{_DIM}     {_label(mid)}{_RESET}")
             print()
-            print(f"{_DIM}  ── Upgrade at {_upgrade_url} for paid models ──{_RESET}")
+            print(f"{_DIM}  ── {unavailable_footer} ──{_RESET}")
             print()
             effective_title = "Available free models:"
         else:
@@ -6427,8 +6191,11 @@ def _prompt_model_selection(
 
     if _unavailable:
         _upgrade_url = (portal_url or DEFAULT_NOUS_PORTAL_URL).rstrip("/")
+        unavailable_footer = unavailable_message.strip() or (
+            f"Unavailable models (requires paid tier — upgrade at {_upgrade_url})"
+        )
         print()
-        print(f"  {_DIM}── Unavailable models (requires paid tier — upgrade at {_upgrade_url}) ──{_RESET}")
+        print(f"  {_DIM}── {unavailable_footer} ──{_RESET}")
         for mid in _unavailable:
             print(f"  {'':>{num_width}}  {_DIM}{_label(mid)}{_RESET}")
     print()
@@ -6777,6 +6544,12 @@ def _xai_oauth_loopback_login(
     remote VM).  The same PKCE verifier, ``state``, and ``nonce`` are
     used for both paths so the upstream-side OAuth flow is identical.
     """
+    def _stdin_supports_manual_paste() -> bool:
+        try:
+            return bool(getattr(sys.stdin, "isatty", lambda: False)())
+        except Exception:
+            return False
+
     discovery = _xai_oauth_discovery(timeout_seconds)
     authorization_endpoint = discovery["authorization_endpoint"]
     token_endpoint = discovery["token_endpoint"]
@@ -6830,7 +6603,7 @@ def _xai_oauth_loopback_login(
 
             _print_loopback_ssh_hint(redirect_uri, docs_url=XAI_OAUTH_DOCS_URL)
 
-            if open_browser and not _is_remote_session():
+            if open_browser and not _is_remote_session() and _can_open_graphical_browser():
                 try:
                     opened = webbrowser.open(authorize_url)
                 except Exception:
@@ -6840,12 +6613,28 @@ def _xai_oauth_loopback_login(
                 else:
                     print("Could not open the browser automatically; use the URL above.")
 
-            callback = _xai_wait_for_callback(
-                server,
-                thread,
-                callback_result,
-                timeout_seconds=max(30.0, timeout_seconds * 9),
-            )
+            try:
+                callback = _xai_wait_for_callback(
+                    server,
+                    thread,
+                    callback_result,
+                    timeout_seconds=max(30.0, timeout_seconds * 9),
+                )
+            except AuthError as exc:
+                if (
+                    getattr(exc, "code", "") != "xai_callback_timeout"
+                    or not _stdin_supports_manual_paste()
+                ):
+                    raise
+                print()
+                print("xAI loopback callback timed out.")
+                print("If your browser reached a failed 127.0.0.1 callback page,")
+                print("paste that FULL callback URL below to continue this login.")
+                print("You can also re-run with `--manual-paste` to skip the")
+                print("loopback listener from the start.")
+                callback = _prompt_manual_callback_paste(redirect_uri)
+                if callback.get("code") is None and callback.get("error") is None:
+                    raise exc
         except Exception:
             try:
                 server.shutdown()
@@ -6865,7 +6654,21 @@ def _xai_oauth_loopback_login(
             provider="xai-oauth",
             code="xai_authorization_failed",
         )
-    if callback.get("state") != state:
+    callback_state = callback.get("state")
+    # Manual-paste bare-code path: when a user pastes only the opaque
+    # authorization code (no ``code=``/``state=`` query parameters),
+    # ``_parse_pasted_callback`` returns ``state=None``.  xAI's consent
+    # page renders the code in-page rather than redirecting through the
+    # 127.0.0.1 callback, so on many remote setups (Cloud Shell, headless
+    # VPS, container consoles) the bare code is the only thing the user
+    # can obtain.  PKCE (code_verifier) still binds the exchange to this
+    # client, so the local state-equality check is redundant on the
+    # bare-code path — we substitute the locally generated state to keep
+    # the rest of the validation chain (and the token exchange) unchanged.
+    # See #26923 (AccursedGalaxy comment, 2026-05-20).
+    if callback_state is None and manual_paste:
+        callback_state = state
+    if callback_state != state:
         raise AuthError(
             "xAI authorization failed: state mismatch.",
             provider="xai-oauth",
@@ -7242,7 +7045,7 @@ def _minimax_oauth_login(
         print("To continue:")
         print(f"  1. Open: {verification_url}")
         print(f"  2. If prompted, enter code: {user_code}")
-        if open_browser:
+        if open_browser and _can_open_graphical_browser():
             if webbrowser.open(verification_url):
                 print("  (Opened browser for verification)")
             else:
@@ -7510,7 +7313,6 @@ def _nous_device_code_login(
     timeout_seconds: float = 15.0,
     insecure: bool = False,
     ca_bundle: Optional[str] = None,
-    min_key_ttl_seconds: int = 5 * 60,
 ) -> Dict[str, Any]:
     """Run the Nous device-code flow and return full OAuth state without persisting."""
     pconfig = PROVIDER_REGISTRY["nous"]
@@ -7526,10 +7328,7 @@ def _nous_device_code_login(
         or pconfig.inference_base_url
     ).rstrip("/")
     client_id = client_id or pconfig.client_id
-    scope, explicit_scope = _nous_device_scope_with_env_override(
-        scope,
-        default_scope=pconfig.scope,
-    )
+    scope = scope or pconfig.scope
     timeout = httpx.Timeout(timeout_seconds)
     verify: bool | str = False if insecure else (ca_bundle if ca_bundle else True)
 
@@ -7544,12 +7343,11 @@ def _nous_device_code_login(
         print(f"TLS verification: custom CA bundle ({ca_bundle})")
 
     with httpx.Client(timeout=timeout, headers={"Accept": "application/json"}, verify=verify) as client:
-        device_data, scope = _request_nous_device_code_with_scope_fallback(
+        device_data = _request_device_code(
             client=client,
             portal_base_url=portal_base_url,
             client_id=client_id,
             scope=scope,
-            allow_legacy_fallback=not explicit_scope,
         )
 
         verification_url = str(device_data["verification_uri_complete"])
@@ -7616,18 +7414,17 @@ def _nous_device_code_login(
     try:
         return refresh_nous_oauth_from_state(
             auth_state,
-            min_key_ttl_seconds=min_key_ttl_seconds,
             timeout_seconds=timeout_seconds,
             force_refresh=False,
-            inference_auth_mode=NOUS_INFERENCE_AUTH_MODE_FRESH,
         )
     except AuthError as exc:
         if exc.code == "subscription_required":
             portal_url = auth_state.get(
                 "portal_base_url", DEFAULT_NOUS_PORTAL_URL
             ).rstrip("/")
+            message = format_auth_error(exc)
             print()
-            print("Your Nous Portal account does not have an active subscription.")
+            print(message)
             print(f"  Subscribe here: {portal_url}/billing")
             print()
             print("After subscribing, run `hermes model` again to finish setup.")
@@ -7670,7 +7467,6 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
                 print("Rehydrating Nous session from shared credentials...")
                 auth_state = _try_import_shared_nous_state(
                     timeout_seconds=timeout_seconds,
-                    min_key_ttl_seconds=5 * 60,
                 )
                 if auth_state is None:
                     print("Could not refresh shared credentials — falling back to device-code login.")
@@ -7685,7 +7481,6 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
                 timeout_seconds=timeout_seconds,
                 insecure=insecure,
                 ca_bundle=ca_bundle,
-                min_key_ttl_seconds=5 * 60,
             )
 
         inference_base_url = auth_state["inference_base_url"]
@@ -7737,11 +7532,30 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
 
             print()
             unavailable_models: list = []
+            unavailable_message = ""
             if model_ids:
                 pricing = get_pricing_for_provider("nous")
-                free_tier = check_nous_free_tier()
+                # Force fresh account data for model selection so recent credit
+                # purchases are reflected immediately.
+                free_tier = check_nous_free_tier(force_fresh=True)
                 _portal_for_recs = auth_state.get("portal_base_url", "")
                 if free_tier:
+                    try:
+                        from hermes_cli.nous_account import (
+                            format_nous_portal_entitlement_message,
+                            get_nous_portal_account_info,
+                        )
+
+                        _account_info = get_nous_portal_account_info(force_fresh=True)
+                        unavailable_message = (
+                            format_nous_portal_entitlement_message(
+                                _account_info,
+                                capability="paid Nous models",
+                            )
+                            or ""
+                        )
+                    except Exception:
+                        unavailable_message = ""
                     # The Portal's freeRecommendedModels endpoint is the
                     # source of truth for what's free *right now*. Augment
                     # the curated list with anything new the Portal flags
@@ -7768,11 +7582,12 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
                     model_ids, pricing=pricing,
                     unavailable_models=unavailable_models,
                     portal_url=_portal,
+                    unavailable_message=unavailable_message,
                 )
             elif unavailable_models:
                 _url = (_portal or DEFAULT_NOUS_PORTAL_URL).rstrip("/")
                 print("No free models currently available.")
-                print(f"Upgrade at {_url} to access paid models.")
+                print(unavailable_message or f"Upgrade at {_url} to access paid models.")
             else:
                 print("No curated models available for Nous Portal.")
         except Exception as exc:
diff --git a/hermes_cli/auth_commands.py b/hermes_cli/auth_commands.py
index 7a2f24b8d10..bb791e705ef 100644
--- a/hermes_cli/auth_commands.py
+++ b/hermes_cli/auth_commands.py
@@ -272,9 +272,6 @@ def auth_add_command(args) -> None:
                 print("Rehydrating Nous session from shared credentials...")
                 rehydrated = auth_mod._try_import_shared_nous_state(
                     timeout_seconds=getattr(args, "timeout", None) or 15.0,
-                    min_key_ttl_seconds=max(
-                        60, int(getattr(args, "min_key_ttl_seconds", 5 * 60))
-                    ),
                 )
                 if rehydrated is not None:
                     custom_label = (getattr(args, "label", None) or "").strip() or None
@@ -297,7 +294,6 @@ def auth_add_command(args) -> None:
             timeout_seconds=getattr(args, "timeout", None) or 15.0,
             insecure=bool(getattr(args, "insecure", False)),
             ca_bundle=getattr(args, "ca_bundle", None),
-            min_key_ttl_seconds=max(60, int(getattr(args, "min_key_ttl_seconds", 5 * 60))),
         )
         # Honor `--label <name>` so nous matches other providers' UX.  The
         # helper embeds this into providers.nous so that label_from_token
diff --git a/hermes_cli/backup.py b/hermes_cli/backup.py
index ffdf4f94e1b..0c6bf8692fc 100644
--- a/hermes_cli/backup.py
+++ b/hermes_cli/backup.py
@@ -512,6 +512,7 @@ def _quick_snapshot_root(hermes_home: Optional[Path] = None) -> Path:
 def create_quick_snapshot(
     label: Optional[str] = None,
     hermes_home: Optional[Path] = None,
+    keep: Optional[int] = None,
 ) -> Optional[str]:
     """Create a quick state snapshot of critical files.
 
@@ -585,8 +586,10 @@ def create_quick_snapshot(
     with open(snap_dir / "manifest.json", "w", encoding="utf-8") as f:
         json.dump(meta, f, indent=2)
 
-    # Auto-prune
-    _prune_quick_snapshots(root, keep=_QUICK_DEFAULT_KEEP)
+    # Auto-prune. Defaults preserve historical manual /snapshot behavior; callers
+    # with known high-churn safety snapshots (for example pre-update) can pass a
+    # smaller keep value so large state.db copies do not accumulate indefinitely.
+    _prune_quick_snapshots(root, keep=_QUICK_DEFAULT_KEEP if keep is None else keep)
 
     logger.info("State snapshot created: %s (%d files)", snap_id, len(manifest))
     return snap_id
@@ -667,6 +670,105 @@ def restore_quick_snapshot(
     return restored > 0
 
 
+# Relative path of the cron job database inside HERMES_HOME. Kept in sync with
+# the entry in ``_QUICK_STATE_FILES`` and with ``cron/jobs.py``'s ``JOBS_FILE``.
+_CRON_JOBS_REL = "cron/jobs.json"
+
+
+def _count_cron_jobs(path: Path) -> Optional[int]:
+    """Return the number of cron jobs stored in ``path``.
+
+    The canonical on-disk shape is ``{"jobs": [...]}`` (see ``cron/jobs.py``).
+    A legacy bare-list shape (``[...]``) is also honoured.
+
+    Returns:
+        The job count for any *valid, readable* JSON document, or ``None`` if
+        the file is missing or cannot be parsed. ``None`` means "unknown" —
+        callers must not treat it as "zero jobs", because acting on an
+        unreadable file could mask a real corruption the user needs to see.
+    """
+    if not path.is_file():
+        return None
+    try:
+        with open(path, "r", encoding="utf-8") as f:
+            data = json.load(f)
+    except (OSError, json.JSONDecodeError):
+        return None
+    if isinstance(data, dict):
+        jobs = data.get("jobs", [])
+        return len(jobs) if isinstance(jobs, list) else None
+    if isinstance(data, list):
+        return len(data)
+    return None
+
+
+def restore_cron_jobs_if_emptied(
+    snapshot_id: str,
+    hermes_home: Optional[Path] = None,
+) -> Optional[Dict[str, Any]]:
+    """Safety net for silent cron-job loss across ``hermes update``.
+
+    Config-version migrations have been observed to leave ``cron/jobs.json``
+    valid-but-empty after an update, silently dropping every scheduled job
+    (issue #34600). The existing malformed-shape guards in ``cron/jobs.py``
+    don't catch this case because ``{"jobs": []}`` is perfectly valid JSON.
+
+    This compares the *current* job count against the pre-update snapshot. If
+    the live file now has **zero** jobs while the snapshot captured **one or
+    more**, the snapshot copy of ``cron/jobs.json`` is restored in place.
+
+    The check is deliberately conservative — it only ever restores when there
+    is unambiguous evidence of loss (snapshot had jobs, live file has none),
+    so a user who genuinely deleted all their jobs during/after the update is
+    never second-guessed, and an unreadable live file (count ``None``) is left
+    untouched so real corruption still surfaces.
+
+    Args:
+        snapshot_id: The pre-update quick-snapshot id (from
+            :func:`create_quick_snapshot`).
+        hermes_home: Override for the Hermes home directory (tests).
+
+    Returns:
+        ``None`` when no action was taken (the common, healthy path). On a
+        successful restore, a dict ``{"restored": True, "job_count": N,
+        "snapshot_id": ...}`` so the caller can warn the user.
+    """
+    if not snapshot_id:
+        return None
+
+    home = hermes_home or get_hermes_home()
+    live_path = home / _CRON_JOBS_REL
+
+    live_count = _count_cron_jobs(live_path)
+    # Only act when the live file is readable AND empty. ``None`` (missing or
+    # unparseable) is intentionally left alone — that's a different failure
+    # mode the user should see rather than have papered over.
+    if live_count is None or live_count > 0:
+        return None
+
+    snap_path = _quick_snapshot_root(home) / snapshot_id / _CRON_JOBS_REL
+    snap_count = _count_cron_jobs(snap_path)
+    if not snap_count:  # None or 0 — nothing worth restoring
+        return None
+
+    try:
+        live_path.parent.mkdir(parents=True, exist_ok=True)
+        shutil.copy2(snap_path, live_path)
+    except (OSError, PermissionError) as exc:
+        logger.error(
+            "Cron jobs were emptied during update but auto-restore failed: %s", exc
+        )
+        return None
+
+    logger.warning(
+        "Restored %d cron job(s) from pre-update snapshot %s "
+        "(cron/jobs.json was emptied during migration)",
+        snap_count,
+        snapshot_id,
+    )
+    return {"restored": True, "job_count": snap_count, "snapshot_id": snapshot_id}
+
+
 def _prune_quick_snapshots(root: Path, keep: int = _QUICK_DEFAULT_KEEP) -> int:
     """Remove oldest quick snapshots beyond the keep limit. Returns count deleted."""
     if not root.exists():
diff --git a/hermes_cli/banner.py b/hermes_cli/banner.py
index ef592beb7fd..c91b2f728c2 100644
--- a/hermes_cli/banner.py
+++ b/hermes_cli/banner.py
@@ -50,17 +50,6 @@ def _skin_color(key: str, fallback: str) -> str:
         return get_active_skin().get_color(key, fallback)
     except Exception:
         return fallback
-
-
-def _skin_branding(key: str, fallback: str) -> str:
-    """Get a branding string from the active skin, or return fallback."""
-    try:
-        from hermes_cli.skin_engine import get_active_skin
-        return get_active_skin().get_branding(key, fallback)
-    except Exception:
-        return fallback
-
-
 # =========================================================================
 # ASCII Art & Branding
 # =========================================================================
@@ -232,7 +221,11 @@ def check_for_updates() -> Optional[int]:
     cache_file = hermes_home / ".update_check"
     embedded_rev = os.environ.get("HERMES_REVISION") or None
 
-    # Read cache — invalidate if the embedded rev has changed since last check
+    # Read cache — invalidate if the embedded rev OR installed version has
+    # changed since the last check. The version guard matters for pip installs:
+    # `check_via_pypi()` compares against VERSION, so a `pip install --upgrade`
+    # changes VERSION but leaves rev unchanged (both None), and without this
+    # the stale "behind" count would survive the upgrade for up to 6h. See #34491.
     now = time.time()
     try:
         if cache_file.exists():
@@ -240,6 +233,7 @@ def check_for_updates() -> Optional[int]:
             if (
                 now - cached.get("ts", 0) < _UPDATE_CHECK_CACHE_SECONDS
                 and cached.get("rev") == embedded_rev
+                and cached.get("ver") == VERSION
             ):
                 return cached.get("behind")
     except Exception:
@@ -260,7 +254,9 @@ def check_for_updates() -> Optional[int]:
             behind = _check_via_local_git(repo_dir)
 
     try:
-        cache_file.write_text(json.dumps({"ts": now, "behind": behind, "rev": embedded_rev}))
+        cache_file.write_text(
+            json.dumps({"ts": now, "behind": behind, "rev": embedded_rev, "ver": VERSION})
+        )
     except Exception:
         pass
 
@@ -300,14 +296,42 @@ def _git_short_hash(repo_dir: Path, rev: str) -> Optional[str]:
 
 
 def get_git_banner_state(repo_dir: Optional[Path] = None) -> Optional[dict]:
-    """Return upstream/local git hashes for the startup banner."""
+    """Return upstream/local git hashes for the startup banner.
+
+    For source installs and dev images this runs ``git rev-parse`` against
+    the active checkout.  When no checkout is available — the canonical case
+    is the published Docker image, which excludes ``.git`` from the build
+    context — we fall back to the baked-in build SHA (see
+    ``hermes_cli/build_info.py``) and return it as a frozen
+    ``upstream == local`` state with ``ahead=0``.  A built image is by
+    definition pinned to one commit, so "ahead" is always zero and the
+    banner correctly shows ``· upstream <sha>`` with no carried-commits
+    annotation.
+    """
     repo_dir = repo_dir or _resolve_repo_dir()
     if repo_dir is None:
+        # No git checkout — try the baked build SHA (Docker image path).
+        try:
+            from hermes_cli.build_info import get_build_sha
+            baked = get_build_sha(short=8)
+            if baked:
+                return {"upstream": baked, "local": baked, "ahead": 0}
+        except Exception:
+            pass
         return None
 
     upstream = _git_short_hash(repo_dir, "origin/main")
     local = _git_short_hash(repo_dir, "HEAD")
     if not upstream or not local:
+        # Live-git lookup failed (e.g. shallow clone without origin/main).
+        # Fall back to the baked build SHA if available.
+        try:
+            from hermes_cli.build_info import get_build_sha
+            baked = get_build_sha(short=8)
+            if baked:
+                return {"upstream": baked, "local": baked, "ahead": 0}
+        except Exception:
+            pass
         return None
 
     ahead = 0
@@ -674,6 +698,21 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
     except Exception:
         pass  # Never break the banner over an update check
 
+    # Pip-install warning — `pip install hermes-agent` is not the supported
+    # install path (it exists on PyPI for internal/CI reasons, not end users).
+    # Such installs miss the git checkout + installer-managed deps, so updates,
+    # self-update, and issue triage don't behave correctly. Warn, don't block.
+    try:
+        from hermes_cli.config import detect_install_method
+        if detect_install_method() == "pip":
+            right_lines.append(
+                "[bold yellow]⚠ pip install not officially supported[/]"
+                "[dim yellow] — exists for reasons other than user install; "
+                "expect instability and an inability to support issues[/]"
+            )
+    except Exception:
+        pass  # Never break the banner over the install-method check
+
     right_content = "\n".join(right_lines)
     layout_table.add_row(left_content, right_content)
 
diff --git a/hermes_cli/build_info.py b/hermes_cli/build_info.py
new file mode 100644
index 00000000000..e4cc6f09974
--- /dev/null
+++ b/hermes_cli/build_info.py
@@ -0,0 +1,51 @@
+"""
+Baked-in build metadata for Hermes Agent.
+
+Source installs report their git revision live via ``git rev-parse`` (see
+``hermes_cli/dump.py`` and ``hermes_cli/banner.py``).  That doesn't work inside
+the published Docker image because ``.dockerignore`` excludes ``.git``, so
+those callsites fall back to ``"(unknown)"`` / drop the banner suffix entirely.
+
+To make ``hermes dump`` and the startup banner identify the exact commit the
+image was built from, the Docker build writes the build-time ``$HERMES_GIT_SHA``
+arg into ``<project_root>/.hermes_build_sha``.  This module is the single
+read-side helper consumed by both callsites — keeping the lookup in one place
+so the file path and missing-file behaviour stay consistent.
+
+Behaviour:
+
+- Returns ``None`` when the file is absent.  Source installs and dev images
+  built without the ``HERMES_GIT_SHA`` build-arg fall through to live-git
+  resolution in the caller, so non-Docker installs are unaffected.
+- Returns ``None`` on any IO / decoding error.  The build-sha is a nice-to-have
+  for support triage; nothing in the CLI is allowed to crash because of it.
+- Truncates to ``short`` characters (default 8) to match the format used by
+  ``git rev-parse --short=8`` throughout the codebase.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Optional
+
+# Path is resolved relative to this module so it works regardless of cwd —
+# matches the pattern used by ``banner._resolve_repo_dir``.
+_BUILD_SHA_FILE = Path(__file__).parent.parent / ".hermes_build_sha"
+
+
+def get_build_sha(short: int = 8) -> Optional[str]:
+    """Return the baked-in build SHA, truncated to ``short`` chars, or None.
+
+    Reads ``<project_root>/.hermes_build_sha`` if present.  The file is
+    written by the Dockerfile's ``HERMES_GIT_SHA`` build-arg and contains
+    the full 40-character commit hash on a single line.
+    """
+    try:
+        if not _BUILD_SHA_FILE.is_file():
+            return None
+        sha = _BUILD_SHA_FILE.read_text(encoding="utf-8").strip()
+    except Exception:
+        return None
+    if not sha:
+        return None
+    return sha[:short] if short and short > 0 else sha
diff --git a/hermes_cli/bundles.py b/hermes_cli/bundles.py
index 76f6c7a992e..80f0794c9de 100644
--- a/hermes_cli/bundles.py
+++ b/hermes_cli/bundles.py
@@ -15,7 +15,7 @@ Subcommands:
 from __future__ import annotations
 
 import sys
-from typing import List, Optional
+from typing import List
 
 from rich.console import Console
 from rich.table import Table
diff --git a/hermes_cli/checkpoints.py b/hermes_cli/checkpoints.py
index 2c0d3dd107b..2975553ae49 100644
--- a/hermes_cli/checkpoints.py
+++ b/hermes_cli/checkpoints.py
@@ -25,7 +25,7 @@ import argparse
 import time
 from datetime import datetime
 from pathlib import Path
-from typing import Any, Dict
+from typing import Any
 
 
 def _fmt_bytes(n: int) -> str:
diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py
index ccf9b993df4..a2db37be20c 100644
--- a/hermes_cli/commands.py
+++ b/hermes_cli/commands.py
@@ -85,8 +85,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
                args_hint="<platform>", cli_only=True),
     CommandDef("branch", "Branch the current session (explore a different path)", "Session",
                aliases=("fork",), args_hint="[name]"),
-    CommandDef("compress", "Manually compress conversation context", "Session",
-               args_hint="[focus topic]"),
+    CommandDef("compress", "Compress conversation context (add 'here [N]' to keep recent N turns)", "Session",
+               args_hint="[here [N] | focus topic]"),
     CommandDef("rollback", "List or restore filesystem checkpoints", "Session",
                args_hint="[number]"),
     CommandDef("snapshot", "Create or restore state snapshots of Hermes config/state", "Session",
@@ -123,7 +123,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
     CommandDef("config", "Show current configuration", "Configuration",
                cli_only=True),
     CommandDef("model", "Switch model for this session", "Configuration",
-               aliases=("provider",), args_hint="[model] [--provider name] [--global]"),
+               aliases=("provider",), args_hint="[model] [--provider name] [--global] [--refresh]"),
     CommandDef("codex-runtime", "Toggle codex app-server runtime for OpenAI/Codex models",
                "Configuration", aliases=("codex_runtime",),
                args_hint="[auto|codex_app_server]"),
@@ -218,8 +218,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
     CommandDef("debug", "Upload debug report (system info + logs) and get shareable links", "Info"),
 
     # Exit
-    CommandDef("quit", "Exit the CLI", "Exit",
-               cli_only=True, aliases=("exit",)),
+    CommandDef("quit", "Exit the CLI (use --delete to also remove session history)", "Exit",
+               cli_only=True, aliases=("exit",), args_hint="[--delete]"),
 ]
 
 
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index d9f24a2e415..55b76b58850 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -346,6 +346,58 @@ def recommended_update_command() -> str:
     return recommended_update_command_for_method(method)
 
 
+# Long-form text for ``hermes update`` / ``--check`` when running inside the
+# Docker image.  Surfaced by ``cmd_update`` and ``_cmd_update_check`` in
+# hermes_cli/main.py; lives here so the wording stays consistent and we
+# don't grow two slightly-different copies.
+#
+# Why this matters:
+#   - The published image excludes ``.git`` (see .dockerignore), so the
+#     git-based update path can never succeed inside the container.
+#   - The pre-existing fallback message ("✗ Not a git repository. Please
+#     reinstall: curl ... install.sh") is actively misleading inside Docker
+#     — that script installs a *new* host-side Hermes, it doesn't update
+#     the running container.
+#   - The right action is ``docker pull`` + restart the container; this
+#     helper spells that out, with notes on tag pinning and config
+#     persistence so users don't get blindsided.
+_DOCKER_UPDATE_MESSAGE = """\
+✗ ``hermes update`` doesn't apply inside the Docker container.
+
+Hermes Agent runs as a published image (nousresearch/hermes-agent), not a
+git checkout — the container has no working tree to pull into.  Update by
+pulling a fresh image and restarting your container instead:
+
+  docker pull nousresearch/hermes-agent:latest
+  # then restart whatever started the container, e.g.:
+  docker compose up -d --force-recreate hermes-agent
+  # or, for ad-hoc runs, exit the current container and `docker run` again
+
+Verify the new version after restart:
+  docker run --rm nousresearch/hermes-agent:latest --version
+
+Notes:
+  • If you pinned a specific tag (e.g. ``:v0.14.0``) the ``:latest`` tag
+    won't move your container — pull the newer tag you actually want, or
+    switch to ``:latest`` / ``:main`` for rolling updates.  See available
+    tags at https://hub.docker.com/r/nousresearch/hermes-agent/tags
+  • Your config and session history live under ``$HERMES_HOME`` (``/opt/data``
+    in the container, typically bind-mounted from the host) and persist
+    across image upgrades — re-pulling doesn't lose any state.
+  • Running a fork?  Build your own image with this repo's ``Dockerfile``
+    and replace the ``docker pull`` step with your build/push pipeline."""
+
+
+def format_docker_update_message() -> str:
+    """Return the user-facing message for ``hermes update`` inside Docker.
+
+    Centralised so ``cmd_update`` (the apply path) and ``_cmd_update_check``
+    (the dry-run path) share the same wording.  See ``_DOCKER_UPDATE_MESSAGE``
+    above for the full rationale.
+    """
+    return _DOCKER_UPDATE_MESSAGE
+
+
 def format_managed_message(action: str = "modify this Hermes installation") -> str:
     """Build a user-facing error for managed installs."""
     managed_system = get_managed_system() or "a package manager"
@@ -618,6 +670,27 @@ DEFAULT_CONFIG = {
         # (force on/off for all models), or a list of model-name substrings
         # to match (e.g. ["gpt", "codex", "gemini", "qwen"]).
         "tool_use_enforcement": "auto",
+        # Universal "finish the job" guidance — short prompt block applied to
+        # all models that targets two cross-family failure modes: (1) stopping
+        # after a stub instead of finishing the artifact, (2) fabricating
+        # plausible-looking output when a real path is blocked.  Costs ~80
+        # tokens in the cached system prompt.  Set False to disable globally.
+        "task_completion_guidance": True,
+        # Local-environment toolchain probe — surfaces Python/pip/uv/PEP-668
+        # state in the system prompt when something non-default is detected
+        # (e.g. python3 has no pip module, pip→python version mismatch, PEP
+        # 668 enforcement without uv).  Costs zero tokens when the env is
+        # clean (probe emits nothing).  Skipped for remote terminal backends
+        # (docker/modal/ssh — they have their own probe).  Set False to
+        # disable entirely.
+        "environment_probe": True,
+        # Embedder-supplied environment description appended to the system
+        # prompt's environment-hints block. Lets a host that wraps Hermes
+        # (sandbox runner, managed platform) explain the runtime environment
+        # — proxy, credential handling, mount layout — without editing the
+        # identity slot (SOUL.md). Empty by default. The HERMES_ENVIRONMENT_HINT
+        # env var overrides this (build-time/container mechanism).
+        "environment_hint": "",
         # Staged inactivity warning: send a warning to the user at this
         # threshold before escalating to a full timeout.  The warning fires
         # once per run and does not interrupt the agent.  0 = disable warning.
@@ -785,6 +858,11 @@ DEFAULT_CONFIG = {
             "session_key": "",
             # Rehydrate tab_id from Camofox before creating a new tab.
             "adopt_existing_tab": False,
+            # Docker Camofox opens page URLs from inside the container. Enable
+            # this to rewrite loopback page URLs (localhost/127.0.0.1/::1) to a
+            # host alias while leaving CAMOFOX_URL itself unchanged.
+            "rewrite_loopback_urls": False,
+            "loopback_host_alias": "host.docker.internal",
         },
     },
 
@@ -1681,6 +1759,15 @@ DEFAULT_CONFIG = {
         # assignee to any installed profile. When unset, falls back to the
         # default profile. A task never ends up with assignee=None.
         "default_assignee": "",
+        # Per-profile concurrency cap (#21582). When set to a positive int,
+        # no single profile can have more than N workers running at once,
+        # even if the global max_in_progress / max_spawn caps would allow
+        # it. Tasks blocked this way defer to the next dispatcher tick.
+        # Unset (None) means "no per-profile cap" — backward-compatible
+        # with existing installs. Useful for fan-out workflows that would
+        # otherwise saturate one profile's local model / API quota /
+        # browser pool while leaving other profiles idle.
+        "max_in_progress_per_profile": None,
         # When true, the kanban dispatcher auto-runs the decomposer on
         # tasks that land in Triage (every dispatcher tick). When false,
         # decomposition is manual via `hermes kanban decompose <id>` or
@@ -1712,6 +1799,38 @@ DEFAULT_CONFIG = {
         "mode": "project",
     },
 
+    # Tool Search (progressive disclosure for large tool surfaces).
+    # When the model is connected to many MCP servers or non-core plugin
+    # tools, their JSON schemas can consume a substantial fraction of the
+    # context window on every turn. When enabled, those tools are replaced
+    # in the model-facing tools array with three bridge tools —
+    # tool_search / tool_describe / tool_call — and surfaced on demand.
+    #
+    # Core Hermes tools (terminal, read_file, write_file, patch,
+    # search_files, todo, memory, browser_*, etc.) are NEVER deferred.
+    # See tools/tool_search.py for full design notes and the
+    # openclaw-tool-search-report PDF in this PR for the rationale.
+    "tools": {
+        "tool_search": {
+            # "auto" (default) — activate only when deferrable tool schemas
+            #   exceed ``threshold_pct`` of the active model's context length,
+            #   so small toolsets pay no overhead.
+            # "on"  — always activate when there is at least one deferrable
+            #   tool. Use when you have many MCP servers and want maximum
+            #   token reduction unconditionally.
+            # "off" — disable entirely. Tools-array assembly is a pass-through.
+            "enabled": "auto",
+            # Percentage of context length at which "auto" mode kicks in.
+            # 10 matches the Claude Code default. Range 0..100.
+            "threshold_pct": 10,
+            # When the model calls tool_search without a ``limit`` argument,
+            # how many hits to return. Range 1..max_search_limit.
+            "search_default_limit": 5,
+            # Hard upper bound the model can request via ``limit``. Range 1..50.
+            "max_search_limit": 20,
+        },
+    },
+
     # Logging — controls file logging to ~/.hermes/logs/.
     # agent.log captures INFO+ (all agent activity); errors.log captures WARNING+.
     "logging": {
@@ -1752,6 +1871,21 @@ DEFAULT_CONFIG = {
     # Gateway settings — control how messaging platforms (Telegram, Discord,
     # Slack, etc.) deliver agent-produced files as native attachments.
     "gateway": {
+        # When false (default), any file path the agent emits is delivered
+        # as a native attachment as long as it isn't under the credential /
+        # system-path denylist (/etc, /proc, ~/.ssh, ~/.aws, ~/.hermes/.env,
+        # auth.json, etc.). This matches the symmetry of inbound delivery
+        # — we accept any document type the user uploads, and the agent
+        # can hand back any file that isn't a credential.
+        #
+        # When true, fall back to the older allowlist+recency-window
+        # behavior: files must live under the Hermes cache, under
+        # ``media_delivery_allow_dirs``, or be freshly produced inside the
+        # ``trust_recent_files_seconds`` window. Recommended for
+        # public-facing gateways where prompt injection from one user
+        # shouldn't be able to exfiltrate the host's secrets to that same
+        # user. Bridged to HERMES_MEDIA_DELIVERY_STRICT.
+        "strict": False,
         # Extra directories from which model-emitted bare file paths may be
         # uploaded as native gateway attachments. Files inside the Hermes
         # cache (~/.hermes/cache/{documents,images,audio,video,screenshots})
@@ -1759,7 +1893,7 @@ DEFAULT_CONFIG = {
         # (project dirs, scratch dirs, mounted shares). Accepts a list of
         # absolute paths or a single os.pathsep-separated string. Bridged
         # to HERMES_MEDIA_ALLOW_DIRS at gateway startup. Tilde paths are
-        # expanded.
+        # expanded. Honored in both default and strict mode.
         "media_delivery_allow_dirs": [],
         # When true, files whose mtime is within ``trust_recent_files_seconds``
         # of "now" are trusted for native delivery even outside the cache /
@@ -1767,10 +1901,12 @@ DEFAULT_CONFIG = {
         # PDFs the agent writes into a working directory. System paths
         # (/etc, /proc, ~/.ssh, ~/.aws, etc.) remain blocked regardless.
         # Disable to fall back to pure-allowlist mode. Bridged to
-        # HERMES_MEDIA_TRUST_RECENT_FILES.
+        # HERMES_MEDIA_TRUST_RECENT_FILES. Only consulted when ``strict``
+        # is true; in default mode the denylist alone gates delivery.
         "trust_recent_files": True,
         # Recency window in seconds. 600 (10 min) comfortably covers a
         # multi-tool agent turn. Bridged to HERMES_MEDIA_TRUST_RECENT_SECONDS.
+        # Only consulted when ``strict`` is true.
         "trust_recent_files_seconds": 600,
     },
 
@@ -2451,10 +2587,10 @@ OPTIONAL_ENV_VARS = {
         "advanced": True,
     },
     "TAVILY_API_KEY": {
-        "description": "Tavily API key for AI-native web search, extract, and crawl",
+        "description": "Tavily API key for AI-native web search and extract",
         "prompt": "Tavily API key",
         "url": "https://app.tavily.com/home",
-        "tools": ["web_search", "web_extract", "web_crawl"],
+        "tools": ["web_search", "web_extract"],
         "password": True,
         "category": "tool",
     },
@@ -2939,8 +3075,8 @@ OPTIONAL_ENV_VARS = {
         "advanced": True,
     },
     "API_SERVER_KEY": {
-        "description": "Bearer token for API server authentication. Required for non-loopback binding; server refuses to start without it. On loopback (127.0.0.1), all requests are allowed if empty.",
-        "prompt": "API server auth key (required for network access)",
+        "description": "Bearer token for API server authentication. Required whenever the API server is enabled; server refuses to start without it.",
+        "prompt": "API server auth key",
         "url": None,
         "password": True,
         "category": "messaging",
@@ -2955,7 +3091,7 @@ OPTIONAL_ENV_VARS = {
         "advanced": True,
     },
     "API_SERVER_HOST": {
-        "description": "Host/bind address for the API server (default: 127.0.0.1). Use 0.0.0.0 for network access — server refuses to start without API_SERVER_KEY.",
+        "description": "Host/bind address for the API server (default: 127.0.0.1). API_SERVER_KEY is still required even on loopback binds.",
         "prompt": "API server host",
         "url": None,
         "password": False,
@@ -5481,6 +5617,8 @@ def set_config_value(key: str, value: str):
         "terminal.daytona_image": "TERMINAL_DAYTONA_IMAGE",
         "terminal.docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE",
         "terminal.docker_run_as_host_user": "TERMINAL_DOCKER_RUN_AS_HOST_USER",
+        "terminal.docker_persist_across_processes": "TERMINAL_DOCKER_PERSIST_ACROSS_PROCESSES",
+        "terminal.docker_orphan_reaper": "TERMINAL_DOCKER_ORPHAN_REAPER",
         "terminal.docker_env": "TERMINAL_DOCKER_ENV",
         # terminal.cwd intentionally excluded — CLI resolves at runtime,
         # gateway bridges it in gateway/run.py. Persisting to .env causes
diff --git a/hermes_cli/dashboard_auth/middleware.py b/hermes_cli/dashboard_auth/middleware.py
index 5b42c90ebf7..3400a0cd979 100644
--- a/hermes_cli/dashboard_auth/middleware.py
+++ b/hermes_cli/dashboard_auth/middleware.py
@@ -26,10 +26,15 @@ from hermes_cli.dashboard_auth import list_providers
 from hermes_cli.dashboard_auth.audit import AuditEvent, audit_log
 from hermes_cli.dashboard_auth.base import ProviderError
 from hermes_cli.dashboard_auth.cookies import read_session_cookies
+from hermes_cli.dashboard_auth.public_paths import PUBLIC_API_PATHS
 
 _log = logging.getLogger(__name__)
 
-# Paths that bypass the auth gate. Order matters: prefix match.
+# Prefixes that bypass the auth gate. Match via ``path == prefix`` or
+# ``path.startswith(prefix)`` — so ``/assets/`` (with trailing slash)
+# matches ``/assets/foo.css`` but not ``/assetsleak``. Auth-bootstrap
+# (login page, OAuth round trip, provider listing) and static asset
+# mounts go here.
 _GATE_PUBLIC_PREFIXES: tuple[str, ...] = (
     "/auth/login",
     "/auth/callback",
@@ -45,6 +50,20 @@ _GATE_PUBLIC_PREFIXES: tuple[str, ...] = (
 
 
 def _path_is_public(path: str) -> bool:
+    """True if ``path`` bypasses the OAuth auth gate.
+
+    Two sources of public-ness:
+
+    * :data:`PUBLIC_API_PATHS` — the shared ``/api/*`` allowlist that
+      the legacy ``_SESSION_TOKEN`` middleware also honours. Matched
+      exactly (no prefix expansion) so adding ``/api/status`` doesn't
+      accidentally expose ``/api/status/secret-extension``.
+    * :data:`_GATE_PUBLIC_PREFIXES` — auth-bootstrap routes and static
+      mounts. Prefix-matched so ``/assets/foo.css`` lights up via
+      ``/assets/``.
+    """
+    if path in PUBLIC_API_PATHS:
+        return True
     return any(
         path == prefix or path.startswith(prefix)
         for prefix in _GATE_PUBLIC_PREFIXES
diff --git a/hermes_cli/dashboard_auth/public_paths.py b/hermes_cli/dashboard_auth/public_paths.py
new file mode 100644
index 00000000000..2699e15c979
--- /dev/null
+++ b/hermes_cli/dashboard_auth/public_paths.py
@@ -0,0 +1,49 @@
+"""Shared allowlist of ``/api/*`` paths that bypass dashboard auth.
+
+Two middlewares enforce dashboard auth and previously kept independent
+copies of this list:
+
+* ``hermes_cli.web_server.auth_middleware`` — loopback / ``--insecure``
+  mode, gates on the ephemeral ``_SESSION_TOKEN``.
+* ``hermes_cli.dashboard_auth.middleware.gated_auth_middleware`` —
+  non-loopback mode, gates on the OAuth session cookie.
+
+When the lists drifted, ``/api/status`` ended up public under the legacy
+gate but 401'd under the OAuth gate. That broke the portal's wildcard
+liveness probe (``nous-account-service`` ``fly-provider.ts``
+``getInstanceRuntimeStatus``), which fetches ``/api/status`` without a
+cookie as its sole signal of "agent dashboard is alive": every healthy
+wildcard-subdomain agent surfaced as STARTING/down in the portal UI even
+though the dashboard was serving correctly.
+
+Centralising the allowlist here so both middlewares import the same
+frozenset prevents the next drift. Keep this list minimal — only truly
+non-sensitive, read-only endpoints belong here. As a sanity check, every
+entry should be safe to expose to:
+
+  * external uptime probes (Pingdom, Better Stack, NAS),
+  * the dashboard SPA before the user has logged in,
+  * anyone who happens to ``curl`` the hostname.
+
+If a new endpoint doesn't pass all three tests, it should be gated and
+the SPA should bootstrap it after login instead.
+"""
+from __future__ import annotations
+
+PUBLIC_API_PATHS: frozenset[str] = frozenset({
+    # Liveness probe target. Returns version, gateway state, active
+    # session count, and the dashboard auth-gate shape. No bodies, no
+    # session content, no secrets. Documented as the portal's wildcard
+    # liveness probe in
+    # ``docs/agent-dashboard-public-url-contract.md`` (NAS side).
+    "/api/status",
+    # Read-only config-defaults / schema feeds for the SPA's Config page.
+    "/api/config/defaults",
+    "/api/config/schema",
+    # Read-only model metadata (context windows, etc.) — same shape as
+    # provider catalogs already exposed on the public internet.
+    "/api/model/info",
+    # Read-only theme + plugin manifests for the dashboard skin engine.
+    "/api/dashboard/themes",
+    "/api/dashboard/plugins",
+})
diff --git a/hermes_cli/debug.py b/hermes_cli/debug.py
index b309ee37c54..e3f2306f665 100644
--- a/hermes_cli/debug.py
+++ b/hermes_cli/debug.py
@@ -17,8 +17,6 @@ import logging
 import re
 import sys
 import time
-import urllib.error
-import urllib.parse
 import urllib.request
 from dataclasses import dataclass
 from pathlib import Path
@@ -260,15 +258,6 @@ def _schedule_auto_delete(urls: list[str], delay_seconds: int = _AUTO_DELETE_SEC
     _record_pending(urls, delay_seconds=delay_seconds)
 
 
-def _delete_hint(url: str) -> str:
-    """Return a one-liner delete command for the given paste URL."""
-    paste_id = _extract_paste_id(url)
-    if paste_id:
-        return f"hermes debug delete {url}"
-    # dpaste.com — no API delete, expires on its own.
-    return "(auto-expires per dpaste.com policy)"
-
-
 def _upload_paste_rs(content: str) -> str:
     """Upload to paste.rs.  Returns the paste URL.
 
diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py
index b99eea4d567..3db70beaa72 100644
--- a/hermes_cli/doctor.py
+++ b/hermes_cli/doctor.py
@@ -8,7 +8,6 @@ import os
 import sys
 import subprocess
 import shutil
-import importlib.util
 from pathlib import Path
 
 from hermes_cli.config import get_project_root, get_hermes_home, get_env_path
diff --git a/hermes_cli/dump.py b/hermes_cli/dump.py
index ded5bb10fa1..98de32bcdea 100644
--- a/hermes_cli/dump.py
+++ b/hermes_cli/dump.py
@@ -20,7 +20,15 @@ from agent.skill_utils import is_excluded_skill_path
 
 
 def _get_git_commit(project_root: Path) -> str:
-    """Return short git commit hash, or '(unknown)'."""
+    """Return short git commit hash, or '(unknown)'.
+
+    Source installs and dev images resolve this live via ``git rev-parse``.
+    The published Docker image excludes ``.git`` from the build context, so
+    that lookup always fails — we fall back to the baked-in build SHA written
+    to ``<project_root>/.hermes_build_sha`` by the Dockerfile's
+    ``HERMES_GIT_SHA`` build-arg (see ``hermes_cli/build_info.py``).
+    The output format is identical regardless of source.
+    """
     try:
         result = subprocess.run(
             ["git", "rev-parse", "--short=8", "HEAD"],
@@ -28,9 +36,23 @@ def _get_git_commit(project_root: Path) -> str:
             cwd=str(project_root),
         )
         if result.returncode == 0:
-            return result.stdout.strip()
+            value = result.stdout.strip()
+            if value:
+                return value
     except Exception:
         pass
+
+    # Fall back to the build-time baked SHA (populated in published Docker
+    # images, absent otherwise).  Defers the import so the dump module
+    # stays cheap on non-dump code paths.
+    try:
+        from hermes_cli.build_info import get_build_sha
+        baked = get_build_sha(short=8)
+        if baked:
+            return baked
+    except Exception:
+        pass
+
     return "(unknown)"
 
 
diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py
index bceb1c3f191..98b22ec7242 100644
--- a/hermes_cli/gateway.py
+++ b/hermes_cli/gateway.py
@@ -216,9 +216,7 @@ def _request_gateway_self_restart(pid: int) -> bool:
     if not _is_pid_ancestor_of_current_process(pid):
         return False
     try:
-        os.kill(
-            pid, signal.SIGUSR1
-        )  # windows-footgun: ok — POSIX signal, guarded by hasattr(signal, 'SIGUSR1') above
+        os.kill(pid, signal.SIGUSR1)  # windows-footgun: ok — POSIX signal, guarded by hasattr(signal, 'SIGUSR1') above
     except (ProcessLookupError, PermissionError, OSError):
         return False
     return True
@@ -254,9 +252,7 @@ def _graceful_restart_via_sigusr1(pid: int, drain_timeout: float) -> bool:
     if pid <= 0:
         return False
     try:
-        os.kill(
-            pid, signal.SIGUSR1
-        )  # windows-footgun: ok — POSIX signal, guarded by hasattr(signal, 'SIGUSR1') above
+        os.kill(pid, signal.SIGUSR1)  # windows-footgun: ok — POSIX signal, guarded by hasattr(signal, 'SIGUSR1') above
     except ProcessLookupError:
         # Already gone — nothing to drain.
         return True
@@ -1477,17 +1473,13 @@ class SystemScopeRequiresRootError(RuntimeError):
 
 def _user_dbus_socket_path() -> Path:
     """Return the expected per-user D-Bus socket path (regardless of existence)."""
-    xdg = (
-        os.environ.get("XDG_RUNTIME_DIR") or f"/run/user/{os.getuid()}"
-    )  # windows-footgun: ok — POSIX systemd helper, never invoked on Windows
+    xdg = os.environ.get("XDG_RUNTIME_DIR") or f"/run/user/{os.getuid()}"  # windows-footgun: ok — POSIX systemd helper, never invoked on Windows
     return Path(xdg) / "bus"
 
 
 def _user_systemd_private_socket_path() -> Path:
     """Return the per-user systemd private socket path (regardless of existence)."""
-    xdg = (
-        os.environ.get("XDG_RUNTIME_DIR") or f"/run/user/{os.getuid()}"
-    )  # windows-footgun: ok — POSIX systemd helper, never invoked on Windows
+    xdg = os.environ.get("XDG_RUNTIME_DIR") or f"/run/user/{os.getuid()}"  # windows-footgun: ok — POSIX systemd helper, never invoked on Windows
     return Path(xdg) / "systemd" / "private"
 
 
@@ -1513,9 +1505,7 @@ def _ensure_user_systemd_env() -> None:
     We detect the standard socket path and set the vars so all subsequent
     subprocess calls inherit them.
     """
-    uid = (
-        os.getuid()
-    )  # windows-footgun: ok — POSIX systemd helper, never invoked on Windows
+    uid = os.getuid()  # windows-footgun: ok — POSIX systemd helper, never invoked on Windows
     if "XDG_RUNTIME_DIR" not in os.environ:
         runtime_dir = f"/run/user/{uid}"
         if Path(runtime_dir).exists():
@@ -1855,9 +1845,7 @@ def remove_legacy_hermes_units(
 
     # System-scope removal (needs root)
     if system_units:
-        if (
-            os.geteuid() != 0
-        ):  # windows-footgun: ok — Linux systemd removal path, guarded by `if system == "Linux"` / systemd-only branch
+        if os.geteuid() != 0:  # windows-footgun: ok — Linux systemd removal path, guarded by `if system == "Linux"` / systemd-only branch
             print()
             print_warning("System-scope legacy units require root to remove.")
             print_info("  Re-run with: sudo hermes gateway migrate-legacy")
@@ -1912,9 +1900,7 @@ def print_systemd_scope_conflict_warning() -> None:
 
 
 def _require_root_for_system_service(action: str) -> None:
-    if (
-        os.geteuid() != 0
-    ):  # windows-footgun: ok — POSIX systemd helper, never invoked on Windows
+    if os.geteuid() != 0:  # windows-footgun: ok — POSIX systemd helper, never invoked on Windows
         raise SystemScopeRequiresRootError(
             f"System gateway {action} requires root. Re-run with sudo.",
             action,
@@ -1994,9 +1980,7 @@ def install_linux_gateway_from_setup(force: bool = False, enable_on_startup: boo
 
     if scope == "system":
         run_as_user = _default_system_service_user()
-        if (
-            os.geteuid() != 0
-        ):  # windows-footgun: ok — Linux systemd install wizard, never invoked on Windows
+        if os.geteuid() != 0:  # windows-footgun: ok — Linux systemd install wizard, never invoked on Windows
             print_warning(
                 "  System service install requires sudo, so Hermes can't create it from this user session."
             )
@@ -2049,9 +2033,7 @@ def get_systemd_linger_status() -> tuple[bool | None, str]:
         try:
             import pwd
 
-            username = pwd.getpwuid(
-                os.getuid()
-            ).pw_name  # windows-footgun: ok — POSIX loginctl helper, never invoked on Windows
+            username = pwd.getpwuid(os.getuid()).pw_name  # windows-footgun: ok — POSIX loginctl helper, never invoked on Windows
         except Exception:
             return None, "could not determine current user"
 
@@ -2102,9 +2084,7 @@ def _launchd_user_home() -> Path:
     """
     import pwd
 
-    return Path(
-        pwd.getpwuid(os.getuid()).pw_dir
-    )  # windows-footgun: ok — POSIX launchd (macOS) helper, never invoked on Windows
+    return Path(pwd.getpwuid(os.getuid()).pw_dir)  # windows-footgun: ok — POSIX launchd (macOS) helper, never invoked on Windows
 
 
 def get_launchd_plist_path() -> Path:
@@ -2305,9 +2285,37 @@ def _build_service_path_dirs(project_root: Path | None = None) -> list[str]:
     return candidates
 
 
+def _stable_service_working_dir() -> str:
+    """Return a WorkingDirectory that will not disappear out from under systemd.
+
+    The gateway does NOT need its cwd to be the source checkout — ``ExecStart``
+    uses an absolute python interpreter and ``-m hermes_cli.main``, so module
+    resolution does not depend on cwd. Pinning ``WorkingDirectory`` to
+    ``PROJECT_ROOT`` (``Path(__file__).parent.parent``) is actively harmful:
+    when the unit is generated from a transient checkout — a ``.worktrees/``
+    dir, or a clone that ``hermes update`` later relocates/removes — the path
+    rots. systemd then fails the start at the CHDIR step (``status=200/CHDIR``,
+    "Changing to the requested working directory failed") *before* Python
+    loads, so the on-boot ``refresh_systemd_unit_if_needed()`` self-heal never
+    runs and ``Restart=always`` crash-loops forever on a dead directory.
+
+    ``HERMES_HOME`` is the stable anchor: it is where config/state/logs live,
+    it never moves, and it is guaranteed to exist whenever the gateway is
+    meaningfully installed. Fall back to ``PROJECT_ROOT`` only if HERMES_HOME
+    cannot be resolved (it always can in practice).
+    """
+    try:
+        home = get_hermes_home()
+        if home and Path(home).is_dir():
+            return str(Path(home).resolve())
+    except Exception:
+        pass
+    return str(PROJECT_ROOT)
+
+
 def generate_systemd_unit(system: bool = False, run_as_user: str | None = None) -> str:
     python_path = get_python_path()
-    working_dir = str(PROJECT_ROOT)
+    working_dir = _stable_service_working_dir()
     detected_venv = _detect_venv_dir()
     venv_dir = str(detected_venv) if detected_venv else str(PROJECT_ROOT / "venv")
 
@@ -2343,7 +2351,10 @@ def generate_systemd_unit(system: bool = False, run_as_user: str | None = None)
         # (e.g. /root/) to the target user's home so the service can
         # actually access them.
         python_path = _remap_path_for_user(python_path, home_dir)
-        working_dir = _remap_path_for_user(working_dir, home_dir)
+        # Anchor cwd to the target user's HERMES_HOME (stable, always exists)
+        # rather than a remapped source-checkout path that can rot. See
+        # _stable_service_working_dir() for the full rationale.
+        working_dir = str(hermes_home) if hermes_home else _remap_path_for_user(working_dir, home_dir)
         venv_dir = _remap_path_for_user(venv_dir, home_dir)
         path_entries = [_remap_path_for_user(p, home_dir) for p in path_entries]
         path_entries.extend(_build_user_local_paths(Path(home_dir), path_entries))
@@ -2570,9 +2581,7 @@ def _system_scope_wizard_would_need_root(system: bool = False) -> bool:
     ``SystemScopeRequiresRootError`` propagate out and leave the user
     staring at a bare shell.
     """
-    if (
-        os.geteuid() == 0
-    ):  # windows-footgun: ok — systemd scope wizard decision, never invoked on Windows
+    if os.geteuid() == 0:  # windows-footgun: ok — systemd scope wizard decision, never invoked on Windows
         return False
     return _select_systemd_scope(system=system)
 
@@ -3000,7 +3009,10 @@ def _launchd_domain() -> str:
 
 def generate_launchd_plist() -> str:
     python_path = get_python_path()
-    working_dir = str(PROJECT_ROOT)
+    # Stable cwd anchor — never the volatile source checkout. See
+    # _stable_service_working_dir() for the rationale (same rot risk applies
+    # to launchd's WorkingDirectory as to systemd's).
+    working_dir = _stable_service_working_dir()
     hermes_home = str(get_hermes_home().resolve())
     log_dir = get_hermes_home() / "logs"
     log_dir.mkdir(parents=True, exist_ok=True)
@@ -4470,18 +4482,6 @@ def _setup_whatsapp():
     cmd_whatsapp(argparse.Namespace())
 
 
-def _setup_email():
-    """Configure Email via the standard platform setup."""
-    email_platform = next(p for p in _PLATFORMS if p["key"] == "email")
-    _setup_standard_platform(email_platform)
-
-
-def _setup_sms():
-    """Configure SMS (Twilio) via the standard platform setup."""
-    sms_platform = next(p for p in _PLATFORMS if p["key"] == "sms")
-    _setup_standard_platform(sms_platform)
-
-
 def _setup_dingtalk():
     """Configure DingTalk — QR scan (recommended) or manual credential entry."""
     from hermes_cli.setup import (
@@ -4667,12 +4667,6 @@ def _setup_wecom():
     print_success("💬 WeCom configured!")
 
 
-def _setup_yuanbao():
-    """Configure Yuanbao via the standard platform setup."""
-    yuanbao_platform = next(p for p in _PLATFORMS if p["key"] == "yuanbao")
-    _setup_standard_platform(yuanbao_platform)
-
-
 def _is_service_installed() -> bool:
     """Check if the gateway is installed as a system service."""
     if supports_systemd_services():
diff --git a/hermes_cli/gateway_windows.py b/hermes_cli/gateway_windows.py
index e019bb3e638..a7f4b983dcb 100644
--- a/hermes_cli/gateway_windows.py
+++ b/hermes_cli/gateway_windows.py
@@ -1014,12 +1014,70 @@ def start() -> None:
     _report_gateway_start(f"direct spawn (PID {pid})")
 
 
-def stop() -> None:
-    """Stop the gateway. Tries /End on the scheduled task, then kills any stragglers."""
-    _assert_windows()
-    from hermes_cli.gateway import kill_gateway_processes
+def _drain_gateway_pid(pid: int, drain_timeout: float) -> bool:
+    """Write the planned-stop marker and wait for the gateway PID to exit.
 
-    stopped_any = False
+    Windows cannot deliver POSIX signals to a Python asyncio loop
+    (``loop.add_signal_handler`` raises NotImplementedError), so writing
+    the marker is the ONLY way to ask a running gateway to drain
+    in-flight agents and persist ``resume_pending`` before exit. The
+    gateway's planned-stop watcher thread (gateway/run.py) polls for
+    the marker and drives the same shutdown path the SIGTERM handler
+    would have on POSIX.
+
+    Returns True if the PID exited within the timeout, False if it
+    didn't (caller should escalate to schtasks /End + taskkill).
+    """
+    if pid <= 0:
+        return False
+    try:
+        from gateway.status import write_planned_stop_marker, _pid_exists
+    except ImportError:
+        return False
+
+    try:
+        write_planned_stop_marker(pid)
+    except Exception:
+        # Best-effort: if the marker can't be written, we have no choice
+        # but to fall through to a hard kill.  Caller decides escalation.
+        pass
+
+    deadline = time.monotonic() + max(drain_timeout, 1.0)
+    while time.monotonic() < deadline:
+        if not _pid_exists(pid):
+            return True
+        time.sleep(0.5)
+    return False
+
+
+def stop() -> None:
+    """Stop the gateway.
+
+    Writes the planned-stop marker first so the gateway can drain
+    in-flight agents and persist ``resume_pending`` before exit (the
+    gateway's marker-watcher thread picks this up — Windows asyncio
+    can't deliver SIGTERM to the loop, so the marker is our only IPC).
+    Then escalates: ``schtasks /End`` (kills the scheduled-task tree)
+    + ``kill_gateway_processes(force=True)`` for any strays.
+    """
+    _assert_windows()
+    from hermes_cli.gateway import kill_gateway_processes, _get_restart_drain_timeout
+    from gateway.status import get_running_pid
+
+    # Phase 1: ask the running gateway (if any) to drain itself by writing
+    # the planned-stop marker, then wait briefly for it to exit cleanly.
+    # On clean exit, sessions land with resume_pending=True and the next
+    # boot will auto-resume them.
+    pid = get_running_pid()
+    drained = False
+    if pid is not None:
+        try:
+            drain_timeout = float(_get_restart_drain_timeout() or 30.0)
+        except Exception:
+            drain_timeout = 30.0
+        drained = _drain_gateway_pid(pid, drain_timeout)
+
+    stopped_any = drained
     if is_task_registered():
         code, _out, err = _exec_schtasks(["/End", "/TN", get_task_name()])
         # schtasks returns nonzero when the task isn't currently running — don't treat that as an error.
@@ -1028,12 +1086,19 @@ def stop() -> None:
         elif "not running" not in (err or "").lower():
             print(f"⚠ schtasks /End returned code {code}: {err.strip()}")
 
-    killed = kill_gateway_processes(all_profiles=False)
+    # Phase 3: hard-kill any strays.  When drain succeeded this is a no-op;
+    # when drain timed out this is the escalation that ensures the PID
+    # actually exits.  Use force=True on Windows so taskkill /T /F walks
+    # the descendant tree (browser helpers, etc.).
+    killed = kill_gateway_processes(all_profiles=False, force=not drained)
     if killed:
         stopped_any = True
         print(f"✓ Killed {killed} gateway process(es)")
     if stopped_any:
-        print("✓ Gateway stopped")
+        if drained:
+            print("✓ Gateway stopped (drained cleanly)")
+        else:
+            print("✓ Gateway stopped")
     else:
         print("✗ No gateway was running")
 
diff --git a/hermes_cli/kanban.py b/hermes_cli/kanban.py
index 1e7169c26cf..a6e76fe35a4 100644
--- a/hermes_cli/kanban.py
+++ b/hermes_cli/kanban.py
@@ -548,6 +548,11 @@ def build_parser(parent_subparsers: argparse._SubParsersAction) -> argparse.Argu
                             help="Additional task ids to schedule with the same reason (bulk mode)")
 
     p_unblock = sub.add_parser("unblock", help="Return one or more blocked/scheduled tasks to ready")
+    p_unblock.add_argument(
+        "--reason",
+        default=None,
+        help="Optional reason/note — recorded as a comment before unblocking. Quote multi-word reasons.",
+    )
     p_unblock.add_argument("task_ids", nargs="+")
 
     p_promote = sub.add_parser(
@@ -1021,7 +1026,7 @@ def _board_task_counts(slug: str) -> dict[str, int]:
         path = kb.kanban_db_path(board=slug)
         if not path.exists():
             return {}
-        with kb.connect(board=slug) as conn:
+        with kb.connect_closing(board=slug) as conn:
             rows = conn.execute(
                 "SELECT status, COUNT(*) AS n FROM tasks GROUP BY status"
             ).fetchall()
@@ -1264,7 +1269,7 @@ def _cmd_init(args: argparse.Namespace) -> int:
 
 
 def _cmd_heartbeat(args: argparse.Namespace) -> int:
-    with kb.connect() as conn:
+    with kb.connect_closing() as conn:
         ok = kb.heartbeat_worker(
             conn,
             args.task_id,
@@ -1279,7 +1284,7 @@ def _cmd_heartbeat(args: argparse.Namespace) -> int:
 
 
 def _cmd_assignees(args: argparse.Namespace) -> int:
-    with kb.connect() as conn:
+    with kb.connect_closing() as conn:
         data = kb.known_assignees(conn)
     if getattr(args, "json", False):
         print(json.dumps(data, indent=2, ensure_ascii=False))
@@ -1320,7 +1325,7 @@ def _cmd_create(args: argparse.Namespace) -> int:
             file=sys.stderr,
         )
         return 2
-    with kb.connect() as conn:
+    with kb.connect_closing() as conn:
         task_id = kb.create_task(
             conn,
             title=args.title,
@@ -1369,7 +1374,7 @@ def _cmd_swarm(args: argparse.Namespace) -> int:
     if not workers:
         print("kanban swarm: at least one --worker is required", file=sys.stderr)
         return 2
-    with kb.connect() as conn:
+    with kb.connect_closing() as conn:
         created = ks.create_swarm(
             conn,
             goal=args.goal,
@@ -1395,7 +1400,7 @@ def _cmd_list(args: argparse.Namespace) -> int:
     assignee = args.assignee
     if args.mine and not assignee:
         assignee = _profile_author()
-    with kb.connect() as conn:
+    with kb.connect_closing() as conn:
         # Cheap "mini-dispatch": recompute ready so list output reflects
         # dependencies that may have cleared since the last dispatcher tick.
         kb.recompute_ready(conn)
@@ -1444,7 +1449,7 @@ def _cmd_show(args: argparse.Namespace) -> int:
             file=sys.stderr,
         )
         return 2
-    with kb.connect() as conn:
+    with kb.connect_closing() as conn:
         task = kb.get_task(conn, args.task_id)
         if not task:
             print(f"no such task: {args.task_id}", file=sys.stderr)
@@ -1610,7 +1615,7 @@ def _cmd_show(args: argparse.Namespace) -> int:
 
 def _cmd_assign(args: argparse.Namespace) -> int:
     profile = None if args.profile.lower() in {"none", "-", "null"} else args.profile
-    with kb.connect() as conn:
+    with kb.connect_closing() as conn:
         ok = kb.assign_task(conn, args.task_id, profile)
     if not ok:
         print(f"no such task: {args.task_id}", file=sys.stderr)
@@ -1620,7 +1625,7 @@ def _cmd_assign(args: argparse.Namespace) -> int:
 
 
 def _cmd_reclaim(args: argparse.Namespace) -> int:
-    with kb.connect() as conn:
+    with kb.connect_closing() as conn:
         ok = kb.reclaim_task(
             conn, args.task_id,
             reason=getattr(args, "reason", None),
@@ -1637,7 +1642,7 @@ def _cmd_reclaim(args: argparse.Namespace) -> int:
 
 def _cmd_reassign(args: argparse.Namespace) -> int:
     profile = None if args.profile.lower() in {"none", "-", "null"} else args.profile
-    with kb.connect() as conn:
+    with kb.connect_closing() as conn:
         ok = kb.reassign_task(
             conn, args.task_id, profile,
             reclaim_first=bool(getattr(args, "reclaim", False)),
@@ -1667,7 +1672,7 @@ def _cmd_diagnostics(args: argparse.Namespace) -> int:
 
     diag_config = kd.config_from_runtime_config(load_config())
 
-    with kb.connect() as conn:
+    with kb.connect_closing() as conn:
         # Either one-task mode or fleet mode.
         if getattr(args, "task", None):
             task = kb.get_task(conn, args.task)
@@ -1790,14 +1795,14 @@ def _cmd_diagnostics(args: argparse.Namespace) -> int:
 
 
 def _cmd_link(args: argparse.Namespace) -> int:
-    with kb.connect() as conn:
+    with kb.connect_closing() as conn:
         kb.link_tasks(conn, args.parent_id, args.child_id)
     print(f"Linked {args.parent_id} -> {args.child_id}")
     return 0
 
 
 def _cmd_unlink(args: argparse.Namespace) -> int:
-    with kb.connect() as conn:
+    with kb.connect_closing() as conn:
         ok = kb.unlink_tasks(conn, args.parent_id, args.child_id)
     if not ok:
         print(f"No such link: {args.parent_id} -> {args.child_id}", file=sys.stderr)
@@ -1807,7 +1812,7 @@ def _cmd_unlink(args: argparse.Namespace) -> int:
 
 
 def _cmd_claim(args: argparse.Namespace) -> int:
-    with kb.connect() as conn:
+    with kb.connect_closing() as conn:
         task = kb.claim_task(conn, args.task_id, ttl_seconds=args.ttl)
         if task is None:
             # Report why
@@ -1838,7 +1843,7 @@ def _cmd_comment(args: argparse.Namespace) -> int:
             suffix = f"\n\n[trimmed to {args.max_len} chars by --max-len]"
             body = body[: max(0, args.max_len - len(suffix))].rstrip() + suffix
     author = args.author or _profile_author()
-    with kb.connect() as conn:
+    with kb.connect_closing() as conn:
         kb.add_comment(conn, args.task_id, author, body)
     print(f"Comment added to {args.task_id}")
     return 0
@@ -1885,7 +1890,7 @@ def _cmd_complete(args: argparse.Namespace) -> int:
             print(f"kanban: --metadata: {exc}", file=sys.stderr)
             return 2
     failed: list[str] = []
-    with kb.connect() as conn:
+    with kb.connect_closing() as conn:
         for tid in ids:
             if not kb.complete_task(
                 conn, tid,
@@ -1912,7 +1917,7 @@ def _cmd_edit(args: argparse.Namespace) -> int:
         except (ValueError, json.JSONDecodeError) as exc:
             print(f"kanban: --metadata: {exc}", file=sys.stderr)
             return 2
-    with kb.connect() as conn:
+    with kb.connect_closing() as conn:
         if not kb.edit_completed_task_result(
             conn,
             args.task_id,
@@ -1934,7 +1939,7 @@ def _cmd_block(args: argparse.Namespace) -> int:
     author = _profile_author()
     ids = [args.task_id] + list(getattr(args, "ids", None) or [])
     failed: list[str] = []
-    with kb.connect() as conn:
+    with kb.connect_closing() as conn:
         for tid in ids:
             if reason:
                 kb.add_comment(conn, tid, author, f"BLOCKED: {reason}")
@@ -1956,7 +1961,7 @@ def _cmd_schedule(args: argparse.Namespace) -> int:
     author = _profile_author()
     ids = [args.task_id] + list(getattr(args, "ids", None) or [])
     failed: list[str] = []
-    with kb.connect() as conn:
+    with kb.connect_closing() as conn:
         for tid in ids:
             if reason:
                 kb.add_comment(conn, tid, author, f"SCHEDULED: {reason}")
@@ -1978,14 +1983,20 @@ def _cmd_unblock(args: argparse.Namespace) -> int:
     if not ids:
         print("at least one task_id is required", file=sys.stderr)
         return 1
+    reason = getattr(args, "reason", None)
+    if reason is not None:
+        reason = reason.strip() or None
+    author = _profile_author() if reason else None
     failed: list[str] = []
-    with kb.connect() as conn:
+    with kb.connect_closing() as conn:
         for tid in ids:
+            if reason:
+                kb.add_comment(conn, tid, author, f"UNBLOCK: {reason}")
             if not kb.unblock_task(conn, tid):
                 failed.append(tid)
                 print(f"cannot unblock {tid} (not blocked/scheduled?)", file=sys.stderr)
             else:
-                print(f"Unblocked {tid}")
+                print(f"Unblocked {tid}" + (f": {reason}" if reason else ""))
     return 0 if not failed else 1
 
 
@@ -2003,7 +2014,7 @@ def _cmd_promote(args: argparse.Namespace) -> int:
             seen.add(tid)
 
     results: list[dict[str, object]] = []
-    with kb.connect() as conn:
+    with kb.connect_closing() as conn:
         for tid in ids:
             ok, err = kb.promote_task(
                 conn,
@@ -2050,7 +2061,7 @@ def _cmd_archive(args: argparse.Namespace) -> int:
         print("at least one task_id is required", file=sys.stderr)
         return 1
     failed: list[str] = []
-    with kb.connect() as conn:
+    with kb.connect_closing() as conn:
         if purge_ids:
             for tid in purge_ids:
                 if not kb.delete_archived_task(conn, tid):
@@ -2073,7 +2084,7 @@ def _cmd_tail(args: argparse.Namespace) -> int:
     print(f"Tailing events for {args.task_id}. Ctrl-C to stop.")
     try:
         while True:
-            with kb.connect() as conn:
+            with kb.connect_closing() as conn:
                 events = kb.list_events(conn, args.task_id)
             for e in events:
                 if e.id > last_id:
@@ -2087,12 +2098,52 @@ def _cmd_tail(args: argparse.Namespace) -> int:
 
 
 def _cmd_dispatch(args: argparse.Namespace) -> int:
-    with kb.connect() as conn:
+    # Honour kanban.default_assignee as the fallback for unassigned ready
+    # tasks (#27145), kanban.max_in_progress as the global concurrency cap
+    # (#33488), kanban.max_in_progress_per_profile as the per-profile
+    # cap (#21582), and kanban.max_spawn as the per-tick spawn limit
+    # (#28805). Same semantics as the gateway dispatch path so behavior
+    # matches whether the user runs the CLI directly or relies on the
+    # gateway-embedded dispatcher.
+    try:
+        from hermes_cli.config import load_config
+        _cfg = load_config()
+        _kanban_cfg = _cfg.get("kanban", {}) if isinstance(_cfg, dict) else {}
+        default_assignee = (_kanban_cfg.get("default_assignee") or "").strip() or None
+
+        def _coerce_positive_int(value):
+            if value is None:
+                return None
+            try:
+                ival = int(value)
+            except (TypeError, ValueError):
+                return None
+            return ival if ival >= 1 else None
+
+        max_in_progress_per_profile = _coerce_positive_int(
+            _kanban_cfg.get("max_in_progress_per_profile")
+        )
+        max_in_progress = _coerce_positive_int(_kanban_cfg.get("max_in_progress"))
+        # CLI --max overrides config kanban.max_spawn when both are present;
+        # CLI is the more explicit signal so it wins.
+        cli_max = getattr(args, "max", None)
+        max_spawn = cli_max if cli_max is not None else _coerce_positive_int(
+            _kanban_cfg.get("max_spawn")
+        )
+    except Exception:
+        default_assignee = None
+        max_in_progress_per_profile = None
+        max_in_progress = None
+        max_spawn = getattr(args, "max", None)
+    with kb.connect_closing() as conn:
         res = kb.dispatch_once(
             conn,
             dry_run=args.dry_run,
-            max_spawn=args.max,
+            max_spawn=max_spawn,
+            max_in_progress=max_in_progress,
             failure_limit=getattr(args, "failure_limit", kb.DEFAULT_SPAWN_FAILURE_LIMIT),
+            default_assignee=default_assignee,
+            max_in_progress_per_profile=max_in_progress_per_profile,
         )
     if getattr(args, "json", False):
         print(json.dumps({
@@ -2108,6 +2159,11 @@ def _cmd_dispatch(args: argparse.Namespace) -> int:
             ],
             "skipped_unassigned": res.skipped_unassigned,
             "skipped_nonspawnable": res.skipped_nonspawnable,
+            "skipped_per_profile_capped": [
+                {"task_id": tid, "assignee": who, "current": current}
+                for (tid, who, current) in res.skipped_per_profile_capped
+            ],
+            "auto_assigned_default": res.auto_assigned_default,
         }, indent=2))
         return 0
     print(f"Reclaimed:    {res.reclaimed}")
@@ -2128,8 +2184,18 @@ def _cmd_dispatch(args: argparse.Namespace) -> int:
     for tid, who, ws in res.spawned:
         tag = " (dry)" if args.dry_run else ""
         print(f"  - {tid}  ->  {who}  @ {ws or '-'}{tag}")
+    if res.auto_assigned_default:
+        print(
+            f"Auto-assigned to kanban.default_assignee={default_assignee!r}: "
+            f"{', '.join(res.auto_assigned_default)}"
+        )
     if res.skipped_unassigned:
         print(f"Skipped (unassigned): {', '.join(res.skipped_unassigned)}")
+    if res.skipped_per_profile_capped:
+        for tid, who, current in res.skipped_per_profile_capped:
+            print(
+                f"Deferred ({who} at per-profile cap, {current} running): {tid}"
+            )
     if res.skipped_nonspawnable:
         print(
             f"Skipped (non-spawnable assignee — terminal lane, OK): "
@@ -2257,7 +2323,7 @@ def _cmd_daemon(args: argparse.Namespace) -> int:
         from the dispatcher's perspective, not stuck.
         """
         try:
-            with kb.connect() as conn:
+            with kb.connect_closing() as conn:
                 return kb.has_spawnable_ready(conn)
         except Exception:
             return False
@@ -2288,7 +2354,7 @@ def _cmd_watch(args: argparse.Namespace) -> int:
     cursor = 0
     print("Watching kanban events. Ctrl-C to stop.", flush=True)
     # Seed cursor at the latest id so we don't replay history.
-    with kb.connect() as conn:
+    with kb.connect_closing() as conn:
         row = conn.execute(
             "SELECT COALESCE(MAX(id), 0) AS m FROM task_events"
         ).fetchone()
@@ -2296,7 +2362,7 @@ def _cmd_watch(args: argparse.Namespace) -> int:
 
     try:
         while True:
-            with kb.connect() as conn:
+            with kb.connect_closing() as conn:
                 rows = conn.execute(
                     "SELECT e.id, e.task_id, e.kind, e.payload, e.created_at, "
                     "       t.assignee, t.tenant "
@@ -2329,7 +2395,7 @@ def _cmd_watch(args: argparse.Namespace) -> int:
 
 
 def _cmd_stats(args: argparse.Namespace) -> int:
-    with kb.connect() as conn:
+    with kb.connect_closing() as conn:
         stats = kb.board_stats(conn)
     if getattr(args, "json", False):
         print(json.dumps(stats, indent=2, ensure_ascii=False))
@@ -2349,7 +2415,7 @@ def _cmd_stats(args: argparse.Namespace) -> int:
 
 
 def _cmd_notify_subscribe(args: argparse.Namespace) -> int:
-    with kb.connect() as conn:
+    with kb.connect_closing() as conn:
         if kb.get_task(conn, args.task_id) is None:
             print(f"no such task: {args.task_id}", file=sys.stderr)
             return 1
@@ -2366,7 +2432,7 @@ def _cmd_notify_subscribe(args: argparse.Namespace) -> int:
 
 
 def _cmd_notify_list(args: argparse.Namespace) -> int:
-    with kb.connect() as conn:
+    with kb.connect_closing() as conn:
         subs = kb.list_notify_subs(conn, args.task_id)
     if getattr(args, "json", False):
         print(json.dumps(subs, indent=2, ensure_ascii=False))
@@ -2383,7 +2449,7 @@ def _cmd_notify_list(args: argparse.Namespace) -> int:
 
 
 def _cmd_notify_unsubscribe(args: argparse.Namespace) -> int:
-    with kb.connect() as conn:
+    with kb.connect_closing() as conn:
         ok = kb.remove_notify_sub(
             conn, task_id=args.task_id,
             platform=args.platform, chat_id=args.chat_id,
@@ -2417,7 +2483,7 @@ def _cmd_runs(args: argparse.Namespace) -> int:
             file=sys.stderr,
         )
         return 2
-    with kb.connect() as conn:
+    with kb.connect_closing() as conn:
         runs = kb.list_runs(conn, args.task_id, **rsk)
     if getattr(args, "json", False):
         print(json.dumps([
@@ -2456,7 +2522,7 @@ def _cmd_runs(args: argparse.Namespace) -> int:
 
 
 def _cmd_context(args: argparse.Namespace) -> int:
-    with kb.connect() as conn:
+    with kb.connect_closing() as conn:
         text = kb.build_worker_context(conn, args.task_id)
     print(text)
     return 0
@@ -2622,7 +2688,7 @@ def _cmd_gc(args: argparse.Namespace) -> int:
     import shutil
     scratch_root = kb.workspaces_root()
     removed_ws = 0
-    with kb.connect() as conn:
+    with kb.connect_closing() as conn:
         rows = conn.execute(
             "SELECT id, workspace_kind, workspace_path FROM tasks WHERE status = 'archived'"
         ).fetchall()
@@ -2645,7 +2711,7 @@ def _cmd_gc(args: argparse.Namespace) -> int:
 
     event_days = getattr(args, "event_retention_days", 30)
     log_days = getattr(args, "log_retention_days", 30)
-    with kb.connect() as conn:
+    with kb.connect_closing() as conn:
         removed_events = kb.gc_events(
             conn, older_than_seconds=event_days * 24 * 3600,
         )
diff --git a/hermes_cli/kanban_db.py b/hermes_cli/kanban_db.py
index 55a981dbef3..5e465e87a6f 100644
--- a/hermes_cli/kanban_db.py
+++ b/hermes_cli/kanban_db.py
@@ -71,6 +71,7 @@ new locking.
 from __future__ import annotations
 
 import contextlib
+import hashlib
 import json
 import os
 import re
@@ -83,7 +84,6 @@ import threading
 import logging
 import time
 from dataclasses import dataclass, field
-from datetime import datetime
 from pathlib import Path
 from typing import Any, Iterable, Optional
 
@@ -110,6 +110,16 @@ _IS_WINDOWS = sys.platform == "win32"
 # long single-call MCP workflows.
 DEFAULT_CLAIM_TTL_SECONDS = 15 * 60
 
+# If a worker's PID is still alive but its ``last_heartbeat_at`` is
+# older than this when ``release_stale_claims`` runs, treat the worker
+# as wedged and reclaim regardless of PID liveness (#29747 gap 3).
+# This catches the logic-loop case where the process is technically
+# running but not making observable progress.  ``_touch_activity``
+# bridges chunk-level liveness into ``last_heartbeat_at`` via #31752,
+# so any genuinely active worker keeps its heartbeat fresh as a side
+# effect of normal API traffic.
+DEFAULT_CLAIM_HEARTBEAT_MAX_STALE_SECONDS = 60 * 60
+
 
 def _resolve_claim_ttl_seconds(ttl_seconds: Optional[int] = None) -> int:
     """Return the effective claim TTL, honoring the kanban env override.
@@ -982,6 +992,89 @@ CREATE INDEX IF NOT EXISTS idx_notify_task           ON kanban_notify_subs(task_
 _INITIALIZED_PATHS: set[str] = set()
 _INIT_LOCK = threading.RLock()
 _SQLITE_HEADER = b"SQLite format 3\x00"
+DEFAULT_BUSY_TIMEOUT_MS = 120_000
+
+
+def _resolve_busy_timeout_ms() -> int:
+    """Return the SQLite busy timeout for Kanban connections.
+
+    Kanban is the shared cross-profile dispatch bus, so worker stampedes are
+    expected.  A long busy timeout lets SQLite serialize writers via WAL rather
+    than surfacing transient ``database is locked`` failures during bursts.
+    """
+    raw = os.environ.get("HERMES_KANBAN_BUSY_TIMEOUT_MS", "").strip()
+    if raw:
+        try:
+            parsed = int(raw)
+        except ValueError:
+            parsed = 0
+        if parsed > 0:
+            return parsed
+    return DEFAULT_BUSY_TIMEOUT_MS
+
+
+def _sqlite_connect(path: Path) -> sqlite3.Connection:
+    """Open a Kanban SQLite connection with consistent lock waiting."""
+    busy_timeout_ms = _resolve_busy_timeout_ms()
+    conn = sqlite3.connect(
+        str(path),
+        isolation_level=None,
+        timeout=busy_timeout_ms / 1000.0,
+    )
+    # ``sqlite3.connect(timeout=...)`` normally maps to busy_timeout, but set
+    # the PRAGMA explicitly so it is observable and survives future wrapper
+    # changes. Parameter binding is not supported for PRAGMA assignments.
+    conn.execute(f"PRAGMA busy_timeout={busy_timeout_ms}")
+    return conn
+
+
+@contextlib.contextmanager
+def _cross_process_init_lock(path: Path):
+    """Serialize first-connect WAL/schema/integrity setup across processes.
+
+    ``_INIT_LOCK`` only protects threads inside one Python process. During a
+    dispatcher burst, many worker processes can all hit a fresh/legacy board at
+    once and each process has an empty ``_INITIALIZED_PATHS`` cache. This file
+    lock keeps header validation, integrity probing, WAL activation, and
+    additive migrations single-file/single-writer across the whole host while
+    leaving normal post-init DB usage concurrent under SQLite WAL.
+    """
+    path.parent.mkdir(parents=True, exist_ok=True)
+    lock_path = path.with_name(path.name + ".init.lock")
+    handle = lock_path.open("a+b")
+    try:
+        if _IS_WINDOWS:
+            import msvcrt
+
+            # Lock a single byte in the sidecar file. ``msvcrt.locking`` starts
+            # at the current file position, so seek explicitly before both
+            # lock and unlock.  The file is opened in append/read binary mode so
+            # it always exists but the byte-range lock is the synchronization
+            # primitive; no payload needs to be written.
+            handle.seek(0)
+            locking = getattr(msvcrt, "locking")
+            lock_mode = getattr(msvcrt, "LK_LOCK")
+            locking(handle.fileno(), lock_mode, 1)
+        else:
+            import fcntl
+
+            fcntl.flock(handle.fileno(), fcntl.LOCK_EX)
+        yield
+    finally:
+        try:
+            if _IS_WINDOWS:
+                import msvcrt
+
+                handle.seek(0)
+                locking = getattr(msvcrt, "locking")
+                unlock_mode = getattr(msvcrt, "LK_UNLCK")
+                locking(handle.fileno(), unlock_mode, 1)
+            else:
+                import fcntl
+
+                fcntl.flock(handle.fileno(), fcntl.LOCK_UN)
+        finally:
+            handle.close()
 
 
 def _looks_like_tls_record_at(data: bytes, offset: int) -> bool:
@@ -1055,14 +1148,21 @@ class KanbanDbCorruptError(RuntimeError):
 
 
 def _backup_corrupt_db(path: Path) -> Optional[Path]:
-    """Copy a corrupt DB (and its WAL/SHM sidecars) to a timestamped backup.
+    """Copy a corrupt DB (and its WAL/SHM sidecars) to a content-addressed backup.
+
+    The backup filename is deterministic in the main DB's sha256, so repeated
+    quarantines of the same corrupt bytes (gateway restarts, dispatcher retries,
+    multi-profile fleets all hitting the same shared DB) reuse one backup
+    instead of amplifying disk usage by N. If the corrupt bytes actually
+    change between attempts — e.g. a partial repair or further damage — the
+    fingerprint changes and a separate backup is preserved.
 
     Returns the backup path of the main DB file, or ``None`` if the copy
     itself failed (the caller still raises loudly in that case).
 
-    Writes are confined to the original DB's parent directory. The
-    backup basename is derived purely from ``path.name``, never from
-    caller-supplied directory segments — no traversal is possible.
+    Writes are confined to the original DB's parent directory. The backup
+    basename is derived purely from ``path.name`` and a content hash, never
+    from caller-supplied directory segments — no traversal is possible.
     """
     # Resolve once and pin the parent so subsequent path operations cannot
     # escape it. ``Path.resolve()`` collapses any ``..`` segments and
@@ -1070,32 +1170,31 @@ def _backup_corrupt_db(path: Path) -> Optional[Path]:
     resolved = path.resolve()
     parent = resolved.parent
     base_name = resolved.name  # basename only
-    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-    candidate = parent / f"{base_name}.corrupt.{stamp}.bak"
-    # Defensive: candidate must still be inside parent after construction.
-    # f-string interpolation of ``base_name`` cannot escape ``parent``
-    # because ``base_name`` is itself a resolved basename, but assert it
-    # anyway so static analyzers can see the containment guarantee.
-    if candidate.parent != parent:
-        return None
-    counter = 0
-    while candidate.exists():
-        counter += 1
-        candidate = parent / f"{base_name}.corrupt.{stamp}.{counter}.bak"
-        if candidate.parent != parent:
-            return None
+    digest = hashlib.sha256()
     try:
-        shutil.copy2(resolved, candidate)
+        with resolved.open("rb") as handle:
+            for chunk in iter(lambda: handle.read(1024 * 1024), b""):
+                digest.update(chunk)
     except OSError:
         return None
+    token = digest.hexdigest()[:16]
+    candidate = parent / f"{base_name}.corrupt.{token}.bak"
+    # Defensive: candidate must still be inside parent after construction.
+    if candidate.parent != parent:
+        return None
+    if not candidate.exists():
+        try:
+            shutil.copy2(resolved, candidate)
+        except OSError:
+            return None
     for suffix in ("-wal", "-shm"):
         sidecar = parent / (base_name + suffix)
         if sidecar.parent != parent or not sidecar.exists():
             continue
+        sidecar_backup = parent / (candidate.name + suffix)
+        if sidecar_backup.parent != parent or sidecar_backup.exists():
+            continue
         try:
-            sidecar_backup = parent / (candidate.name + suffix)
-            if sidecar_backup.parent != parent:
-                continue
             shutil.copy2(sidecar, sidecar_backup)
         except OSError:
             pass
@@ -1142,7 +1241,7 @@ def _guard_existing_db_is_healthy(path: Path) -> None:
         return
     reason: Optional[str] = None
     try:
-        probe = sqlite3.connect(str(resolved), timeout=5, isolation_level=None)
+        probe = _sqlite_connect(resolved)
         try:
             row = probe.execute("PRAGMA integrity_check").fetchone()
         finally:
@@ -1188,54 +1287,90 @@ def connect(
     else:
         path = kanban_db_path(board=board)
     path.parent.mkdir(parents=True, exist_ok=True)
-    # Cheap byte-level check first — catches the #29507 TLS-overwrite shape
-    # and other invalid-header cases without opening a sqlite connection.
-    _validate_sqlite_header(path)
-    # Full integrity probe — catches corruption past the header (malformed
-    # pages, broken internal metadata). Cached per-path after first success
-    # via _INITIALIZED_PATHS so it only runs once per process per path.
-    _guard_existing_db_is_healthy(path)
-    resolved = str(path.resolve())
-    conn = sqlite3.connect(str(path), isolation_level=None, timeout=30)
-    try:
-        conn.row_factory = sqlite3.Row
-        with _INIT_LOCK:
-            # WAL activation can take an exclusive lock while SQLite creates the
-            # sidecar files for a fresh database. Keep it in the same process-local
-            # critical section as schema initialization so concurrent gateway
-            # startup threads do not race before _INITIALIZED_PATHS is populated.
-            # WAL doesn't work on network filesystems (NFS/SMB/FUSE). Shared helper
-            # falls back to DELETE with one WARNING so kanban stays usable there.
-            # See hermes_state._WAL_INCOMPAT_MARKERS for detection logic.
-            from hermes_state import apply_wal_with_fallback
-            apply_wal_with_fallback(conn, db_label=f"kanban.db ({path.name})")
-            # FULL (was NORMAL): fsync before each checkpoint to narrow the
-            # crash window that can leave a b-tree page header torn.
-            conn.execute("PRAGMA synchronous=FULL")
-            conn.execute("PRAGMA wal_autocheckpoint=100")
-            conn.execute("PRAGMA foreign_keys=ON")
-            # Zero freed pages so a later torn write cannot expose stale
-            # cell content; persisted in the DB header for new DBs.
-            conn.execute("PRAGMA secure_delete=ON")
-            # Surface corrupt cells as read errors instead of silent
-            # wrong-data returns.
-            conn.execute("PRAGMA cell_size_check=ON")
-            needs_init = resolved not in _INITIALIZED_PATHS
-            if needs_init:
-                # Idempotent: runs CREATE TABLE IF NOT EXISTS + the additive
-                # migrations. Cached so subsequent connect() calls in the same
-                # process are cheap. The lock prevents same-process dispatcher
-                # threads from racing through the additive ALTER TABLE pass with
-                # stale PRAGMA snapshots during gateway startup.
-                conn.executescript(SCHEMA_SQL)
-                _migrate_add_optional_columns(conn)
-                _INITIALIZED_PATHS.add(resolved)
-    except Exception:
-        conn.close()
-        raise
+    with _cross_process_init_lock(path):
+        # Cheap byte-level check first — catches the #29507 TLS-overwrite shape
+        # and other invalid-header cases without opening a sqlite connection.
+        _validate_sqlite_header(path)
+        # Full integrity probe — catches corruption past the header (malformed
+        # pages, broken internal metadata). Cached per-path after first success
+        # via _INITIALIZED_PATHS so it only runs once per process per path.
+        _guard_existing_db_is_healthy(path)
+        resolved = str(path.resolve())
+        conn = _sqlite_connect(path)
+        try:
+            conn.row_factory = sqlite3.Row
+            with _INIT_LOCK:
+                # WAL activation can take an exclusive lock while SQLite creates the
+                # sidecar files for a fresh database. Keep it in the same process-local
+                # critical section as schema initialization so concurrent gateway
+                # startup threads do not race before _INITIALIZED_PATHS is populated.
+                # WAL doesn't work on network filesystems (NFS/SMB/FUSE). Shared helper
+                # falls back to DELETE with one WARNING so kanban stays usable there.
+                # See hermes_state._WAL_INCOMPAT_MARKERS for detection logic.
+                from hermes_state import apply_wal_with_fallback
+                apply_wal_with_fallback(conn, db_label=f"kanban.db ({path.name})")
+                # FULL (was NORMAL): fsync before each checkpoint to narrow the
+                # crash window that can leave a b-tree page header torn.
+                conn.execute("PRAGMA synchronous=FULL")
+                conn.execute("PRAGMA wal_autocheckpoint=100")
+                conn.execute("PRAGMA foreign_keys=ON")
+                # Zero freed pages so a later torn write cannot expose stale
+                # cell content; persisted in the DB header for new DBs.
+                conn.execute("PRAGMA secure_delete=ON")
+                # Surface corrupt cells as read errors instead of silent
+                # wrong-data returns.
+                conn.execute("PRAGMA cell_size_check=ON")
+                needs_init = resolved not in _INITIALIZED_PATHS
+                if needs_init:
+                    # Idempotent: runs CREATE TABLE IF NOT EXISTS + the additive
+                    # migrations. Cached so subsequent connect() calls in the same
+                    # process are cheap. The lock prevents same-process dispatcher
+                    # threads from racing through the additive ALTER TABLE pass with
+                    # stale PRAGMA snapshots during gateway startup.
+                    conn.executescript(SCHEMA_SQL)
+                    _migrate_add_optional_columns(conn)
+                    _INITIALIZED_PATHS.add(resolved)
+        except Exception:
+            conn.close()
+            raise
     return conn
 
 
+@contextlib.contextmanager
+def connect_closing(
+    db_path: Optional[Path] = None,
+    *,
+    board: Optional[str] = None,
+):
+    """Open a kanban DB connection and guarantee it is closed on exit.
+
+    Use this instead of ``with kb.connect() as conn:`` — sqlite3's
+    built-in connection context manager only commits/rollbacks the
+    transaction; it does NOT close the file descriptor. In long-lived
+    processes (gateway, dashboard) that route every kanban operation
+    through ``connect()`` (e.g. ``run_slash`` dispatching ``/kanban …``
+    commands, ``decompose_task_endpoint`` calling
+    ``kanban_decompose.decompose_task``), the unclosed connections
+    accumulate as open FDs to ``kanban.db`` and ``kanban.db-wal``. After
+    enough operations the process hits the kernel FD limit and dies
+    with ``[Errno 24] Too many open files``.
+
+    See #33159 for the production incident.
+
+    The ``connect()`` function itself remains unchanged so callers that
+    intentionally manage the connection lifetime (tests, long-lived
+    callers) continue to work.
+    """
+    conn = connect(db_path=db_path, board=board)
+    try:
+        yield conn
+    finally:
+        try:
+            conn.close()
+        except Exception:
+            pass
+
+
 def init_db(
     db_path: Optional[Path] = None,
     *,
@@ -2615,9 +2750,19 @@ def release_stale_claims(
     then-immediately-reclaim loop seen on slow models that spend longer
     than ``DEFAULT_CLAIM_TTL_SECONDS`` inside a single tool-free LLM
     call (#23025): no tool calls means no ``kanban_heartbeat``, even
-    though the subprocess is healthy. ``enforce_max_runtime`` and
-    ``detect_crashed_workers`` remain the upper bounds for genuinely
-    wedged or dead workers.
+    though the subprocess is healthy.
+
+    Backstop (#29747 gap 3): if the worker's PID is still alive but its
+    ``last_heartbeat_at`` is stale by more than
+    ``DEFAULT_CLAIM_HEARTBEAT_MAX_STALE_SECONDS`` (1h), the worker has
+    been making no observable progress and we reclaim anyway — even if
+    ``_pid_alive`` is still true. This catches the wedged-in-a-logic-loop
+    case where the process is technically running but accomplishing
+    nothing. ``_touch_activity`` (run_agent.py) bridges chunk-level
+    liveness into ``last_heartbeat_at`` via #31752, so any genuinely
+    active worker keeps its heartbeat fresh as a side effect of normal
+    API traffic. ``enforce_max_runtime`` and ``detect_crashed_workers``
+    remain the upper bounds for genuinely wedged or dead workers.
 
     Returns the number of stale claims actually reclaimed (live-pid
     extensions don't count). Safe to call often.
@@ -2635,7 +2780,21 @@ def release_stale_claims(
     for row in stale:
         lock = row["claim_lock"] or ""
         host_local = lock.startswith(host_prefix)
-        if host_local and row["worker_pid"] and _pid_alive(row["worker_pid"]):
+        hb = row["last_heartbeat_at"]
+        # Heartbeat staleness backstop: if we have a heartbeat at all
+        # and it's older than the max-stale threshold, the worker is
+        # not making observable progress.  Reclaim instead of extending,
+        # even if the PID is still alive (it's likely in a logic loop).
+        heartbeat_stale = (
+            hb is not None
+            and (now - int(hb)) > DEFAULT_CLAIM_HEARTBEAT_MAX_STALE_SECONDS
+        )
+        if (
+            host_local
+            and row["worker_pid"]
+            and _pid_alive(row["worker_pid"])
+            and not heartbeat_stale
+        ):
             new_expires = now + _resolve_claim_ttl_seconds()
             with write_txn(conn):
                 cur = conn.execute(
@@ -2704,6 +2863,7 @@ def release_stale_claims(
                 ),
                 "now": now,
                 "host_local": host_local,
+                "heartbeat_stale": bool(heartbeat_stale),
             }
             payload.update(termination)
             _append_event(
@@ -4163,6 +4323,12 @@ class DispatchResult:
     skipped_unassigned: list[str] = field(default_factory=list)
     """Ready task ids skipped because they have no assignee at all.
     Operator-actionable — usually a misfiled task waiting for routing."""
+    auto_assigned_default: list[str] = field(default_factory=list)
+    """Task ids that were unassigned in the DB and had
+    ``kanban.default_assignee`` applied this tick before spawning (#27145).
+    Surfaces the auto-assignment to telemetry / CLI / dashboard so the
+    operator can see when the dispatcher is acting on the fallback rule
+    rather than on explicit per-task assignments."""
     skipped_nonspawnable: list[str] = field(default_factory=list)
     """Ready task ids skipped because their assignee names a control-plane
     lane (a Claude Code terminal like ``orion-cc``) rather than a Hermes
@@ -4170,6 +4336,14 @@ class DispatchResult:
     operator-actionable failure. Tracked separately so health telemetry
     can distinguish "real stuck" (nothing spawned but spawnable work
     available) from "correctly idle" (nothing spawnable in the queue)."""
+    skipped_per_profile_capped: list[tuple[str, str, int]] = field(default_factory=list)
+    """Tasks deferred this tick because their assignee is already at
+    ``kanban.max_in_progress_per_profile`` (#21582). Each entry is
+    ``(task_id, assignee, current_running_count)``. NOT an
+    operator-actionable failure — the task will be picked up on a
+    subsequent tick when the assignee has capacity. Separate bucket so
+    telemetry / dashboards can show "this profile is busy" vs
+    "task is genuinely stuck"."""
     crashed: list[str] = field(default_factory=list)
     """Task ids reclaimed because their worker PID disappeared."""
     auto_blocked: list[str] = field(default_factory=list)
@@ -4264,21 +4438,20 @@ def reap_worker_zombies() -> "list[int]":
     Returns the list of reaped PIDs. Safe to call when there are no
     children (returns []). No-op on Windows.
     """
-    if os.name == "nt":
-        return []
     reaped: "list[int]" = []
-    try:
-        while True:
-            try:
-                pid, status = os.waitpid(-1, os.WNOHANG)
-            except ChildProcessError:
-                break
-            if pid == 0:
-                break
-            _record_worker_exit(pid, status)
-            reaped.append(pid)
-    except Exception:
-        pass
+    if os.name != "nt":
+        try:
+            while True:
+                try:
+                    pid, status = os.waitpid(-1, os.WNOHANG)
+                except ChildProcessError:
+                    break
+                if pid == 0:
+                    break
+                _record_worker_exit(pid, status)
+                reaped.append(pid)
+        except Exception:
+            pass
     return reaped
 
 
@@ -4604,7 +4777,6 @@ def detect_stale_running(
     if stale_timeout_seconds <= 0:
         return []
 
-    import signal as _signal_mod
 
     now = int(time.time())
     host_prefix = f"{_claimer_id().split(':', 1)[0]}:"
@@ -4693,21 +4865,6 @@ def detect_stale_running(
     return reclaimed
 
 
-def set_max_runtime(
-    conn: sqlite3.Connection,
-    task_id: str,
-    seconds: Optional[int],
-) -> bool:
-    """Set or clear the per-task max_runtime_seconds. Returns True on
-    success."""
-    with write_txn(conn):
-        cur = conn.execute(
-            "UPDATE tasks SET max_runtime_seconds = ? WHERE id = ?",
-            (int(seconds) if seconds is not None else None, task_id),
-        )
-    return cur.rowcount == 1
-
-
 def _error_fingerprint(error_text: str) -> str:
     """Normalize an error message for grouping identical failures.
 
@@ -5217,6 +5374,8 @@ def dispatch_once(
     failure_limit: int = DEFAULT_SPAWN_FAILURE_LIMIT,
     stale_timeout_seconds: int = 0,
     board: Optional[str] = None,
+    default_assignee: Optional[str] = None,
+    max_in_progress_per_profile: Optional[int] = None,
 ) -> DispatchResult:
     """Run one dispatcher tick.
 
@@ -5302,12 +5461,89 @@ def dispatch_once(
         if max_spawn is None or max_spawn > remaining:
             max_spawn = remaining
     spawned = 0
+    # Per-profile concurrency cap (#21582): when set, track how many
+    # workers each assignee already has in flight, and refuse to spawn
+    # when this would push that assignee past the cap. Prevents
+    # fan-out workloads from melting a single profile's local model /
+    # API quota / browser pool while leaving other profiles idle.
+    # Tasks blocked this way go to skipped_per_profile_capped (not
+    # skipped_unassigned — the operator-actionable signal is different:
+    # "this profile is busy, try again later" not "this needs routing").
+    _per_profile_cap = max_in_progress_per_profile if (
+        isinstance(max_in_progress_per_profile, int)
+        and max_in_progress_per_profile > 0
+    ) else None
+    _per_profile_running: dict[str, int] = {}
+    if _per_profile_cap is not None:
+        for prow in conn.execute(
+            "SELECT assignee, COUNT(*) AS n FROM tasks "
+            "WHERE status = 'running' AND assignee IS NOT NULL "
+            "GROUP BY assignee"
+        ):
+            _per_profile_running[prow["assignee"]] = int(prow["n"])
+    # Normalize default_assignee once: empty/whitespace string → None so the
+    # rest of the loop can use ``if default_assignee:`` as a single check.
+    # We also resolve profile_exists once here for the same reason.
+    _default_assignee = (default_assignee or "").strip() or None
+    _default_assignee_resolved = False
+    if _default_assignee:
+        try:
+            from hermes_cli.profiles import profile_exists as _pe
+            _default_assignee_resolved = bool(_pe(_default_assignee))
+        except Exception:
+            # Profiles module not importable (test stubs, exotic envs).
+            # Trust the operator's config and try the assignment; the
+            # downstream profile_exists check on the assigned row will
+            # bucket it as nonspawnable if the profile genuinely isn't
+            # there, with the existing diagnostic.
+            _default_assignee_resolved = True
     for row in ready_rows:
         if max_spawn is not None and running_count + spawned >= max_spawn:
             break
-        if not row["assignee"]:
-            result.skipped_unassigned.append(row["id"])
-            continue
+        row_assignee = row["assignee"]
+        if not row_assignee:
+            # Honour kanban.default_assignee: when the dispatcher hits an
+            # unassigned ready task and an operator-configured fallback
+            # exists, persist the assignment and proceed. This removes the
+            # dashboard footgun where a task created without an assignee
+            # parks in 'ready' forever even though the operator's intent
+            # ("default") was perfectly clear (#27145). Mutating the row
+            # (not just the in-memory view) keeps diagnostics and the
+            # board state consistent: the task is now legitimately owned
+            # by ``kanban.default_assignee``, not "unassigned but secretly
+            # routed".
+            if _default_assignee and _default_assignee_resolved:
+                # Dry-run: show what WOULD happen (auto-assign + spawn) without
+                # mutating the DB. Real run: mutate the row + emit the
+                # 'assigned' event so the board state matches what just happened.
+                if not dry_run:
+                    try:
+                        with write_txn(conn):
+                            conn.execute(
+                                "UPDATE tasks SET assignee = ? WHERE id = ? "
+                                "AND (assignee IS NULL OR assignee = '')",
+                                (_default_assignee, row["id"]),
+                            )
+                            _append_event(
+                                conn, row["id"], "assigned",
+                                {
+                                    "assignee": _default_assignee,
+                                    "source": "kanban.default_assignee",
+                                },
+                            )
+                    except Exception:
+                        _log.debug(
+                            "kanban dispatch: failed to apply default_assignee=%r "
+                            "to task %s",
+                            _default_assignee, row["id"], exc_info=True,
+                        )
+                        result.skipped_unassigned.append(row["id"])
+                        continue
+                row_assignee = _default_assignee
+                result.auto_assigned_default.append(row["id"])
+            else:
+                result.skipped_unassigned.append(row["id"])
+                continue
         # Skip ready tasks whose assignee is not a real Hermes profile.
         # `_default_spawn` invokes ``hermes -p <assignee>`` which fails
         # with "Profile 'X' does not exist" when the assignee names a
@@ -5322,7 +5558,7 @@ def dispatch_once(
             from hermes_cli.profiles import profile_exists  # local import: avoids cycle
         except Exception:
             profile_exists = None  # type: ignore[assignment]
-        if profile_exists is not None and not profile_exists(row["assignee"]):
+        if profile_exists is not None and not profile_exists(row_assignee):
             # Bucket separately from skipped_unassigned: the operator
             # cannot fix this by assigning a profile (the assignee IS the
             # intended owner — a terminal lane). Health telemetry uses
@@ -5331,6 +5567,19 @@ def dispatch_once(
             # of human-pulled work.
             result.skipped_nonspawnable.append(row["id"])
             continue
+        # Per-profile concurrency cap (#21582): even if there's global
+        # headroom, refuse to spawn for an assignee that's already at
+        # its in-flight cap. Prevents one profile's local model / API
+        # quota / browser pool from being overwhelmed by a fan-out
+        # while the global max_in_progress / max_spawn caps still allow
+        # work on OTHER profiles.
+        if _per_profile_cap is not None:
+            current = _per_profile_running.get(row_assignee, 0)
+            if current >= _per_profile_cap:
+                result.skipped_per_profile_capped.append(
+                    (row["id"], row_assignee, current)
+                )
+                continue
         # Respawn guard: refuse to re-spawn when useful work is already
         # in-flight/recent, or when the last failure is a deterministic
         # blocker (quota / auth). The guard defers the spawn this tick so
@@ -5353,7 +5602,15 @@ def dispatch_once(
                     )
             continue
         if dry_run:
-            result.spawned.append((row["id"], row["assignee"], ""))
+            result.spawned.append((row["id"], row_assignee, ""))
+            # Increment per-profile counter even in dry_run so the cap
+            # check sees the would-be spawn on subsequent iterations.
+            # Without this, dry_run reports every task as spawnable and
+            # under-reports the capped subset (#21582).
+            if _per_profile_cap is not None and row_assignee:
+                _per_profile_running[row_assignee] = (
+                    _per_profile_running.get(row_assignee, 0) + 1
+                )
             continue
         claimed = claim_task(conn, row["id"], ttl_seconds=ttl_seconds)
         if claimed is None:
@@ -5396,6 +5653,13 @@ def dispatch_once(
             # complete_task).
             result.spawned.append((claimed.id, claimed.assignee or "", str(workspace)))
             spawned += 1
+            # Track the new in-flight count for this profile so later
+            # iterations in this same tick respect the per-profile cap
+            # (#21582). Subsequent ticks re-query from the DB.
+            if _per_profile_cap is not None and claimed.assignee:
+                _per_profile_running[claimed.assignee] = (
+                    _per_profile_running.get(claimed.assignee, 0) + 1
+                )
         except Exception as exc:
             auto = _record_spawn_failure(
                 conn, claimed.id, str(exc),
@@ -6237,7 +6501,7 @@ def _to_epoch(val) -> Optional[int]:
         pass
     # ISO-8601 fallback (e.g. '2026-05-10T15:00:00Z')
     try:
-        from datetime import datetime, timezone
+        from datetime import datetime
         dt = datetime.fromisoformat(s.replace("Z", "+00:00"))
         return int(dt.timestamp())
     except (ValueError, OSError):
@@ -6688,16 +6952,6 @@ def get_run(conn: sqlite3.Connection, run_id: int) -> Optional[Run]:
     return Run.from_row(row) if row else None
 
 
-def active_run(conn: sqlite3.Connection, task_id: str) -> Optional[Run]:
-    """Return the currently-open run for ``task_id`` (``ended_at IS NULL``)."""
-    row = conn.execute(
-        "SELECT * FROM task_runs WHERE task_id = ? AND ended_at IS NULL "
-        "ORDER BY started_at DESC LIMIT 1",
-        (task_id,),
-    ).fetchone()
-    return Run.from_row(row) if row else None
-
-
 def latest_run(conn: sqlite3.Connection, task_id: str) -> Optional[Run]:
     """Return the most recent run regardless of outcome (active or closed)."""
     row = conn.execute(
diff --git a/hermes_cli/kanban_decompose.py b/hermes_cli/kanban_decompose.py
index 063abcf7b51..dec7c0b7c72 100644
--- a/hermes_cli/kanban_decompose.py
+++ b/hermes_cli/kanban_decompose.py
@@ -281,7 +281,7 @@ def decompose_task(
     configured, API error, malformed response, decomposer returned
     fanout=true with empty task list) — those surface via ``ok=False``.
     """
-    with kb.connect() as conn:
+    with kb.connect_closing() as conn:
         task = kb.get_task(conn, task_id)
     if task is None:
         return DecomposeOutcome(task_id, False, "unknown task id")
@@ -370,7 +370,7 @@ def decompose_task(
             return DecomposeOutcome(
                 task_id, False, "decomposer returned fanout=false with no title/body",
             )
-        with kb.connect() as conn:
+        with kb.connect_closing() as conn:
             ok = kb.specify_triage_task(
                 conn,
                 task_id,
@@ -439,7 +439,7 @@ def decompose_task(
         })
 
     try:
-        with kb.connect() as conn:
+        with kb.connect_closing() as conn:
             child_ids = kb.decompose_triage_task(
                 conn,
                 task_id,
@@ -467,7 +467,7 @@ def decompose_task(
 
 def list_triage_ids(*, tenant: Optional[str] = None) -> list[str]:
     """Return task ids currently in the triage column."""
-    with kb.connect() as conn:
+    with kb.connect_closing() as conn:
         rows = kb.list_tasks(
             conn,
             status="triage",
diff --git a/hermes_cli/kanban_diagnostics.py b/hermes_cli/kanban_diagnostics.py
index bed5a6ebccb..bef9bc8a97e 100644
--- a/hermes_cli/kanban_diagnostics.py
+++ b/hermes_cli/kanban_diagnostics.py
@@ -191,23 +191,6 @@ def _active_hallucination_events(
         elif k == kind:
             active.append(ev)
     return active
-
-
-def _latest_clean_event_ts(events: Iterable[Any]) -> int:
-    """Timestamp of the most recent clean completion / edit event.
-
-    Kept for general "has this task ever been successfully completed"
-    lookups; hallucination rules use ``_active_hallucination_events``
-    instead because they need strict ordering.
-    """
-    latest = 0
-    for ev in events:
-        if _event_kind(ev) in {"completed", "edited"}:
-            t = _event_ts(ev)
-            latest = max(latest, t)
-    return latest
-
-
 # Standard always-available actions. Every diagnostic can offer these as
 # fallbacks regardless of kind — they're the two baseline recovery
 # primitives the kernel supports.
@@ -791,6 +774,83 @@ def _rule_stuck_in_blocked(task, events, runs, now, cfg) -> list[Diagnostic]:
     )]
 
 
+def _rule_block_unblock_cycling(task, events, runs, now, cfg) -> list[Diagnostic]:
+    """Task has cycled through blocked → unblocked many times — the
+    ``unblock`` is not fixing the underlying problem and the worker
+    keeps re-blocking for substantially the same reason.
+
+    ``_rule_stuck_in_blocked`` resets its timer on any ``commented`` /
+    ``unblocked`` event, so a task that cycles every few minutes is
+    invisible to it regardless of how many times it cycles (#29747
+    gap 1). This rule complements that one by counting block→unblock
+    cycles in a sliding window.
+
+    Threshold: cfg["block_cycle_threshold"] (default 3) cycles within
+    cfg["block_cycle_window_seconds"] (default 24h).
+    """
+    threshold = _positive_int(cfg.get("block_cycle_threshold"), 3)
+    window_seconds = float(cfg.get("block_cycle_window_seconds", 24 * 3600))
+    cycle_cutoff = now - window_seconds
+
+    # Walk events chronologically (arrival order — callers pre-sort by
+    # id, which is the canonical chronological order; ``created_at``
+    # alone is insufficient because multiple events can share the same
+    # second).  Count "blocked after unblocked" transitions: every time
+    # a blocked event follows at least one unblocked event since the
+    # last cycle was counted, that's a new cycle.
+    cycles = 0
+    seen_unblock_since_last_cycle = False
+    initial_blocked_ts = 0
+    last_cycle_blocked_ts = 0
+    for ev in events:
+        ts = _event_ts(ev)
+        if ts < cycle_cutoff:
+            continue
+        kind = _event_kind(ev)
+        if kind == "blocked":
+            if initial_blocked_ts == 0:
+                initial_blocked_ts = ts
+            if seen_unblock_since_last_cycle:
+                cycles += 1
+                last_cycle_blocked_ts = ts
+                seen_unblock_since_last_cycle = False
+        elif kind == "unblocked":
+            seen_unblock_since_last_cycle = True
+
+    if cycles < threshold:
+        return []
+
+    task_id = _task_field(task, "id")
+    actions: list[DiagnosticAction] = []
+    if task_id:
+        actions.append(DiagnosticAction(
+            kind="cli_hint",
+            label=f"Check block reasons: hermes kanban events {task_id}",
+            payload={"command": f"hermes kanban events {task_id}"},
+            suggested=True,
+        ))
+    return [Diagnostic(
+        kind="block_unblock_cycling",
+        severity="warning",
+        title=f"Task block→unblock cycled {cycles}x in {int(window_seconds/3600)}h",
+        detail=(
+            f"This task has been blocked {cycles} times after being "
+            "unblocked, suggesting the unblock is not addressing the "
+            "root cause and the worker keeps hitting the same wall. "
+            "Review the block reasons in the event history; a different "
+            "intervention (reassign, change scope, archive) may be needed."
+        ),
+        actions=actions,
+        first_seen_at=int(initial_blocked_ts) if initial_blocked_ts else int(now),
+        last_seen_at=int(last_cycle_blocked_ts) if last_cycle_blocked_ts else int(now),
+        count=cycles,
+        data={
+            "cycles": cycles,
+            "window_seconds": int(window_seconds),
+        },
+    )]
+
+
 def _rule_stranded_in_ready(task, events, runs, now, cfg) -> list[Diagnostic]:
     """Task has been in ``ready`` status for too long without any worker
     claiming it.
@@ -923,6 +983,7 @@ _RULES: list[RuleFn] = [
     _rule_repeated_failures,
     _rule_repeated_crashes,
     _rule_stuck_in_blocked,
+    _rule_block_unblock_cycling,
     _rule_stranded_in_ready,
 ]
 
@@ -936,6 +997,7 @@ DIAGNOSTIC_KINDS = (
     "repeated_failures",
     "repeated_crashes",
     "stuck_in_blocked",
+    "block_unblock_cycling",
     "stranded_in_ready",
 )
 
@@ -1043,16 +1105,3 @@ def compute_task_diagnostics(
         )
     )
     return out
-
-
-def severity_of_highest(diagnostics: Iterable[Diagnostic]) -> Optional[str]:
-    """Highest severity present in the list, or None if empty. Useful
-    for card badges that need a single color."""
-    highest_idx = -1
-    highest = None
-    for d in diagnostics:
-        idx = SEVERITY_ORDER.index(d.severity) if d.severity in SEVERITY_ORDER else -1
-        if idx > highest_idx:
-            highest_idx = idx
-            highest = d.severity
-    return highest
diff --git a/hermes_cli/kanban_specify.py b/hermes_cli/kanban_specify.py
index 1ad576bf8f1..4bfcce61ee9 100644
--- a/hermes_cli/kanban_specify.py
+++ b/hermes_cli/kanban_specify.py
@@ -150,7 +150,7 @@ def specify_task(
     error, malformed response) — those surface via ``ok=False`` so the
     ``--all`` sweep can continue past individual failures.
     """
-    with kb.connect() as conn:
+    with kb.connect_closing() as conn:
         task = kb.get_task(conn, task_id)
     if task is None:
         return SpecifyOutcome(task_id, False, "unknown task id")
@@ -239,7 +239,7 @@ def specify_task(
                 task_id, False, "LLM response missing title and body"
             )
 
-    with kb.connect() as conn:
+    with kb.connect_closing() as conn:
         ok = kb.specify_triage_task(
             conn,
             task_id,
@@ -261,7 +261,7 @@ def list_triage_ids(*, tenant: Optional[str] = None) -> list[str]:
 
     ``tenant`` narrows the sweep; ``None`` returns every triage task.
     """
-    with kb.connect() as conn:
+    with kb.connect_closing() as conn:
         tasks = kb.list_tasks(
             conn,
             status="triage",
diff --git a/hermes_cli/kanban_swarm.py b/hermes_cli/kanban_swarm.py
index 2b0fa0b9e98..fe47a4c7713 100644
--- a/hermes_cli/kanban_swarm.py
+++ b/hermes_cli/kanban_swarm.py
@@ -209,7 +209,7 @@ def create_swarm(
         priority=priority,
         workspace_kind=workspace_kind,
         workspace_path=workspace_path,
-        skills=["avoid-ai-writing"],
+        skills=["humanizer"],
     )
 
     created = SwarmCreated(root, worker_ids, verifier, synthesizer)
diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index 3cbf30f48fe..1f75e8bc8fe 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -2148,6 +2148,13 @@ def cmd_postinstall(args):
 def cmd_model(args):
     """Select default model — starts with provider selection, then model picker."""
     _require_tty("model")
+    if getattr(args, "refresh", False):
+        try:
+            from hermes_cli.models import clear_provider_models_cache
+            clear_provider_models_cache()
+            print("  Cleared model picker cache.")
+        except Exception:
+            pass
     select_provider_and_model(args=args)
 
 
@@ -3095,7 +3102,7 @@ def _model_flow_nous(config, current_model="", args=None):
 
     # Verify credentials are still valid (catches expired sessions early)
     try:
-        creds = resolve_nous_runtime_credentials(min_key_ttl_seconds=5 * 60)
+        creds = resolve_nous_runtime_credentials()
     except Exception as exc:
         relogin = isinstance(exc, AuthError) and exc.relogin_required
         msg = format_auth_error(exc) if isinstance(exc, AuthError) else str(exc)
@@ -3123,8 +3130,20 @@ def _model_flow_nous(config, current_model="", args=None):
     # Fetch live pricing (non-blocking — returns empty dict on failure)
     pricing = get_pricing_for_provider("nous")
 
-    # Check if user is on free tier
-    free_tier = check_nous_free_tier()
+    # Force fresh account data for model selection so recent credit purchases
+    # are reflected immediately.
+    free_tier = check_nous_free_tier(force_fresh=True)
+    if not free_tier:
+        try:
+            refreshed_creds = resolve_nous_runtime_credentials(
+                force_refresh=True,
+            )
+            if refreshed_creds:
+                creds = refreshed_creds
+        except Exception:
+            # Runtime inference has its own paid-entitlement recovery path; do
+            # not block model selection if this opportunistic refresh fails.
+            pass
 
     # Resolve portal URL early — needed both for upgrade links and for the
     # freeRecommendedModels endpoint below.
@@ -3146,7 +3165,24 @@ def _model_flow_nous(config, current_model="", args=None):
     # newly-launched paid models surface in the picker too — independent
     # of CLI release cadence.
     unavailable_models: list[str] = []
+    unavailable_message = ""
     if free_tier:
+        try:
+            from hermes_cli.nous_account import (
+                format_nous_portal_entitlement_message,
+                get_nous_portal_account_info,
+            )
+
+            _account_info = get_nous_portal_account_info(force_fresh=True)
+            unavailable_message = (
+                format_nous_portal_entitlement_message(
+                    _account_info,
+                    capability="paid Nous models",
+                )
+                or ""
+            )
+        except Exception:
+            unavailable_message = ""
         model_ids, pricing = union_with_portal_free_recommendations(
             model_ids, pricing, _nous_portal_url,
         )
@@ -3168,7 +3204,7 @@ def _model_flow_nous(config, current_model="", args=None):
             from hermes_cli.auth import DEFAULT_NOUS_PORTAL_URL
 
             _url = (_nous_portal_url or DEFAULT_NOUS_PORTAL_URL).rstrip("/")
-            print(f"Upgrade at {_url} to access paid models.")
+            print(unavailable_message or f"Upgrade at {_url} to access paid models.")
         return
 
     print(
@@ -3181,6 +3217,7 @@ def _model_flow_nous(config, current_model="", args=None):
         pricing=pricing,
         unavailable_models=unavailable_models,
         portal_url=_nous_portal_url,
+        unavailable_message=unavailable_message,
     )
     if selected:
         _save_model_choice(selected)
@@ -5585,7 +5622,6 @@ def _model_flow_bedrock(config, current_model=""):
 def _model_flow_api_key_provider(config, provider_id, current_model=""):
     """Generic flow for API-key providers (z.ai, MiniMax, OpenCode, etc.)."""
     from hermes_cli.auth import (
-        LMSTUDIO_NOAUTH_PLACEHOLDER,
         PROVIDER_REGISTRY,
         _prompt_model_selection,
         _save_model_choice,
@@ -6155,13 +6191,6 @@ def cmd_webhook(args):
     webhook_command(args)
 
 
-def cmd_portal(args):
-    """Nous Portal status and Tool Gateway routing surface."""
-    from hermes_cli.portal_cli import portal_command
-
-    return portal_command(args)
-
-
 def cmd_slack(args):
     """Slack integration helpers.
 
@@ -6476,7 +6505,7 @@ def _web_ui_build_needed(web_dir: Path) -> bool:
     """Return True if the web UI dist is missing or stale.
 
     Mirrors the staleness logic used by ``_tui_build_needed()`` for the TUI.
-    The dashboard source lives under ``apps/dashboard/``, but the Vite build
+    The dashboard source lives under ``web/``, but the Vite build
     still outputs to ``hermes_cli/web_dist/`` (per vite.config.ts
     outDir: "../hermes_cli/web_dist"), NOT to ``web/dist/``, so Python
     packaging can continue serving the same static asset directory. Uses the
@@ -6512,6 +6541,104 @@ def _web_ui_build_needed(web_dir: Path) -> bool:
     return False
 
 
+def _run_with_idle_timeout(
+    cmd: list[str],
+    cwd: Path,
+    *,
+    idle_timeout_seconds: int = 180,
+    indent: str = "    ",
+) -> subprocess.CompletedProcess:
+    """Run a subprocess that streams output, with an idle-output timeout.
+
+    Issue #33788: ``npm run build`` (Vite) was invoked with
+    ``capture_output=True`` and no timeout. On low-memory hosts (notably
+    WSL2 with the default 4 GB cap) the build can stall or sit silent for
+    minutes; users see a frozen terminal, assume the update is hung, and
+    reboot — leaving the editable install in a half-state with the
+    ``hermes`` launcher present but ``hermes_cli`` not importable.
+
+    This helper fixes both halves: stdout is streamed (so the user sees
+    progress), and if no bytes have appeared on stdout/stderr for
+    ``idle_timeout_seconds``, the process is terminated and the call
+    returns with a non-zero ``returncode``. The caller's existing
+    stale-dist fallback (#23817) takes over from there.
+
+    Returns a ``CompletedProcess`` with merged stdout (text), empty
+    stderr, and an integer returncode. Never raises on idle timeout —
+    propagation of failure is via the returncode.
+    """
+    merged_chunks: list[str] = []
+    last_output_ts = _time.monotonic()
+    lock = threading.Lock()
+
+    try:
+        proc = subprocess.Popen(
+            cmd,
+            cwd=cwd,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            text=True,
+            encoding="utf-8",
+            errors="replace",
+            bufsize=1,
+        )
+    except OSError as exc:
+        # E.g. npm not on PATH between the which() check and now.
+        return subprocess.CompletedProcess(cmd, 127, stdout="", stderr=str(exc))
+
+    def _reader() -> None:
+        nonlocal last_output_ts
+        assert proc.stdout is not None
+        for line in proc.stdout:
+            try:
+                print(f"{indent}{line.rstrip()}", flush=True)
+            except UnicodeEncodeError:
+                # Windows cp1252 fallback — same pattern as _say().
+                enc = getattr(sys.stdout, "encoding", None) or "ascii"
+                safe = line.rstrip().encode(enc, errors="replace").decode(enc, errors="replace")
+                print(f"{indent}{safe}", flush=True)
+            with lock:
+                merged_chunks.append(line)
+                last_output_ts = _time.monotonic()
+
+    reader_thread = threading.Thread(target=_reader, daemon=True)
+    reader_thread.start()
+
+    idle_killed = False
+    while True:
+        try:
+            rc = proc.wait(timeout=5)
+            break
+        except subprocess.TimeoutExpired:
+            with lock:
+                idle = _time.monotonic() - last_output_ts
+            if idle > idle_timeout_seconds:
+                idle_killed = True
+                proc.terminate()
+                try:
+                    rc = proc.wait(timeout=3)
+                except subprocess.TimeoutExpired:
+                    proc.kill()
+                    rc = proc.wait()
+                break
+
+    # Drain reader so we don't leak the stdout file descriptor.
+    reader_thread.join(timeout=2)
+
+    combined = "".join(merged_chunks)
+    if idle_killed:
+        msg = (
+            f"\n  ⚠ Build produced no output for {idle_timeout_seconds}s — terminated.\n"
+            "    Common causes: out-of-memory on a low-RAM host (WSL/container),\n"
+            "    a stuck Node process, or an antivirus scan stalling I/O.\n"
+        )
+        combined += msg
+        # Force a non-zero rc even if terminate() raced with a clean exit.
+        if rc == 0:
+            rc = 124  # GNU `timeout` convention
+    return subprocess.CompletedProcess(cmd, rc, stdout=combined, stderr="")
+
+
 def _run_npm_install_deterministic(
     npm: str,
     cwd: Path,
@@ -6588,7 +6715,7 @@ def _build_web_ui(web_dir: Path, *, fatal: bool = False) -> bool:
     if not npm:
         if fatal:
             _say("Web UI frontend not built and npm is not available.")
-            _say("Install Node.js, then run:  cd apps/dashboard && npm install && npm run build")
+            _say("Install Node.js, then run:  cd web && npm install && npm run build")
         return not fatal
     _say("→ Building web UI...")
 
@@ -6615,33 +6742,28 @@ def _build_web_ui(web_dir: Path, *, fatal: bool = False) -> bool:
         )
         _relay(r1)
         if fatal:
-            _say("  Run manually:  cd apps/dashboard && npm install && npm run build")
+            _say("  Run manually:  cd web && npm install && npm run build")
         return False
-    # First attempt
-    r2 = subprocess.run(
-        [npm, "run", "build"],
-        cwd=web_dir,
-        capture_output=True,
-        text=True,
-        encoding="utf-8",
-        errors="replace",
-    )
+    # First attempt — stream output via idle-timeout helper (issue #33788).
+    # capture_output=True on a long Vite build looks identical to a hang;
+    # users react by rebooting, which leaves the editable install in a
+    # half-state. Streaming + idle-kill makes failures observable AND
+    # recoverable (the stale-dist fallback below handles the kill path).
+    r2 = _run_with_idle_timeout([npm, "run", "build"], cwd=web_dir)
     if r2.returncode != 0:
         # Retry once after a short delay — covers boot-time races on Windows
         # (antivirus scanning Node.js binaries, npm cache not ready, transient
         # I/O when launched via Scheduled Task at logon). See issue #23817.
         _time.sleep(3)
-        r2 = subprocess.run(
-            [npm, "run", "build"],
-            cwd=web_dir,
-            capture_output=True,
-            text=True,
-            encoding="utf-8",
-            errors="replace",
-        )
+        r2 = _run_with_idle_timeout([npm, "run", "build"], cwd=web_dir)
 
     if r2.returncode != 0:
-        stderr_preview = (r2.stderr or "").strip()
+        # _run_with_idle_timeout merges stderr into stdout; older callers
+        # using subprocess.run kept them split. Pull from whichever has
+        # content so the error surfaces regardless of which path produced
+        # the CompletedProcess.
+        build_output = (r2.stderr or "") + (r2.stdout or "")
+        stderr_preview = build_output.strip()
         stderr_tail = "\n  ".join(stderr_preview.splitlines()[-10:]) if stderr_preview else ""
         project_root = web_dir.parent.parent if web_dir.parent.name == "apps" else web_dir.parent
         dist_dir = project_root / "hermes_cli" / "web_dist"
@@ -6662,7 +6784,7 @@ def _build_web_ui(web_dir: Path, *, fatal: bool = False) -> bool:
         )
         _relay(r2)
         if fatal:
-            _say("  Run manually:  cd apps/dashboard && npm install && npm run build")
+            _say("  Run manually:  cd web && npm install && npm run build")
         return False
     _say("  ✓ Web UI built")
     return True
@@ -7270,7 +7392,7 @@ def _update_via_zip(args):
         _install_python_dependencies_with_optional_fallback(pip_cmd)
 
     _update_node_dependencies()
-    _build_web_ui(PROJECT_ROOT / "apps" / "dashboard")
+    _build_web_ui(PROJECT_ROOT / "web")
 
     # Sync skills
     try:
@@ -8298,37 +8420,18 @@ def _install_psutil_android_compat(
     nothing is persisted in the repository.
 
     Stopgap: remove this once https://github.com/giampaolo/psutil/pull/2762
-    merges and ships in a release. ``scripts/install_psutil_android.py``
-    contains the same logic for ``scripts/install.sh`` (fresh installs).
-    Both copies should be removed together.
+    merges and ships in a release. The standalone installer script uses the
+    same shared helper and should be removed together.
     """
-    import tarfile
     import tempfile
     import urllib.request
-
-    psutil_url = (
-        "https://files.pythonhosted.org/packages/aa/c6/"
-        "d1ddf4abb55e93cebc4f2ed8b5d6dbad109ecb8d63748dd2b20ab5e57ebe/"
-        "psutil-7.2.2.tar.gz"
-    )
+    from hermes_cli.psutil_android import PSUTIL_URL, prepare_patched_psutil_sdist
 
     with tempfile.TemporaryDirectory() as tmp:
         tmp_path = Path(tmp)
         archive = tmp_path / "psutil.tar.gz"
-        urllib.request.urlretrieve(psutil_url, archive)
-        with tarfile.open(archive) as tar:
-            tar.extractall(tmp_path)
-
-        src_root = next(
-            p for p in tmp_path.iterdir() if p.is_dir() and p.name.startswith("psutil-")
-        )
-        common_py = src_root / "psutil" / "_common.py"
-        content = common_py.read_text(encoding="utf-8")
-        marker = 'LINUX = sys.platform.startswith("linux")'
-        replacement = 'LINUX = sys.platform.startswith(("linux", "android"))'
-        if marker not in content:
-            raise RuntimeError("psutil Android compatibility patch marker not found")
-        common_py.write_text(content.replace(marker, replacement), encoding="utf-8")
+        urllib.request.urlretrieve(PSUTIL_URL, archive)
+        src_root = prepare_patched_psutil_sdist(archive, tmp_path)
 
         _run_install_with_heartbeat(
             install_cmd_prefix + ["install", "--no-build-isolation", str(src_root)],
@@ -8598,6 +8701,14 @@ def _cmd_update_check(branch: str = "main", *, branch_explicit: bool = False):
     """
     from hermes_cli.config import detect_install_method
     method = detect_install_method(PROJECT_ROOT)
+    if method == "docker":
+        # Docker can't ``git fetch`` from within the container.  Surface the
+        # same long-form ``docker pull`` guidance ``hermes update`` (apply
+        # path) uses — telling the user to "reinstall via curl" or that
+        # ".git is missing" would point them at the wrong remediation.
+        from hermes_cli.config import format_docker_update_message
+        print(format_docker_update_message())
+        sys.exit(1)
     if method == "pip":
         from hermes_cli.config import recommended_update_command
         from hermes_cli.banner import check_via_pypi
@@ -8898,12 +9009,27 @@ def cmd_update(args):
     runs the update, then restores stdio on the way out (even on
     ``sys.exit`` or unhandled exceptions).
     """
-    from hermes_cli.config import is_managed, managed_error
+    from hermes_cli.config import (
+        detect_install_method,
+        format_docker_update_message,
+        is_managed,
+        managed_error,
+    )
 
     if is_managed():
         managed_error("update Hermes Agent")
         return
 
+    # Docker users can't ``git pull`` — the image excludes ``.git`` from
+    # the build context.  Bail with a friendly explanation pointing at
+    # ``docker pull`` BEFORE any of the apply-path / check-path branches
+    # below get a chance to error out with misleading "Not a git
+    # repository" text.  See format_docker_update_message() for the full
+    # rationale and tag-pinning / config-persistence notes.
+    if detect_install_method(PROJECT_ROOT) == "docker":
+        print(format_docker_update_message())
+        sys.exit(1)
+
     if getattr(args, "check", False):
         # --check honors --branch so the "any new commits?" answer matches
         # what a subsequent `hermes update --branch=<x>` would actually pull.
@@ -9176,12 +9302,13 @@ def _cmd_update_impl(args, gateway_mode: bool):
         # though `git pull` can't touch $HERMES_HOME, this is cheap
         # belt-and-suspenders insurance and gives the user something to
         # restore from via `/snapshot list` / `/snapshot restore <id>`.
+        pre_update_snapshot_id = None
         try:
             from hermes_cli.backup import create_quick_snapshot
 
-            snap_id = create_quick_snapshot(label="pre-update")
-            if snap_id:
-                print(f"  ✓ Pre-update snapshot: {snap_id}")
+            pre_update_snapshot_id = create_quick_snapshot(label="pre-update", keep=1)
+            if pre_update_snapshot_id:
+                print(f"  ✓ Pre-update snapshot: {pre_update_snapshot_id}")
         except Exception as exc:
             # Never let a snapshot failure block an update.
             logger.debug("Pre-update snapshot failed: %s", exc)
@@ -9349,7 +9476,7 @@ def _cmd_update_impl(args, gateway_mode: bool):
         _refresh_active_lazy_features()
 
         _update_node_dependencies()
-        _build_web_ui(PROJECT_ROOT / "apps" / "dashboard")
+        _build_web_ui(PROJECT_ROOT / "web")
 
         print()
         print("✓ Code updated!")
@@ -9514,6 +9641,25 @@ def _cmd_update_impl(args, gateway_mode: bool):
         else:
             print("  ✓ Configuration is up to date")
 
+        # Safety net: config-version migrations have been observed to leave
+        # cron/jobs.json valid-but-empty, silently dropping every scheduled
+        # job (issue #34600). If the live file is now empty while the
+        # pre-update snapshot held jobs, restore it and warn loudly.
+        try:
+            from hermes_cli.backup import restore_cron_jobs_if_emptied
+
+            cron_restore = restore_cron_jobs_if_emptied(pre_update_snapshot_id)
+            if cron_restore:
+                print()
+                print(
+                    "  ⚠️  cron/jobs.json was emptied during this update — "
+                    f"restored {cron_restore['job_count']} job(s) from "
+                    f"pre-update snapshot {cron_restore['snapshot_id']}."
+                )
+        except Exception as exc:
+            # Never let the cron safety net break an otherwise-good update.
+            logger.debug("Cron jobs auto-restore check failed: %s", exc)
+
         print()
         print("✓ Update complete!")
 
@@ -10611,11 +10757,10 @@ def cmd_profile(args):
             if collision:
                 print(f"Error: {collision}")
                 sys.exit(1)
-            wrapper_path = create_wrapper_script(alias_name)
+            wrapper_path = create_wrapper_script(
+                alias_name, target=name if custom_name else None
+            )
             if wrapper_path:
-                # If custom name, write the profile name into the wrapper
-                if custom_name:
-                    wrapper_path.write_text(f'#!/bin/sh\nexec hermes -p {name} "$@"\n')
                 print(f"✓ Alias created: {wrapper_path}")
                 if not _is_wrapper_dir_in_path():
                     print(f"⚠ {_get_wrapper_dir()} is not in your PATH.")
@@ -10953,7 +11098,7 @@ def cmd_dashboard(args):
     _sync_bundled_skills_quietly()
 
     if "HERMES_WEB_DIST" not in os.environ and not getattr(args, "skip_build", False):
-        if not _build_web_ui(PROJECT_ROOT / "apps" / "dashboard", fatal=True):
+        if not _build_web_ui(PROJECT_ROOT / "web", fatal=True):
             sys.exit(1)
     elif getattr(args, "skip_build", False):
         # --skip-build trusts the caller to have pre-built the web UI.
@@ -10966,7 +11111,7 @@ def cmd_dashboard(args):
         )
         if not (_dist_root / "index.html").exists():
             print(f"✗ --skip-build was passed but no web dist found at: {_dist_root}")
-            print("  Pre-build first:  cd apps/dashboard && npm install && npm run build")
+            print("  Pre-build first:  cd web && npm install && npm run build")
             print("  Or drop --skip-build to build automatically.")
             sys.exit(1)
         print(f"→ Skipping web UI build (--skip-build); using dist at {_dist_root}")
@@ -11031,24 +11176,6 @@ def cmd_logs(args):
         since=getattr(args, "since", None),
         component=getattr(args, "component", None),
     )
-
-
-def _build_provider_choices() -> list[str]:
-    """Build the --provider choices list from CANONICAL_PROVIDERS + 'auto'."""
-    try:
-        from hermes_cli.models import CANONICAL_PROVIDERS as _cp
-        return ["auto"] + [p.slug for p in _cp]
-    except Exception:
-        # Fallback: static list guarantees the CLI always works
-        return [
-            "auto", "openrouter", "nous", "openai-codex", "xai-oauth", "copilot-acp", "copilot",
-            "anthropic", "gemini", "google-gemini-cli", "xai", "bedrock", "azure-foundry",
-            "ollama-cloud", "huggingface", "zai", "kimi-coding", "kimi-coding-cn",
-            "stepfun", "minimax", "minimax-cn", "kilocode", "novita", "xiaomi", "arcee",
-            "nvidia", "deepseek", "alibaba", "qwen-oauth", "opencode-zen", "opencode-go",
-        ]
-
-
 # Top-level subcommands that argparse knows about WITHOUT running plugin
 # discovery.  Used to short-circuit eager plugin imports (which can take
 # 500ms+ pulling in google.cloud.pubsub_v1, aiohttp, grpc, etc.) when the
@@ -11371,6 +11498,11 @@ def main():
         help="Select default model and provider",
         description="Interactively select your inference provider and default model",
     )
+    model_parser.add_argument(
+        "--refresh",
+        action="store_true",
+        help="Wipe the model picker disk cache and re-fetch every provider's live /v1/models list.",
+    )
     model_parser.add_argument(
         "--portal-url",
         help="Portal base URL for Nous login (default: production portal)",
@@ -12716,6 +12848,11 @@ Examples:
         ],
     )
     skills_search.add_argument("--limit", type=int, default=10, help="Max results")
+    skills_search.add_argument(
+        "--json",
+        action="store_true",
+        help="Output JSON instead of a table (full identifiers, scripting-friendly)",
+    )
 
     skills_install = skills_subparsers.add_parser("install", help="Install a skill")
     skills_install.add_argument(
@@ -12954,7 +13091,34 @@ Examples:
     )
     plugins_remove.add_argument("name", help="Plugin directory name to remove")
 
-    plugins_subparsers.add_parser("list", aliases=["ls"], help="List installed plugins")
+    plugins_list = plugins_subparsers.add_parser(
+        "list", aliases=["ls"], help="List installed plugins"
+    )
+    plugins_list.add_argument(
+        "--enabled",
+        action="store_true",
+        help="Show only enabled plugins",
+    )
+    plugins_list.add_argument(
+        "--user",
+        action="store_true",
+        help="Show only user-installed plugins (including git plugins)",
+    )
+    plugins_list.add_argument(
+        "--no-bundled",
+        action="store_true",
+        help="Hide bundled plugins",
+    )
+    plugins_list.add_argument(
+        "--plain",
+        action="store_true",
+        help="Print compact plain-text output instead of a Rich table",
+    )
+    plugins_list.add_argument(
+        "--json",
+        action="store_true",
+        help="Print machine-readable JSON",
+    )
 
     plugins_enable = plugins_subparsers.add_parser(
         "enable", help="Enable a disabled plugin"
@@ -13434,6 +13598,11 @@ Examples:
         "--yes", "-y", action="store_true", help="Skip confirmation"
     )
 
+    sessions_subparsers.add_parser(
+        "optimize",
+        help="Reclaim disk space: merge FTS5 segments + VACUUM (no data change)",
+    )
+
     sessions_subparsers.add_parser("stats", help="Show session store statistics")
 
     sessions_rename = sessions_subparsers.add_parser(
@@ -13606,6 +13775,34 @@ Examples:
             relaunch(["--resume", selected_id])
             return  # won't reach here after execvp
 
+        elif action == "optimize":
+            db_path = db.db_path
+            before_mb = (
+                os.path.getsize(db_path) / (1024 * 1024)
+                if db_path.exists()
+                else 0.0
+            )
+            print("Optimizing session store (FTS merge + VACUUM)…")
+            try:
+                # vacuum() merges FTS5 segments (optimize_fts) then VACUUMs,
+                # and returns the number of indexes it merged.
+                n = db.vacuum()
+            except Exception as e:
+                print(f"Error: optimization failed: {e}")
+                db.close()
+                return
+            after_mb = (
+                os.path.getsize(db_path) / (1024 * 1024)
+                if db_path.exists()
+                else 0.0
+            )
+            saved = before_mb - after_mb
+            print(f"Optimized {n} FTS index(es).")
+            print(
+                f"Database size: {before_mb:.1f} MB -> {after_mb:.1f} MB "
+                f"(reclaimed {saved:.1f} MB)"
+            )
+
         elif action == "stats":
             total = db.session_count()
             msgs = db.message_count()
diff --git a/hermes_cli/mcp_catalog.py b/hermes_cli/mcp_catalog.py
index 18214767590..ba1ab297ed2 100644
--- a/hermes_cli/mcp_catalog.py
+++ b/hermes_cli/mcp_catalog.py
@@ -23,7 +23,6 @@ See references/mcp-catalog.md (this repo's skill) for the manifest schema.
 
 from __future__ import annotations
 
-import os
 import re
 import shutil
 import subprocess
@@ -41,7 +40,7 @@ from hermes_cli.config import (
     get_env_value,
     save_env_value,
 )
-from hermes_cli.cli_output import prompt as _prompt_input, prompt_yes_no
+from hermes_cli.cli_output import prompt as _prompt_input
 
 _MANIFEST_VERSION = 1
 
diff --git a/hermes_cli/mcp_config.py b/hermes_cli/mcp_config.py
index 0a1ca336193..378de219a9a 100644
--- a/hermes_cli/mcp_config.py
+++ b/hermes_cli/mcp_config.py
@@ -205,6 +205,22 @@ def _probe_single_server(
     return tools_found
 
 
+def _oauth_tokens_present(name: str) -> bool:
+    """Return True if an OAuth token file exists on disk for ``name``.
+
+    Used after ``hermes mcp login`` to distinguish a genuine authentication
+    from a probe that succeeded only because the server allowed
+    initialize/tools-list without auth (so no token was ever acquired).
+    """
+    try:
+        from tools.mcp_oauth import HermesTokenStorage
+        return HermesTokenStorage(name).has_cached_tokens()
+    except Exception as exc:  # pragma: no cover — defensive
+        logger.debug("Could not check OAuth tokens for '%s': %s", name, exc)
+        # Be permissive on unexpected errors: don't block a real success.
+        return True
+
+
 def _unwrap_exception_group(exc: BaseException) -> Exception:
     """Extract the root-cause exception from anyio TaskGroup wrappers.
 
@@ -631,6 +647,36 @@ def cmd_mcp_login(args):
     # Probe triggers the OAuth flow (browser redirect + callback capture).
     try:
         tools = _probe_single_server(name, server_config)
+        # A clean probe is NOT proof of authentication. Some MCP servers
+        # (notably Google's official Drive server) serve initialize +
+        # tools/list WITHOUT auth, so the probe lists tools even when the
+        # OAuth flow never completed — e.g. dynamic client registration
+        # 400'd because the provider doesn't support RFC 7591. Reporting
+        # "Authenticated — N tools" in that case is a false success: every
+        # real tool call later hangs until timeout because there's no token.
+        # Verify a token actually landed on disk before claiming success.
+        if not _oauth_tokens_present(name):
+            _warning(
+                "Server responded, but no OAuth token was obtained — "
+                "authentication did not complete."
+            )
+            print()
+            _info(
+                "Some providers (e.g. Google Drive, Atlassian) do not support "
+                "automatic client registration. For those you must create an "
+                "OAuth client yourself and add its credentials to config.yaml:"
+            )
+            print()
+            print(color(f"    mcp_servers:", Colors.DIM))
+            print(color(f"      {name}:", Colors.DIM))
+            print(color(f"        url: {url}", Colors.DIM))
+            print(color(f"        auth: oauth", Colors.DIM))
+            print(color(f"        oauth:", Colors.DIM))
+            print(color(f"          client_id: \"<your-oauth-client-id>\"", Colors.DIM))
+            print(color(f"          client_secret: \"<your-oauth-client-secret>\"", Colors.DIM))
+            print()
+            _info("Then re-run `hermes mcp login " + name + "`.")
+            return
         if tools:
             _success(f"Authenticated — {len(tools)} tool(s) available")
         else:
diff --git a/hermes_cli/model_catalog.py b/hermes_cli/model_catalog.py
index a1f4b761566..703d958402c 100644
--- a/hermes_cli/model_catalog.py
+++ b/hermes_cli/model_catalog.py
@@ -64,6 +64,15 @@ logger = logging.getLogger(__name__)
 DEFAULT_CATALOG_URL = (
     "https://hermes-agent.nousresearch.com/docs/api/model-catalog.json"
 )
+# Fallback fetch chain. The Docusaurus site is served through Vercel, which
+# occasionally returns HTTP 403 + x-vercel-mitigated: challenge for non-
+# browser clients (urllib, curl). When that happens the disk cache goes
+# stale and new model releases never reach the picker. The raw GitHub URL
+# is the same manifest published from the same repo and is not bot-gated,
+# so we fall through to it whenever the primary URL fails.
+DEFAULT_CATALOG_FALLBACK_URLS: tuple[str, ...] = (
+    "https://raw.githubusercontent.com/NousResearch/hermes-agent/main/website/static/api/model-catalog.json",
+)
 DEFAULT_TTL_HOURS = 24
 DEFAULT_FETCH_TIMEOUT = 8.0
 SUPPORTED_SCHEMA_VERSION = 1
@@ -139,6 +148,31 @@ def _fetch_manifest(url: str, timeout: float) -> dict[str, Any] | None:
     return data
 
 
+def _fetch_manifest_with_fallback(
+    primary_url: str,
+    timeout: float,
+    fallback_urls: tuple[str, ...] = DEFAULT_CATALOG_FALLBACK_URLS,
+) -> dict[str, Any] | None:
+    """Try ``primary_url`` first, then walk ``fallback_urls``.
+
+    Returns the first manifest that fetches and validates, or None when
+    every URL fails. Skips fallback URLs identical to the primary so an
+    operator who configured the catalog URL to point at the raw GitHub
+    copy doesn't double-fetch.
+    """
+    data = _fetch_manifest(primary_url, timeout)
+    if data is not None:
+        return data
+    for url in fallback_urls:
+        if not url or url == primary_url:
+            continue
+        data = _fetch_manifest(url, timeout)
+        if data is not None:
+            logger.info("model catalog primary URL failed; using fallback %s", url)
+            return data
+    return None
+
+
 def _validate_manifest(data: Any) -> bool:
     """Return True when ``data`` matches the minimum manifest shape."""
     if not isinstance(data, dict):
@@ -235,7 +269,7 @@ def get_catalog(*, force_refresh: bool = False) -> dict[str, Any]:
         return disk_data
 
     # Need to (re)fetch. If it fails, fall back to any stale disk copy.
-    fetched = _fetch_manifest(cfg["url"], DEFAULT_FETCH_TIMEOUT)
+    fetched = _fetch_manifest_with_fallback(cfg["url"], DEFAULT_FETCH_TIMEOUT)
     if fetched is not None:
         _write_disk_cache(fetched)
         new_disk_data, new_mtime = _read_disk_cache()
diff --git a/hermes_cli/model_switch.py b/hermes_cli/model_switch.py
index 0e01903eba9..5fb78612773 100644
--- a/hermes_cli/model_switch.py
+++ b/hermes_cli/model_switch.py
@@ -277,49 +277,43 @@ class ModelSwitchResult:
     capabilities: Optional[ModelCapabilities] = None
     model_info: Optional[ModelInfo] = None
     is_global: bool = False
-
-
-@dataclass
-class CustomAutoResult:
-    """Result of switching to bare 'custom' provider with auto-detect."""
-
-    success: bool
-    model: str = ""
-    base_url: str = ""
-    api_key: str = ""
-    error_message: str = ""
-
-
 # ---------------------------------------------------------------------------
 # Flag parsing
 # ---------------------------------------------------------------------------
 
-def parse_model_flags(raw_args: str) -> tuple[str, str, bool]:
-    """Parse --provider and --global flags from /model command args.
+def parse_model_flags(raw_args: str) -> tuple[str, str, bool, bool]:
+    """Parse --provider, --global, and --refresh flags from /model command args.
 
-    Returns (model_input, explicit_provider, is_global).
+    Returns (model_input, explicit_provider, is_global, force_refresh).
 
     Examples::
 
-        "sonnet"                         -> ("sonnet", "", False)
-        "sonnet --global"                -> ("sonnet", "", True)
-        "sonnet --provider anthropic"    -> ("sonnet", "anthropic", False)
-        "--provider my-ollama"           -> ("", "my-ollama", False)
-        "sonnet --provider anthropic --global" -> ("sonnet", "anthropic", True)
+        "sonnet"                         -> ("sonnet", "", False, False)
+        "sonnet --global"                -> ("sonnet", "", True, False)
+        "sonnet --provider anthropic"    -> ("sonnet", "anthropic", False, False)
+        "--provider my-ollama"           -> ("", "my-ollama", False, False)
+        "--refresh"                      -> ("", "", False, True)
+        "sonnet --provider anthropic --global" -> ("sonnet", "anthropic", True, False)
     """
     is_global = False
     explicit_provider = ""
+    force_refresh = False
 
     # Normalize Unicode dashes (Telegram/iOS auto-converts -- to em/en dash)
     # A single Unicode dash before a flag keyword becomes "--"
     import re as _re
-    raw_args = _re.sub(r'[\u2012\u2013\u2014\u2015](provider|global)', r'--\1', raw_args)
+    raw_args = _re.sub(r'[\u2012\u2013\u2014\u2015](provider|global|refresh)', r'--\1', raw_args)
 
     # Extract --global
     if "--global" in raw_args:
         is_global = True
         raw_args = raw_args.replace("--global", "").strip()
 
+    # Extract --refresh (bust the model picker disk cache before listing)
+    if "--refresh" in raw_args:
+        force_refresh = True
+        raw_args = raw_args.replace("--refresh", "").strip()
+
     # Extract --provider <name>
     parts = raw_args.split()
     i = 0
@@ -333,7 +327,7 @@ def parse_model_flags(raw_args: str) -> tuple[str, str, bool]:
             i += 1
 
     model_input = " ".join(filtered).strip()
-    return (model_input, explicit_provider, is_global)
+    return (model_input, explicit_provider, is_global, force_refresh)
 
 
 # ---------------------------------------------------------------------------
@@ -1078,7 +1072,7 @@ def list_authenticated_providers(
     from hermes_cli.auth import PROVIDER_REGISTRY
     from hermes_cli.models import (
         OPENROUTER_MODELS, _PROVIDER_MODELS,
-        _MODELS_DEV_PREFERRED, _merge_with_models_dev, provider_model_ids,
+        _MODELS_DEV_PREFERRED, _merge_with_models_dev, cached_provider_model_ids,
         get_curated_nous_model_ids,
     )
 
@@ -1239,13 +1233,15 @@ def list_authenticated_providers(
         if not has_creds:
             continue
 
-        # Use curated list, falling back to models.dev if no curated list.
-        # For preferred providers, merge models.dev entries into the curated
-        # catalog so newly released models (e.g. mimo-v2.5-pro on opencode-go)
-        # show up in the picker without requiring a Hermes release.
-        model_ids = curated.get(hermes_id, [])
-        if hermes_id in _MODELS_DEV_PREFERRED:
-            model_ids = _merge_with_models_dev(hermes_id, model_ids)
+        # Unified pathway: route through cached_provider_model_ids() so the
+        # /model picker sees the SAME list `hermes model` would build, with
+        # disk caching to keep the picker open snappy. Falls back to the
+        # curated static list when the live fetcher returns nothing.
+        model_ids = cached_provider_model_ids(hermes_id)
+        if not model_ids:
+            model_ids = curated.get(hermes_id, [])
+            if hermes_id in _MODELS_DEV_PREFERRED:
+                model_ids = _merge_with_models_dev(hermes_id, model_ids)
         total = len(model_ids)
         top = model_ids[:max_models]
 
@@ -1351,25 +1347,27 @@ def list_authenticated_providers(
             # matches what the user's authenticated Codex/Copilot backend
             # actually serves — including ChatGPT-Pro-only Codex slugs
             # (e.g. gpt-5.3-codex-spark) that aren't in the static curated
-            # catalog. ``provider_model_ids()`` falls back to the curated
-            # list when the live endpoint is unreachable, so this is safe
-            # for unauthenticated and offline cases too.
-            model_ids = provider_model_ids(hermes_slug)
+            # catalog. ``cached_provider_model_ids()`` falls back to the
+            # curated list when the live endpoint is unreachable, so this
+            # is safe for unauthenticated and offline cases too.
+            model_ids = cached_provider_model_ids(hermes_slug)
         # For aws_sdk providers (bedrock), use live discovery so the list
         # reflects the active region (eu.*, ap.*) not the static us.* list.
         elif overlay.auth_type == "aws_sdk":
             try:
-                from agent.bedrock_adapter import bedrock_model_ids_or_none
-                _ids = bedrock_model_ids_or_none()
-                model_ids = _ids if _ids is not None else (curated.get(hermes_slug, []) or curated.get(pid, []))
+                _ids = cached_provider_model_ids(hermes_slug)
+                model_ids = _ids if _ids else (curated.get(hermes_slug, []) or curated.get(pid, []))
             except Exception:
                 model_ids = curated.get(hermes_slug, []) or curated.get(pid, [])
         else:
-            # Use curated list — look up by Hermes slug, fall back to overlay key
-            model_ids = curated.get(hermes_slug, []) or curated.get(pid, [])
-            # Merge with models.dev for preferred providers (same rationale as above).
-            if hermes_slug in _MODELS_DEV_PREFERRED:
-                model_ids = _merge_with_models_dev(hermes_slug, model_ids)
+            # Unified pathway — see Section 1 rationale. Fall back to the
+            # curated dict (with models.dev merge for preferred providers)
+            # when the live fetcher comes up empty.
+            model_ids = cached_provider_model_ids(hermes_slug)
+            if not model_ids:
+                model_ids = curated.get(hermes_slug, []) or curated.get(pid, [])
+                if hermes_slug in _MODELS_DEV_PREFERRED:
+                    model_ids = _merge_with_models_dev(hermes_slug, model_ids)
         total = len(model_ids)
         top = model_ids[:max_models]
 
@@ -1436,13 +1434,15 @@ def list_authenticated_providers(
         # region (eu.*, us.*, ap.*) instead of the hardcoded us.* static list.
         if _cp_config and getattr(_cp_config, "auth_type", "") == "aws_sdk":
             try:
-                from agent.bedrock_adapter import bedrock_model_ids_or_none
-                _ids = bedrock_model_ids_or_none()
-                _cp_model_ids = _ids if _ids is not None else curated.get(_cp.slug, [])
+                _ids = cached_provider_model_ids(_cp.slug)
+                _cp_model_ids = _ids if _ids else curated.get(_cp.slug, [])
             except Exception:
                 _cp_model_ids = curated.get(_cp.slug, [])
         else:
-            _cp_model_ids = curated.get(_cp.slug, [])
+            # Unified pathway — same as sections 1 and 2.
+            _cp_model_ids = cached_provider_model_ids(_cp.slug)
+            if not _cp_model_ids:
+                _cp_model_ids = curated.get(_cp.slug, [])
         _cp_total = len(_cp_model_ids)
         _cp_top = _cp_model_ids[:max_models]
 
@@ -1556,24 +1556,21 @@ def list_authenticated_providers(
 
     # --- 4. Saved custom providers from config ---
     # Each ``custom_providers`` entry represents one model under a named
-    # provider. Entries sharing the same endpoint (``base_url`` + ``api_key``)
-    # are grouped into a single picker row, so e.g. four Ollama entries
-    # pointing at ``http://localhost:11434/v1`` with per-model display names
-    # ("Ollama — GLM 5.1", "Ollama — Qwen3-coder", ...) appear as one
+    # provider. Entries sharing the same endpoint, credential identity, and
+    # wire protocol are grouped into a single picker row, so e.g. four Ollama
+    # entries pointing at ``http://localhost:11434/v1`` with per-model display
+    # names ("Ollama — GLM 5.1", "Ollama — Qwen3-coder", ...) appear as one
     # "Ollama" row with four models inside instead of four near-duplicates
-    # that differ only by suffix. Entries with distinct endpoints still
-    # produce separate rows.
-    #
-    # When the grouped endpoint matches ``current_base_url`` the group's
-    # slug becomes ``current_provider`` so that selecting a model from the
-    # picker flows back through the runtime provider that already holds
-    # valid credentials — no re-resolution needed.
+    # that differ only by suffix. Same-host entries with different ``key_env``
+    # or ``api_mode`` remain distinct providers.
     if custom_providers and isinstance(custom_providers, list):
         from collections import OrderedDict
 
-        # Key by (base_url, api_key) instead of slug: names frequently
-        # differ per model ("Ollama — X") while the endpoint stays the
-        # same. Slug-based grouping left them as separate rows.
+        # Key by endpoint + credential identity + wire protocol instead of
+        # slug: names frequently differ per model ("Ollama — X") while the
+        # endpoint stays the same.  Keep same-host providers with distinct
+        # env-backed credentials or API protocols separate so picker selection
+        # cannot route through the wrong credential/mode pair.
         groups: "OrderedDict[tuple, dict]" = OrderedDict()
         for entry in custom_providers:
             if not isinstance(entry, dict):
@@ -1588,9 +1585,23 @@ def list_authenticated_providers(
             ).strip().rstrip("/")
             if not raw_name or not api_url:
                 continue
-            api_key = (entry.get("api_key") or "").strip()
+            inline_api_key = (entry.get("api_key") or "").strip()
+            key_env = (entry.get("key_env") or "").strip()
+            api_key = inline_api_key or (
+                os.environ.get(key_env, "").strip() if key_env else ""
+            )
+            api_mode = str(
+                entry.get("api_mode")
+                or entry.get("transport")
+                or ""
+            ).strip().lower()
+            credential_identity = (
+                inline_api_key
+                if inline_api_key
+                else (f"env:{key_env}" if key_env else "")
+            )
 
-            group_key = (api_url, api_key)
+            group_key = (api_url, credential_identity, api_mode)
             if group_key not in groups:
                 # Strip per-model suffix so "Ollama — GLM 5.1" becomes
                 # "Ollama" for the grouped row. Em dash is the convention
@@ -1603,29 +1614,16 @@ def list_authenticated_providers(
                         break
                 if not display_name:
                     display_name = raw_name
-                # If this endpoint matches the currently active one, use
-                # ``current_provider`` as the slug so picker-driven switches
-                # route through the live credential pipeline.
-                if (
-                    current_base_url
-                    and api_url == current_base_url.strip().rstrip("/")
-                ):
-                    # Guard against bare "custom" slug left by a prior
-                    # failed switch — always resolve to the canonical
-                    # custom:<name> form.  (GH #17478)
-                    slug = (
-                        current_provider
-                        if current_provider and current_provider != "custom"
-                        else custom_provider_slug(display_name)
-                    )
-                else:
-                    slug = custom_provider_slug(display_name)
+                slug = custom_provider_slug(display_name)
                 groups[group_key] = {
                     "slug": slug,
                     "name": display_name,
                     "api_url": api_url,
+                    "api_key": api_key,
                     "models": [],
                 }
+            elif api_key and not groups[group_key].get("api_key"):
+                groups[group_key]["api_key"] = api_key
 
             # The singular ``model:`` field only holds the currently
             # active model. Hermes's own writer (main.py::_save_custom_provider)
@@ -1647,8 +1645,16 @@ def list_authenticated_providers(
                         groups[group_key]["models"].append(m)
 
         _section4_emitted_slugs: set = set()
-        for grp_key, grp in groups.items():
-            api_url, api_key = grp_key
+        _current_base_url_norm = str(current_base_url or "").strip().rstrip("/").lower()
+        _current_base_url_group_count = sum(
+            1
+            for _grp in groups.values()
+            if _current_base_url_norm
+            and str(_grp["api_url"]).strip().rstrip("/").lower() == _current_base_url_norm
+        )
+        for grp in groups.values():
+            api_url = grp["api_url"]
+            api_key = grp.get("api_key", "")
             slug = grp["slug"]
             # If the slug is already claimed by a built-in / overlay /
             # user-provider row (sections 1-3), skip this custom group
@@ -1721,8 +1727,10 @@ def list_authenticated_providers(
                 "slug": slug,
                 "name": grp["name"],
                 "is_current": slug == current_provider or (
-                    bool(current_base_url)
-                    and _grp_url_norm == current_base_url.strip().rstrip("/").lower()
+                    current_provider == "custom"
+                    and bool(_current_base_url_norm)
+                    and _grp_url_norm == _current_base_url_norm
+                    and _current_base_url_group_count == 1
                 ),
                 "is_user_defined": True,
                 "models": grp["models"],
diff --git a/hermes_cli/models.py b/hermes_cli/models.py
index 097b6a7eb93..42eadfd7629 100644
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@@ -32,6 +32,8 @@ COPILOT_REASONING_EFFORTS_O_SERIES = ["low", "medium", "high"]
 # Fallback OpenRouter snapshot used when the live catalog is unavailable.
 # (model_id, display description shown in menus)
 OPENROUTER_MODELS: list[tuple[str, str]] = [
+    ("anthropic/claude-opus-4.8",              ""),
+    ("anthropic/claude-opus-4.8-fast",         "2x price, higher output speed"),
     ("anthropic/claude-opus-4.7",              ""),
     ("anthropic/claude-opus-4.6",              ""),
     ("anthropic/claude-sonnet-4.6",            ""),
@@ -47,11 +49,11 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
     ("xiaomi/mimo-v2.5-pro",                   ""),
     ("tencent/hy3-preview",                    ""),
     ("google/gemini-3-pro-image-preview",      ""),
-    ("google/gemini-3-flash-preview",          ""),
+    ("google/gemini-3.5-flash",                ""),
     ("google/gemini-3.1-pro-preview",          ""),
     ("google/gemini-3.1-flash-lite-preview",   ""),
     ("qwen/qwen3.6-35b-a3b",                   ""),
-    ("stepfun/step-3.5-flash",                 ""),
+    ("stepfun/step-3.7-flash",                 ""),
     ("minimax/minimax-m2.7",                   ""),
     ("z-ai/glm-5.1",                           ""),
     ("x-ai/grok-4.20",                         ""),
@@ -139,6 +141,7 @@ def _xai_curated_models() -> list[str]:
 
 _PROVIDER_MODELS: dict[str, list[str]] = {
     "nous": [
+        "anthropic/claude-opus-4.8",
         "anthropic/claude-opus-4.7",
         "anthropic/claude-opus-4.6",
         "anthropic/claude-sonnet-4.6",
@@ -153,11 +156,11 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
         "xiaomi/mimo-v2.5-pro",
         "tencent/hy3-preview",
         "google/gemini-3-pro-preview",
-        "google/gemini-3-flash-preview",
+        "google/gemini-3.5-flash",
         "google/gemini-3.1-pro-preview",
         "google/gemini-3.1-flash-lite-preview",
         "qwen/qwen3.6-35b-a3b",
-        "stepfun/step-3.5-flash",
+        "stepfun/step-3.7-flash",
         "minimax/minimax-m2.7",
         "z-ai/glm-5.1",
         "x-ai/grok-4.3",
@@ -290,6 +293,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
         "MiniMax-M2",
     ],
     "anthropic": [
+        "claude-opus-4-8",
         "claude-opus-4-7",
         "claude-opus-4-6",
         "claude-sonnet-4-6",
@@ -480,47 +484,22 @@ def _is_model_free(model_id: str, pricing: dict[str, dict[str, str]]) -> bool:
 # ---------------------------------------------------------------------------
 # Nous Portal account tier detection
 # ---------------------------------------------------------------------------
-
-def fetch_nous_account_tier(access_token: str, portal_base_url: str = "") -> dict[str, Any]:
-    """Fetch the user's Nous Portal account/subscription info.
-
-    Calls ``<portal>/api/oauth/account`` with the OAuth access token.
-
-    Returns the parsed JSON dict on success, e.g.::
-
-        {
-            "subscription": {
-                "plan": "Plus",
-                "tier": 2,
-                "monthly_charge": 20,
-                "credits_remaining": 1686.60,
-                ...
-            },
-            ...
-        }
-
-    Returns an empty dict on any failure (network, auth, parse).
-    """
-    base = (portal_base_url or "https://portal.nousresearch.com").rstrip("/")
-    url = f"{base}/api/oauth/account"
-    headers = {
-        "Authorization": f"Bearer {access_token}",
-        "Accept": "application/json",
-    }
-    try:
-        req = urllib.request.Request(url, headers=headers)
-        with urllib.request.urlopen(req, timeout=8) as resp:
-            return json.loads(resp.read().decode())
-    except Exception:
-        return {}
-
-
 def is_nous_free_tier(account_info: dict[str, Any]) -> bool:
     """Return True if the account info indicates a free (unpaid) tier.
 
-    Checks ``subscription.monthly_charge == 0``.  Returns False when
-    the field is missing or unparseable (assumes paid — don't block users).
+    Prefer the Portal's explicit ``paid_service_access.allowed`` entitlement
+    decision.  Legacy payloads fall back to ``subscription.monthly_charge == 0``.
+    Returns False when both signals are missing or unparseable.
     """
+    paid_access = account_info.get("paid_service_access")
+    if isinstance(paid_access, dict):
+        allowed = paid_access.get("allowed")
+        if isinstance(allowed, bool):
+            return not allowed
+        paid = paid_access.get("paid_access")
+        if isinstance(paid, bool):
+            return not paid
+
     sub = account_info.get("subscription")
     if not isinstance(sub, dict):
         return False
@@ -699,40 +678,28 @@ _FREE_TIER_CACHE_TTL: int = 180  # seconds (3 minutes)
 _free_tier_cache: tuple[bool, float] | None = None  # (result, timestamp)
 
 
-def check_nous_free_tier() -> bool:
+def check_nous_free_tier(*, force_fresh: bool = False) -> bool:
     """Check if the current Nous Portal user is on a free (unpaid) tier.
 
     Results are cached for ``_FREE_TIER_CACHE_TTL`` seconds to avoid
     hitting the Portal API on every call.  The cache is short-lived so
     that an account upgrade is reflected within a few minutes.
 
-    Returns False (assume paid) on any error — never blocks paying users.
+    Returns True only when entitlement is known to be free.  Unknown/error
+    states return False so this compatibility wrapper does not block users.
     """
     global _free_tier_cache
     now = time.monotonic()
-    if _free_tier_cache is not None:
+    if not force_fresh and _free_tier_cache is not None:
         cached_result, cached_at = _free_tier_cache
         if now - cached_at < _FREE_TIER_CACHE_TTL:
             return cached_result
 
     try:
-        from hermes_cli.auth import get_provider_auth_state, resolve_nous_runtime_credentials
+        from hermes_cli.nous_account import get_nous_portal_account_info
 
-        # Ensure we have a fresh token (triggers refresh if needed)
-        resolve_nous_runtime_credentials(min_key_ttl_seconds=60)
-
-        state = get_provider_auth_state("nous")
-        if not state:
-            _free_tier_cache = (False, now)
-            return False
-        access_token = state.get("access_token", "")
-        portal_url = state.get("portal_base_url", "")
-        if not access_token:
-            _free_tier_cache = (False, now)
-            return False
-
-        account_info = fetch_nous_account_tier(access_token, portal_url)
-        result = is_nous_free_tier(account_info)
+        account_info = get_nous_portal_account_info(force_fresh=force_fresh)
+        result = account_info.is_free_tier
         _free_tier_cache = (result, now)
         return result
     except Exception:
@@ -1221,68 +1188,6 @@ def _format_price_per_mtok(per_token_str: str) -> str:
     return f"${per_m:.2f}"
 
 
-def format_model_pricing_table(
-    models: list[tuple[str, str]],
-    pricing_map: dict[str, dict[str, str]],
-    current_model: str = "",
-    indent: str = "      ",
-) -> list[str]:
-    """Build a column-aligned model+pricing table for terminal display.
-
-    Returns a list of pre-formatted lines ready to print.
-    *models* is ``[(model_id, description), ...]``.
-    """
-    if not models:
-        return []
-
-    # Build rows: (model_id, input_price, output_price, cache_price, is_current)
-    rows: list[tuple[str, str, str, str, bool]] = []
-    has_cache = False
-    for mid, _desc in models:
-        is_cur = mid == current_model
-        p = pricing_map.get(mid)
-        if p:
-            inp = _format_price_per_mtok(p.get("prompt", ""))
-            out = _format_price_per_mtok(p.get("completion", ""))
-            cache_read = p.get("input_cache_read", "")
-            cache = _format_price_per_mtok(cache_read) if cache_read else ""
-            if cache:
-                has_cache = True
-        else:
-            inp, out, cache = "", "", ""
-        rows.append((mid, inp, out, cache, is_cur))
-
-    name_col = max(len(r[0]) for r in rows) + 2
-    # Compute price column widths from the actual data so decimals align
-    price_col = max(
-        max((len(r[1]) for r in rows if r[1]), default=4),
-        max((len(r[2]) for r in rows if r[2]), default=4),
-        3,  # minimum: "In" / "Out" header
-    )
-    cache_col = max(
-        max((len(r[3]) for r in rows if r[3]), default=4),
-        5,  # minimum: "Cache" header
-    ) if has_cache else 0
-    lines: list[str] = []
-
-    # Header
-    if has_cache:
-        lines.append(f"{indent}{'Model':<{name_col}} {'In':>{price_col}}  {'Out':>{price_col}}  {'Cache':>{cache_col}}  /Mtok")
-        lines.append(f"{indent}{'-' * name_col} {'-' * price_col}  {'-' * price_col}  {'-' * cache_col}")
-    else:
-        lines.append(f"{indent}{'Model':<{name_col}} {'In':>{price_col}}  {'Out':>{price_col}}  /Mtok")
-        lines.append(f"{indent}{'-' * name_col} {'-' * price_col}  {'-' * price_col}")
-
-    for mid, inp, out, cache, is_cur in rows:
-        marker = "  ← current" if is_cur else ""
-        if has_cache:
-            lines.append(f"{indent}{mid:<{name_col}} {inp:>{price_col}}  {out:>{price_col}}  {cache:>{cache_col}}{marker}")
-        else:
-            lines.append(f"{indent}{mid:<{name_col}} {inp:>{price_col}}  {out:>{price_col}}{marker}")
-
-    return lines
-
-
 def fetch_models_with_pricing(
     api_key: str | None = None,
     base_url: str = "https://openrouter.ai/api",
@@ -2045,6 +1950,12 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
                     return live
         except Exception:
             pass
+        # Live failed (or no creds). Fall back to the docs-hosted manifest
+        # — NOT the in-repo _PROVIDER_MODELS["nous"] snapshot — so newly
+        # added Portal models still surface without a Hermes release.
+        manifest_ids = get_curated_nous_model_ids()
+        if manifest_ids:
+            return manifest_ids
     if normalized == "stepfun":
         try:
             from hermes_cli.auth import resolve_api_key_provider_credentials
@@ -2148,6 +2059,206 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
     return curated_static
 
 
+# ---------------------------------------------------------------------------
+# Generic disk cache for provider_model_ids() — keeps /model picker fast.
+# ---------------------------------------------------------------------------
+#
+# Without this layer, every /model picker open re-fetches every authed
+# provider's /v1/models endpoint. On a well-configured user (anthropic +
+# openai + copilot + gemini + huggingface + ...) that's 2+ seconds of cold
+# HTTP roundtrips just to render the provider list.
+#
+# Cache strategy:
+#   - One JSON file at $HERMES_HOME/provider_models_cache.json
+#   - Per-provider entries keyed by (provider, credential fingerprint)
+#   - Credential fingerprint = sha256 of env-var values that the provider
+#     normally reads. Swap your OPENAI_API_KEY and the entry invalidates.
+#   - 1h TTL by default. `force_refresh=True` skips the cache entirely
+#     and overwrites it on success.
+#   - Only NON-EMPTY results are cached. An empty/None response from a
+#     transient network error never gets pinned.
+#   - Cache file is best-effort. Any read/write error degrades silently
+#     to a live fetch — the picker keeps working.
+
+_PROVIDER_MODELS_CACHE_TTL = 3600  # 1h
+
+
+def _provider_models_cache_path() -> Path:
+    from hermes_constants import get_hermes_home
+    return get_hermes_home() / "provider_models_cache.json"
+
+
+def _credential_fingerprint(provider: str) -> str:
+    """Return a short hash representing the credentials that
+    ``provider_model_ids(provider)`` would see right now.
+
+    Rotating any of the relevant env vars invalidates the cached entry
+    for that provider. We hash AT LEAST the api-key + base-url env vars
+    declared in ``PROVIDER_REGISTRY``. For OAuth-backed providers
+    (codex, copilot, anthropic-via-claude-code, nous portal), the
+    relevant tokens live in ``$HERMES_HOME/auth.json`` and external
+    credential files. Rather than parse every shape, we additionally
+    fold the mtime of those files into the fingerprint so refreshes
+    after re-auth bust the cache.
+    """
+    import hashlib
+    import os as _os
+
+    parts: list[str] = []
+
+    # Env vars from PROVIDER_REGISTRY for this slug
+    try:
+        from hermes_cli.auth import PROVIDER_REGISTRY
+        pcfg = PROVIDER_REGISTRY.get(provider)
+        if pcfg is not None:
+            for ev in getattr(pcfg, "api_key_env_vars", ()) or ():
+                parts.append(f"{ev}={_os.environ.get(ev, '')}")
+            bev = getattr(pcfg, "base_url_env_var", "") or ""
+            if bev:
+                parts.append(f"{bev}={_os.environ.get(bev, '')}")
+    except Exception:
+        pass
+
+    # OAuth / external-file mtimes that change on re-auth
+    try:
+        from hermes_constants import get_hermes_home
+        for rel in ("auth.json", "credentials.json"):
+            p = get_hermes_home() / rel
+            try:
+                parts.append(f"{rel}@{p.stat().st_mtime_ns}")
+            except FileNotFoundError:
+                parts.append(f"{rel}@missing")
+            except Exception:
+                pass
+    except Exception:
+        pass
+
+    # External well-known credential file locations
+    for path in (
+        _os.path.expanduser("~/.codex/auth.json"),
+        _os.path.expanduser("~/.claude/.credentials.json"),
+        _os.path.expanduser("~/.config/github-copilot/hosts.json"),
+        _os.path.expanduser("~/.minimax/credentials.json"),
+    ):
+        try:
+            mt = _os.stat(path).st_mtime_ns
+            parts.append(f"{path}@{mt}")
+        except FileNotFoundError:
+            parts.append(f"{path}@missing")
+        except Exception:
+            pass
+
+    blob = "|".join(parts).encode("utf-8", errors="replace")
+    # blake2b for cache-key fingerprinting only — not for credential storage.
+    # We never reverse this hash; collisions are harmless (worst case: cache
+    # miss → live re-fetch). Use blake2b instead of sha256 here because
+    # CodeQL's `py/weak-sensitive-data-hashing` rule flags sha256 over env
+    # vars whose names contain "API_KEY" / "TOKEN" even when the hash is
+    # used as an identity fingerprint, not for password storage. blake2b
+    # is a keyed-hash primitive and isn't flagged.
+    return hashlib.blake2b(blob, digest_size=8).hexdigest()
+
+
+def _load_provider_models_cache() -> dict:
+    """Return the full cache dict, or {} on any error."""
+    try:
+        path = _provider_models_cache_path()
+        if not path.exists():
+            return {}
+        with open(path, encoding="utf-8") as f:
+            data = json.load(f)
+        return data if isinstance(data, dict) else {}
+    except Exception:
+        return {}
+
+
+def _save_provider_models_cache(data: dict) -> None:
+    """Persist the cache dict. Best-effort — silent on any error."""
+    try:
+        from utils import atomic_json_write
+        path = _provider_models_cache_path()
+        path.parent.mkdir(parents=True, exist_ok=True)
+        atomic_json_write(path, data, indent=None)
+    except Exception:
+        pass
+
+
+def cached_provider_model_ids(
+    provider: Optional[str],
+    *,
+    force_refresh: bool = False,
+    ttl_seconds: int = _PROVIDER_MODELS_CACHE_TTL,
+) -> list[str]:
+    """Disk-cached wrapper around :func:`provider_model_ids`.
+
+    Hits the cache when fresh; otherwise calls the live function and
+    persists a non-empty result. Always returns a list (never None).
+    """
+    normalized = normalize_provider(provider) or (provider or "")
+    if not normalized:
+        return []
+
+    cache = _load_provider_models_cache()
+    fp = _credential_fingerprint(normalized)
+    entry = cache.get(normalized)
+    now = time.time()
+
+    if (
+        not force_refresh
+        and isinstance(entry, dict)
+        and entry.get("fp") == fp
+        and isinstance(entry.get("models"), list)
+        and entry["models"]
+        and (now - float(entry.get("at", 0))) < ttl_seconds
+    ):
+        return list(entry["models"])
+
+    # Cache miss / stale / forced refresh — call the live path.
+    live = provider_model_ids(normalized, force_refresh=force_refresh)
+    if live:
+        cache[normalized] = {
+            "fp": fp,
+            "at": now,
+            "models": list(live),
+        }
+        _save_provider_models_cache(cache)
+        return list(live)
+
+    # Live fetch returned nothing. If we have a stale entry with the
+    # SAME fingerprint, prefer it over an empty result — stale data
+    # beats no data when the network is flaky.
+    if (
+        isinstance(entry, dict)
+        and entry.get("fp") == fp
+        and isinstance(entry.get("models"), list)
+        and entry["models"]
+    ):
+        return list(entry["models"])
+    return list(live or [])
+
+
+def clear_provider_models_cache(provider: Optional[str] = None) -> None:
+    """Drop a single provider's cache entry, or wipe the whole cache.
+
+    ``provider=None`` wipes everything; otherwise only that provider's
+    entry is removed. Used by ``/model --refresh`` and
+    ``hermes model --refresh``.
+    """
+    try:
+        if provider is None:
+            path = _provider_models_cache_path()
+            if path.exists():
+                path.unlink()
+            return
+        cache = _load_provider_models_cache()
+        normalized = normalize_provider(provider) or provider or ""
+        if normalized in cache:
+            del cache[normalized]
+            _save_provider_models_cache(cache)
+    except Exception:
+        pass
+
+
 def _fetch_anthropic_models(timeout: float = 5.0) -> Optional[list[str]]:
     """Fetch available models from the Anthropic /v1/models endpoint.
 
diff --git a/hermes_cli/nous_account.py b/hermes_cli/nous_account.py
new file mode 100644
index 00000000000..02ccb86c7dd
--- /dev/null
+++ b/hermes_cli/nous_account.py
@@ -0,0 +1,678 @@
+"""Normalized Nous Portal account entitlement helpers."""
+
+from __future__ import annotations
+
+import hashlib
+import json
+import time
+import urllib.request
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from typing import Any, Literal, Optional
+
+
+NousAccountInfoSource = Literal["jwt", "account_api", "inference_key", "none", "error"]
+
+_ACCOUNT_INFO_CACHE_TTL = 60
+_account_info_cache: tuple[str, float, "NousPortalAccountInfo"] | None = None
+
+
+@dataclass(frozen=True)
+class NousPortalSubscriptionInfo:
+    plan: Optional[str] = None
+    tier: Optional[int] = None
+    monthly_charge: Optional[float] = None
+    current_period_end: Optional[str] = None
+    credits_remaining: Optional[float] = None
+    rollover_credits: Optional[float] = None
+
+
+@dataclass(frozen=True)
+class NousPaidServiceAccessInfo:
+    allowed: Optional[bool] = None
+    paid_access: Optional[bool] = None
+    reason: Optional[str] = None
+    organisation_id: Optional[str] = None
+    effective_at_ms: Optional[int] = None
+    has_active_subscription: Optional[bool] = None
+    active_subscription_is_paid: Optional[bool] = None
+    subscription_tier: Optional[int] = None
+    subscription_monthly_charge: Optional[float] = None
+    subscription_credits_remaining: Optional[float] = None
+    purchased_credits_remaining: Optional[float] = None
+    total_usable_credits: Optional[float] = None
+
+
+@dataclass(frozen=True)
+class NousPortalAccountInfo:
+    logged_in: bool
+    source: NousAccountInfoSource
+    fresh: bool
+    user_id: Optional[str] = None
+    org_id: Optional[str] = None
+    client_id: Optional[str] = None
+    product_id: Optional[str] = None
+    nous_client: Optional[str] = None
+    portal_base_url: Optional[str] = None
+    inference_base_url: Optional[str] = None
+    inference_credential_present: bool = False
+    credential_source: Optional[str] = None
+    expires_at: Optional[datetime] = None
+    email: Optional[str] = None
+    privy_did: Optional[str] = None
+    subscription: Optional[NousPortalSubscriptionInfo] = None
+    paid_service_access: Optional[bool] = None
+    paid_service_access_info: Optional[NousPaidServiceAccessInfo] = None
+    raw_claims: Optional[dict[str, Any]] = None
+    raw_account: Optional[dict[str, Any]] = None
+    error: Optional[str] = None
+
+    @property
+    def is_paid(self) -> bool:
+        return self.paid_service_access is True
+
+    @property
+    def is_free_tier(self) -> bool:
+        return self.paid_service_access is False
+
+    @property
+    def tool_gateway_entitled(self) -> bool:
+        return self.paid_service_access is True
+
+
+def nous_portal_billing_url(account_info: Optional[NousPortalAccountInfo] = None) -> str:
+    """Return the billing URL for a normalized Nous account snapshot."""
+    try:
+        from hermes_cli.auth import DEFAULT_NOUS_PORTAL_URL
+    except Exception:
+        DEFAULT_NOUS_PORTAL_URL = "https://portal.nousresearch.com"
+
+    base = None
+    if account_info is not None:
+        base = account_info.portal_base_url
+    if not isinstance(base, str) or not base.strip():
+        base = DEFAULT_NOUS_PORTAL_URL
+    return f"{base.rstrip('/')}/billing"
+
+
+def format_nous_portal_entitlement_message(
+    account_info: Optional[NousPortalAccountInfo],
+    *,
+    capability: str = "this feature",
+    include_refresh_hint: bool = True,
+) -> Optional[str]:
+    """Return user-facing guidance for a missing Nous paid entitlement.
+
+    ``None`` means the account is known to have paid service access.  The
+    message intentionally works from normalized entitlement fields rather than
+    subscription price alone: purchased credits without a subscription still
+    count as paid access, while a paid subscription with exhausted usable
+    credits does not.
+    """
+    billing_url = nous_portal_billing_url(account_info)
+
+    if account_info is not None and account_info.paid_service_access is True:
+        return None
+
+    if account_info is None:
+        return (
+            f"Hermes could not verify your Nous Portal entitlement, so {capability} "
+            f"is unavailable. Run `hermes model` to refresh your login, or check "
+            f"billing at {billing_url}."
+        )
+
+    if not account_info.logged_in:
+        if account_info.inference_credential_present:
+            return (
+                f"Nous inference credentials are configured, but Hermes cannot verify "
+                f"your Nous Portal paid access for {capability}. Log in with "
+                f"`hermes model` to enable Portal-managed features. Billing and "
+                f"credits are managed at {billing_url}."
+            )
+        return (
+            f"Log in to Nous Portal to use {capability}: run `hermes model`. "
+            f"Billing and credits are managed at {billing_url}."
+        )
+
+    if account_info.paid_service_access is None:
+        detail = (
+            f"Hermes could not verify your Nous Portal paid access, so {capability} "
+            f"is unavailable."
+        )
+        if account_info.error:
+            detail += f" Account lookup failed: {account_info.error}."
+        if include_refresh_hint:
+            detail += " Run `hermes model` to refresh your session."
+        detail += f" Check billing at {billing_url}."
+        return detail
+
+    access = account_info.paid_service_access_info
+    reason = access.reason if access else None
+    if reason == "account_missing":
+        return (
+            f"Hermes could not find a Nous Portal account or organisation for this "
+            f"login, so {capability} is unavailable. Run `hermes model` to "
+            f"authenticate again; if the problem persists, contact Nous support."
+        )
+
+    if reason == "no_usable_credits" or account_info.paid_service_access is False:
+        message = _no_paid_access_message(account_info, capability, billing_url)
+        if include_refresh_hint and not account_info.fresh:
+            message += " If you recently bought credits, run `hermes model` to refresh Hermes."
+        return message
+
+    return (
+        f"Your Nous Portal account does not currently have paid service access, "
+        f"so {capability} is unavailable. Add credits or update billing at {billing_url}."
+    )
+
+
+def _no_paid_access_message(
+    account_info: NousPortalAccountInfo,
+    capability: str,
+    billing_url: str,
+) -> str:
+    access = account_info.paid_service_access_info
+    has_active_subscription = access.has_active_subscription if access else None
+    active_subscription_is_paid = access.active_subscription_is_paid if access else None
+    total_usable = access.total_usable_credits if access else None
+    subscription_credits = access.subscription_credits_remaining if access else None
+    purchased_credits = access.purchased_credits_remaining if access else None
+
+    if has_active_subscription and active_subscription_is_paid:
+        credit_detail = _credit_detail(total_usable, subscription_credits, purchased_credits)
+        return (
+            f"Your Nous Portal credits are exhausted{credit_detail}, so {capability} "
+            f"is unavailable. Top up or renew credits at {billing_url}."
+        )
+
+    if has_active_subscription and active_subscription_is_paid is False:
+        return (
+            f"Your current Nous Portal plan does not include paid service access, "
+            f"so {capability} is unavailable. Upgrade or add credits at {billing_url}."
+        )
+
+    if has_active_subscription is False:
+        credit_detail = _credit_detail(total_usable, subscription_credits, purchased_credits)
+        return (
+            f"Your Nous Portal account has no active subscription or usable credits"
+            f"{credit_detail}, so {capability} is unavailable. Subscribe or add credits "
+            f"at {billing_url}."
+        )
+
+    credit_detail = _credit_detail(total_usable, subscription_credits, purchased_credits)
+    return (
+        f"Your Nous Portal account has no usable paid credits{credit_detail}, so "
+        f"{capability} is unavailable. Add credits or update billing at {billing_url}."
+    )
+
+
+def _credit_detail(
+    total_usable: Optional[float],
+    subscription_credits: Optional[float],
+    purchased_credits: Optional[float],
+) -> str:
+    parts: list[str] = []
+    if total_usable is not None:
+        parts.append(f"usable ${total_usable:.2f}")
+    if subscription_credits is not None:
+        parts.append(f"subscription ${subscription_credits:.2f}")
+    if purchased_credits is not None:
+        parts.append(f"purchased ${purchased_credits:.2f}")
+    if not parts:
+        return ""
+    return f" ({', '.join(parts)})"
+
+
+def reset_nous_portal_account_info_cache() -> None:
+    """Clear the short-lived account-info cache used by tests."""
+    global _account_info_cache
+    _account_info_cache = None
+
+
+def get_nous_portal_account_info(
+    *,
+    force_fresh: bool = False,
+    min_jwt_ttl_seconds: int = 60,
+) -> NousPortalAccountInfo:
+    """Return normalized Nous Portal account entitlement information.
+
+    By default, a valid unexpired OAuth access JWT is used as a low-latency
+    local account snapshot. ``force_fresh=True`` always calls
+    ``/api/oauth/account`` and bypasses the short-lived cache. JWT claims are
+    decoded locally for UX gating only; server APIs remain authoritative.
+    """
+    try:
+        from hermes_cli.auth import get_provider_auth_state
+
+        state = get_provider_auth_state("nous") or {}
+    except Exception as exc:
+        return _error_info(error=exc, logged_in=False)
+
+    access_token = state.get("access_token")
+    portal_base_url = _portal_base_url(state)
+    if not isinstance(access_token, str) or not access_token.strip():
+        pool_oauth_info = _info_from_oauth_pool(
+            force_fresh=force_fresh,
+            min_jwt_ttl_seconds=min_jwt_ttl_seconds,
+            portal_base_url=portal_base_url,
+        )
+        if pool_oauth_info is not None:
+            return pool_oauth_info
+        pool_info = _info_from_inference_key_pool(portal_base_url)
+        if pool_info is not None:
+            return pool_info
+        return NousPortalAccountInfo(
+            logged_in=False,
+            source="none",
+            fresh=False,
+            portal_base_url=portal_base_url,
+        )
+
+    if not force_fresh:
+        jwt_info = _info_from_valid_jwt(
+            access_token,
+            state=state,
+            portal_base_url=portal_base_url,
+            min_jwt_ttl_seconds=min_jwt_ttl_seconds,
+        )
+        if jwt_info is not None:
+            return jwt_info
+
+    return _fresh_account_info(
+        state=state,
+        force_fresh=force_fresh,
+        portal_base_url=portal_base_url,
+    )
+
+
+def _fresh_account_info(
+    *,
+    state: dict[str, Any],
+    force_fresh: bool,
+    portal_base_url: Optional[str],
+) -> NousPortalAccountInfo:
+    global _account_info_cache
+
+    try:
+        from hermes_cli.auth import get_provider_auth_state, resolve_nous_access_token
+
+        access_token = resolve_nous_access_token()
+        refreshed_state = get_provider_auth_state("nous") or state
+        portal_base_url = _portal_base_url(refreshed_state) or portal_base_url
+        cache_key = _cache_key(access_token, portal_base_url)
+
+        if not force_fresh and _account_info_cache is not None:
+            cached_key, cached_at, cached_info = _account_info_cache
+            if cached_key == cache_key and (time.monotonic() - cached_at) < _ACCOUNT_INFO_CACHE_TTL:
+                return cached_info
+
+        payload = _fetch_nous_account_info(access_token, portal_base_url)
+        if not payload:
+            return _error_info(
+                error="empty_account_response",
+                logged_in=True,
+                portal_base_url=portal_base_url,
+            )
+        if isinstance(payload.get("error"), str):
+            return _error_info(
+                error=payload.get("error") or "account_response_error",
+                logged_in=True,
+                portal_base_url=portal_base_url,
+                raw_account=payload,
+            )
+
+        info = _info_from_account_payload(
+            payload,
+            state=refreshed_state,
+            portal_base_url=portal_base_url,
+        )
+        _account_info_cache = (cache_key, time.monotonic(), info)
+        return info
+    except Exception as exc:
+        return _error_info(
+            error=exc,
+            logged_in=bool(state.get("access_token")),
+            portal_base_url=portal_base_url,
+        )
+
+
+def _info_from_inference_key_pool(
+    portal_base_url: Optional[str],
+) -> Optional[NousPortalAccountInfo]:
+    """Return an explicit unknown-entitlement snapshot for opaque Nous keys."""
+    try:
+        entry = _select_nous_pool_entry()
+        if entry is None:
+            return None
+        runtime_key = getattr(entry, "runtime_api_key", None) or getattr(entry, "access_token", "")
+        if not isinstance(runtime_key, str) or not runtime_key.strip():
+            return None
+
+        return NousPortalAccountInfo(
+            logged_in=False,
+            source="inference_key",
+            fresh=False,
+            portal_base_url=(
+                getattr(entry, "portal_base_url", None)
+                or portal_base_url
+            ),
+            inference_base_url=(
+                getattr(entry, "inference_base_url", None)
+                or getattr(entry, "runtime_base_url", None)
+                or getattr(entry, "base_url", None)
+            ),
+            inference_credential_present=True,
+            credential_source=f"pool:{getattr(entry, 'label', 'unknown')}",
+            error="portal_oauth_missing",
+        )
+    except Exception:
+        return None
+
+
+def _info_from_oauth_pool(
+    *,
+    force_fresh: bool,
+    min_jwt_ttl_seconds: int,
+    portal_base_url: Optional[str],
+) -> Optional[NousPortalAccountInfo]:
+    try:
+        entry = _select_nous_pool_entry()
+    except Exception:
+        return None
+    if entry is None or not _pool_entry_is_portal_oauth(entry):
+        return None
+
+    access_token = getattr(entry, "access_token", None)
+    if not isinstance(access_token, str) or not access_token.strip():
+        return None
+
+    entry_portal_url = (
+        getattr(entry, "portal_base_url", None)
+        or portal_base_url
+    )
+    state = {
+        "access_token": access_token,
+        "client_id": getattr(entry, "client_id", None),
+        "inference_base_url": (
+            getattr(entry, "inference_base_url", None)
+            or getattr(entry, "runtime_base_url", None)
+            or getattr(entry, "base_url", None)
+        ),
+        "agent_key": getattr(entry, "agent_key", None),
+        "credential_source": f"pool:{getattr(entry, 'label', 'unknown')}",
+    }
+
+    if not force_fresh:
+        jwt_info = _info_from_valid_jwt(
+            access_token,
+            state=state,
+            portal_base_url=entry_portal_url,
+            min_jwt_ttl_seconds=min_jwt_ttl_seconds,
+        )
+        if jwt_info is not None:
+            return jwt_info
+
+    try:
+        payload = _fetch_nous_account_info(access_token, entry_portal_url)
+    except Exception as exc:
+        return _error_info(
+            error=exc,
+            logged_in=True,
+            portal_base_url=entry_portal_url,
+        )
+    if not payload:
+        return _error_info(
+            error="empty_account_response",
+            logged_in=True,
+            portal_base_url=entry_portal_url,
+        )
+    if isinstance(payload.get("error"), str):
+        return _error_info(
+            error=payload.get("error") or "account_response_error",
+            logged_in=True,
+            portal_base_url=entry_portal_url,
+            raw_account=payload,
+        )
+    return _info_from_account_payload(
+        payload,
+        state=state,
+        portal_base_url=entry_portal_url,
+    )
+
+
+def _select_nous_pool_entry() -> Optional[Any]:
+    from agent.credential_pool import load_pool
+
+    pool = load_pool("nous")
+    if not pool or not pool.has_credentials():
+        return None
+    entries = list(pool.entries())
+    if not entries:
+        return None
+
+    def _entry_sort_key(entry: Any) -> tuple[float, float, int]:
+        agent_exp = _parse_iso_timestamp(getattr(entry, "agent_key_expires_at", None)) or 0.0
+        access_exp = _parse_iso_timestamp(getattr(entry, "expires_at", None)) or 0.0
+        priority = int(getattr(entry, "priority", 0) or 0)
+        return (agent_exp, access_exp, -priority)
+
+    return max(entries, key=_entry_sort_key)
+
+
+def _pool_entry_is_portal_oauth(entry: Any) -> bool:
+    access_token = getattr(entry, "access_token", None)
+    if not isinstance(access_token, str) or not access_token.strip():
+        return False
+    auth_type = str(getattr(entry, "auth_type", "") or "").strip().lower()
+    refresh_token = getattr(entry, "refresh_token", None)
+    return auth_type.startswith("oauth") or bool(refresh_token)
+
+
+def _fetch_nous_account_info(
+    access_token: str,
+    portal_base_url: Optional[str] = None,
+) -> dict[str, Any]:
+    base = (portal_base_url or "https://portal.nousresearch.com").rstrip("/")
+    url = f"{base}/api/oauth/account"
+    headers = {
+        "Authorization": f"Bearer {access_token}",
+        "Accept": "application/json",
+    }
+    req = urllib.request.Request(url, headers=headers)
+    with urllib.request.urlopen(req, timeout=8) as resp:
+        payload = json.loads(resp.read().decode())
+    return payload if isinstance(payload, dict) else {}
+
+
+def _info_from_valid_jwt(
+    token: str,
+    *,
+    state: dict[str, Any],
+    portal_base_url: Optional[str],
+    min_jwt_ttl_seconds: int,
+) -> Optional[NousPortalAccountInfo]:
+    try:
+        from hermes_cli.auth import _decode_jwt_claims
+    except Exception:
+        return None
+
+    claims = _decode_jwt_claims(token)
+    if not claims:
+        return None
+
+    exp = _coerce_float(claims.get("exp"))
+    if exp is None or exp <= time.time() + max(0, int(min_jwt_ttl_seconds)):
+        return None
+
+    paid_access = _coerce_bool(claims.get("paid_access"))
+    subscription_tier = _coerce_int(claims.get("subscription_tier"))
+    access_info = NousPaidServiceAccessInfo(
+        allowed=paid_access,
+        paid_access=paid_access,
+        organisation_id=_coerce_str(claims.get("org_id")),
+        subscription_tier=subscription_tier,
+    )
+
+    return NousPortalAccountInfo(
+        logged_in=True,
+        source="jwt",
+        fresh=False,
+        user_id=_coerce_str(claims.get("sub")),
+        org_id=_coerce_str(claims.get("org_id")),
+        client_id=_coerce_str(claims.get("client_id") or state.get("client_id")),
+        product_id=_coerce_str(claims.get("product_id")),
+        nous_client=_coerce_str(claims.get("nous_client")),
+        portal_base_url=portal_base_url,
+        inference_base_url=_coerce_str(state.get("inference_base_url")),
+        inference_credential_present=True,
+        credential_source=_coerce_str(state.get("credential_source")) or "auth_store",
+        expires_at=datetime.fromtimestamp(exp, tz=timezone.utc),
+        paid_service_access=paid_access,
+        paid_service_access_info=access_info,
+        raw_claims=dict(claims),
+    )
+
+
+def _info_from_account_payload(
+    payload: dict[str, Any],
+    *,
+    state: dict[str, Any],
+    portal_base_url: Optional[str],
+) -> NousPortalAccountInfo:
+    user = payload.get("user") if isinstance(payload.get("user"), dict) else {}
+    organisation = (
+        payload.get("organisation")
+        if isinstance(payload.get("organisation"), dict)
+        else {}
+    )
+    subscription = _subscription_from_payload(payload.get("subscription"))
+    access = _paid_service_access_from_payload(payload.get("paid_service_access"))
+    paid_access = access.allowed if access else None
+    if paid_access is None and access is not None:
+        paid_access = access.paid_access
+
+    return NousPortalAccountInfo(
+        logged_in=True,
+        source="account_api",
+        fresh=True,
+        org_id=_coerce_str(organisation.get("id")) or (access.organisation_id if access else None),
+        client_id=_coerce_str(state.get("client_id")),
+        portal_base_url=portal_base_url,
+        inference_base_url=_coerce_str(state.get("inference_base_url")),
+        inference_credential_present=bool(state.get("access_token") or state.get("agent_key")),
+        credential_source=_coerce_str(state.get("credential_source")) or "auth_store",
+        email=_coerce_str(user.get("email")),
+        privy_did=_coerce_str(user.get("privy_did")),
+        subscription=subscription,
+        paid_service_access=paid_access,
+        paid_service_access_info=access,
+        raw_account=dict(payload),
+    )
+
+
+def _subscription_from_payload(value: Any) -> Optional[NousPortalSubscriptionInfo]:
+    if not isinstance(value, dict):
+        return None
+    return NousPortalSubscriptionInfo(
+        plan=_coerce_str(value.get("plan")),
+        tier=_coerce_int(value.get("tier")),
+        monthly_charge=_coerce_float(value.get("monthly_charge")),
+        current_period_end=_coerce_str(value.get("current_period_end")),
+        credits_remaining=_coerce_float(value.get("credits_remaining")),
+        rollover_credits=_coerce_float(value.get("rollover_credits")),
+    )
+
+
+def _paid_service_access_from_payload(value: Any) -> Optional[NousPaidServiceAccessInfo]:
+    if not isinstance(value, dict):
+        return None
+    allowed = _coerce_bool(value.get("allowed"))
+    paid_access = _coerce_bool(value.get("paid_access"))
+    return NousPaidServiceAccessInfo(
+        allowed=allowed,
+        paid_access=paid_access,
+        reason=_coerce_str(value.get("reason")),
+        organisation_id=_coerce_str(value.get("organisation_id")),
+        effective_at_ms=_coerce_int(value.get("effective_at_ms")),
+        has_active_subscription=_coerce_bool(value.get("has_active_subscription")),
+        active_subscription_is_paid=_coerce_bool(value.get("active_subscription_is_paid")),
+        subscription_tier=_coerce_int(value.get("subscription_tier")),
+        subscription_monthly_charge=_coerce_float(value.get("subscription_monthly_charge")),
+        subscription_credits_remaining=_coerce_float(value.get("subscription_credits_remaining")),
+        purchased_credits_remaining=_coerce_float(value.get("purchased_credits_remaining")),
+        total_usable_credits=_coerce_float(value.get("total_usable_credits")),
+    )
+
+
+def _error_info(
+    *,
+    error: object,
+    logged_in: bool,
+    portal_base_url: Optional[str] = None,
+    raw_account: Optional[dict[str, Any]] = None,
+) -> NousPortalAccountInfo:
+    return NousPortalAccountInfo(
+        logged_in=logged_in,
+        source="error",
+        fresh=False,
+        portal_base_url=portal_base_url,
+        raw_account=raw_account,
+        error=str(error),
+    )
+
+
+def _portal_base_url(state: dict[str, Any]) -> Optional[str]:
+    value = state.get("portal_base_url")
+    if not isinstance(value, str) or not value.strip():
+        return None
+    return value.strip().rstrip("/")
+
+
+def _cache_key(access_token: str, portal_base_url: Optional[str]) -> str:
+    digest = hashlib.sha256(access_token.encode("utf-8")).hexdigest()
+    return f"{portal_base_url or ''}:{digest}"
+
+
+def _parse_iso_timestamp(value: Any) -> Optional[float]:
+    if not isinstance(value, str) or not value:
+        return None
+    text = value.strip()
+    if text.endswith("Z"):
+        text = text[:-1] + "+00:00"
+    try:
+        return datetime.fromisoformat(text).timestamp()
+    except Exception:
+        return None
+
+
+def _coerce_str(value: Any) -> Optional[str]:
+    if isinstance(value, str) and value:
+        return value
+    return None
+
+
+def _coerce_bool(value: Any) -> Optional[bool]:
+    return value if isinstance(value, bool) else None
+
+
+def _coerce_int(value: Any) -> Optional[int]:
+    if isinstance(value, bool):
+        return None
+    try:
+        if value is None:
+            return None
+        return int(value)
+    except (TypeError, ValueError):
+        return None
+
+
+def _coerce_float(value: Any) -> Optional[float]:
+    if isinstance(value, bool):
+        return None
+    try:
+        if value is None:
+            return None
+        return float(value)
+    except (TypeError, ValueError):
+        return None
diff --git a/hermes_cli/nous_subscription.py b/hermes_cli/nous_subscription.py
index be027e85cd1..5f29101eb01 100644
--- a/hermes_cli/nous_subscription.py
+++ b/hermes_cli/nous_subscription.py
@@ -6,8 +6,8 @@ from dataclasses import dataclass
 from pathlib import Path
 from typing import Dict, Iterable, Optional, Set
 
-from hermes_cli.auth import get_nous_auth_status
 from hermes_cli.config import get_env_value, load_config
+from hermes_cli.nous_account import NousPortalAccountInfo, get_nous_portal_account_info
 from tools.managed_tool_gateway import is_managed_tool_gateway_ready
 from utils import is_truthy_value
 from tools.tool_backend_helpers import (
@@ -53,6 +53,7 @@ class NousSubscriptionFeatures:
     nous_auth_present: bool
     provider_is_nous: bool
     features: Dict[str, NousFeatureState]
+    account_info: Optional[NousPortalAccountInfo] = None
 
     @property
     def web(self) -> NousFeatureState:
@@ -70,12 +71,16 @@ class NousSubscriptionFeatures:
     def browser(self) -> NousFeatureState:
         return self.features["browser"]
 
+    @property
+    def video_gen(self) -> NousFeatureState:
+        return self.features["video_gen"]
+
     @property
     def modal(self) -> NousFeatureState:
         return self.features["modal"]
 
     def items(self) -> Iterable[NousFeatureState]:
-        ordered = ("web", "image_gen", "tts", "browser", "modal")
+        ordered = ("web", "image_gen", "video_gen", "tts", "browser", "modal")
         for key in ordered:
             yield self.features[key]
 
@@ -227,6 +232,8 @@ def _resolve_browser_feature_state(
 
 def get_nous_subscription_features(
     config: Optional[Dict[str, object]] = None,
+    *,
+    force_fresh: bool = False,
 ) -> NousSubscriptionFeatures:
     if config is None:
         config = load_config() or {}
@@ -235,16 +242,24 @@ def get_nous_subscription_features(
     provider_is_nous = str(model_cfg.get("provider") or "").strip().lower() == "nous"
 
     try:
-        nous_status = get_nous_auth_status()
+        if force_fresh:
+            account_info = get_nous_portal_account_info(force_fresh=True)
+        else:
+            account_info = get_nous_portal_account_info()
     except Exception:
-        nous_status = {}
+        account_info = None
 
-    managed_tools_flag = managed_nous_tools_enabled()
-    nous_auth_present = bool(nous_status.get("logged_in"))
+    managed_tools_flag = bool(
+        account_info
+        and account_info.logged_in
+        and account_info.paid_service_access is True
+    )
+    nous_auth_present = bool(account_info and account_info.logged_in)
     subscribed = provider_is_nous or nous_auth_present
 
     web_tool_enabled = _toolset_enabled(config, "web")
     image_tool_enabled = _toolset_enabled(config, "image_gen")
+    video_tool_enabled = _toolset_enabled(config, "video_gen")
     tts_tool_enabled = _toolset_enabled(config, "tts")
     browser_tool_enabled = _toolset_enabled(config, "browser")
     modal_tool_enabled = _toolset_enabled(config, "terminal")
@@ -279,6 +294,8 @@ def get_nous_subscription_features(
     browser_use_gateway = _uses_gateway(browser_cfg)
     image_gen_cfg = config.get("image_gen") if isinstance(config.get("image_gen"), dict) else {}
     image_use_gateway = _uses_gateway(image_gen_cfg)
+    video_gen_cfg = config.get("video_gen") if isinstance(config.get("video_gen"), dict) else {}
+    video_use_gateway = _uses_gateway(video_gen_cfg)
 
     direct_exa = bool(get_env_value("EXA_API_KEY"))
     direct_firecrawl = bool(get_env_value("FIRECRAWL_API_KEY") or get_env_value("FIRECRAWL_API_URL"))
@@ -286,6 +303,7 @@ def get_nous_subscription_features(
     direct_tavily = bool(get_env_value("TAVILY_API_KEY"))
     direct_searxng = bool(get_env_value("SEARXNG_URL"))
     direct_fal = fal_key_is_configured()
+    direct_fal_video = direct_fal  # same FAL_KEY; separate var so use_gateway is independent
     direct_openai_tts = bool(resolve_openai_audio_api_key())
     direct_elevenlabs = bool(get_env_value("ELEVENLABS_API_KEY"))
     direct_camofox = bool(get_env_value("CAMOFOX_URL"))
@@ -301,6 +319,8 @@ def get_nous_subscription_features(
         direct_tavily = False
     if image_use_gateway:
         direct_fal = False
+    if video_use_gateway:
+        direct_fal_video = False
     if tts_use_gateway:
         direct_openai_tts = False
         direct_elevenlabs = False
@@ -310,6 +330,8 @@ def get_nous_subscription_features(
 
     managed_web_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("firecrawl")
     managed_image_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("fal-queue")
+    # Video gen uses the same fal-queue gateway as image gen.
+    managed_video_available = managed_image_available
     managed_tts_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("openai-audio")
     managed_browser_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("browser-use")
     managed_modal_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("modal")
@@ -317,6 +339,7 @@ def get_nous_subscription_features(
         modal_mode,
         has_direct=direct_modal,
         managed_ready=managed_modal_available,
+        managed_enabled=managed_tools_flag,
     )
 
     web_managed = web_backend == "firecrawl" and managed_web_available and not direct_firecrawl
@@ -346,6 +369,10 @@ def get_nous_subscription_features(
     image_active = bool(image_tool_enabled and (image_managed or direct_fal))
     image_available = bool(managed_image_available or direct_fal)
 
+    video_managed = video_tool_enabled and managed_video_available and not direct_fal_video
+    video_active = bool(video_tool_enabled and (video_managed or direct_fal_video))
+    video_available = bool(managed_video_available or direct_fal_video)
+
     tts_current_provider = tts_provider or "edge"
     tts_managed = (
         tts_tool_enabled
@@ -440,6 +467,18 @@ def get_nous_subscription_features(
             current_provider="FAL" if direct_fal else ("Nous Subscription" if image_managed else ""),
             explicit_configured=direct_fal,
         ),
+        "video_gen": NousFeatureState(
+            key="video_gen",
+            label="Video generation",
+            included_by_default=False,
+            available=video_available,
+            active=video_active,
+            managed_by_nous=video_managed,
+            direct_override=video_active and not video_managed,
+            toolset_enabled=video_tool_enabled,
+            current_provider="FAL" if direct_fal_video else ("Nous Subscription" if video_managed else ""),
+            explicit_configured=direct_fal_video,
+        ),
         "tts": NousFeatureState(
             key="tts",
             label="OpenAI TTS",
@@ -483,6 +522,7 @@ def get_nous_subscription_features(
         nous_auth_present=nous_auth_present,
         provider_is_nous=provider_is_nous,
         features=features,
+        account_info=account_info,
     )
 
 
@@ -493,11 +533,15 @@ def apply_nous_managed_defaults(
     config: Dict[str, object],
     *,
     enabled_toolsets: Optional[Iterable[str]] = None,
+    force_fresh: bool = False,
 ) -> set[str]:
-    if not managed_nous_tools_enabled():
+    features = get_nous_subscription_features(config, force_fresh=force_fresh)
+    if not (
+        features.account_info
+        and features.account_info.logged_in
+        and features.account_info.paid_service_access is True
+    ):
         return set()
-
-    features = get_nous_subscription_features(config)
     if not features.provider_is_nous:
         return set()
 
@@ -545,6 +589,9 @@ def apply_nous_managed_defaults(
     if "image_gen" in selected_toolsets and not fal_key_is_configured():
         changed.add("image_gen")
 
+    if "video_gen" in selected_toolsets and not fal_key_is_configured():
+        changed.add("video_gen")
+
     return changed
 
 
@@ -555,6 +602,7 @@ def apply_nous_managed_defaults(
 _GATEWAY_TOOL_LABELS = {
     "web": "Web search & extract (Firecrawl)",
     "image_gen": "Image generation (FAL)",
+    "video_gen": "Video generation (FAL)",
     "tts": "Text-to-speech (OpenAI TTS)",
     "browser": "Browser automation (Browser Use)",
 }
@@ -562,6 +610,7 @@ _GATEWAY_TOOL_LABELS = {
 
 def _get_gateway_direct_credentials() -> Dict[str, bool]:
     """Return a dict of tool_key -> has_direct_credentials."""
+    fal_direct = fal_key_is_configured()
     return {
         "web": bool(
             get_env_value("FIRECRAWL_API_KEY")
@@ -570,7 +619,8 @@ def _get_gateway_direct_credentials() -> Dict[str, bool]:
             or get_env_value("TAVILY_API_KEY")
             or get_env_value("EXA_API_KEY")
         ),
-        "image_gen": fal_key_is_configured(),
+        "image_gen": fal_direct,
+        "video_gen": fal_direct,
         "tts": bool(
             resolve_openai_audio_api_key()
             or get_env_value("ELEVENLABS_API_KEY")
@@ -585,15 +635,18 @@ def _get_gateway_direct_credentials() -> Dict[str, bool]:
 _GATEWAY_DIRECT_LABELS = {
     "web": "Firecrawl/Exa/Parallel/Tavily key",
     "image_gen": "FAL key",
+    "video_gen": "FAL key",
     "tts": "OpenAI/ElevenLabs key",
     "browser": "Browser Use/Browserbase key",
 }
 
-_ALL_GATEWAY_KEYS = ("web", "image_gen", "tts", "browser")
+_ALL_GATEWAY_KEYS = ("web", "image_gen", "video_gen", "tts", "browser")
 
 
 def get_gateway_eligible_tools(
     config: Optional[Dict[str, object]] = None,
+    *,
+    force_fresh: bool = False,
 ) -> tuple[list[str], list[str], list[str]]:
     """Return (unconfigured, has_direct, already_managed) tool key lists.
 
@@ -604,7 +657,11 @@ def get_gateway_eligible_tools(
     All lists are empty when the user is not a paid Nous subscriber or
     is not using Nous as their provider.
     """
-    if not managed_nous_tools_enabled():
+    if force_fresh:
+        managed_enabled = managed_nous_tools_enabled(force_fresh=True)
+    else:
+        managed_enabled = managed_nous_tools_enabled()
+    if not managed_enabled:
         return [], [], []
 
     if config is None:
@@ -624,6 +681,7 @@ def get_gateway_eligible_tools(
     opted_in = {
         "web": _uses_gateway(config.get("web")),
         "image_gen": _uses_gateway(config.get("image_gen")),
+        "video_gen": _uses_gateway(config.get("video_gen")),
         "tts": _uses_gateway(config.get("tts")),
         "browser": _uses_gateway(config.get("browser")),
     }
@@ -692,10 +750,23 @@ def apply_gateway_defaults(
         image_cfg["use_gateway"] = True
         changed.add("image_gen")
 
+    if "video_gen" in tool_keys:
+        video_cfg = config.get("video_gen")
+        if not isinstance(video_cfg, dict):
+            video_cfg = {}
+            config["video_gen"] = video_cfg
+        video_cfg["provider"] = "fal"
+        video_cfg["use_gateway"] = True
+        changed.add("video_gen")
+
     return changed
 
 
-def prompt_enable_tool_gateway(config: Dict[str, object]) -> set[str]:
+def prompt_enable_tool_gateway(
+    config: Dict[str, object],
+    *,
+    force_fresh: bool = True,
+) -> set[str]:
     """If eligible tools exist, prompt the user to enable the Tool Gateway.
 
     Uses prompt_choice() with a description parameter so the curses TUI
@@ -704,7 +775,10 @@ def prompt_enable_tool_gateway(config: Dict[str, object]) -> set[str]:
     Returns the set of tools that were enabled, or empty set if the user
     declined or no tools were eligible.
     """
-    unconfigured, has_direct, already_managed = get_gateway_eligible_tools(config)
+    unconfigured, has_direct, already_managed = get_gateway_eligible_tools(
+        config,
+        force_fresh=force_fresh,
+    )
     if not unconfigured and not has_direct:
         return set()
 
diff --git a/hermes_cli/partial_compress.py b/hermes_cli/partial_compress.py
new file mode 100644
index 00000000000..dc1115d9f39
--- /dev/null
+++ b/hermes_cli/partial_compress.py
@@ -0,0 +1,235 @@
+"""Boundary-aware partial compression — "summarize up to here".
+
+Inspired by Claude Code's Rewind menu "Summarize up to here" action
+(v2.1.139–v2.1.142, Week 20, May 2026):
+https://code.claude.com/docs/en/whats-new/2026-w20
+
+Hermes already has ``/compress`` (full-history compaction) and an
+automatic token-budget tail-protection heuristic inside
+``ContextCompressor``. What was missing is *user-chosen* boundary
+control: "fold everything before this point into a summary, but keep
+my most recent N exchanges exactly as they are." That is the value of
+the Claude Code feature — the user decides the compression boundary
+instead of leaving it to the token-budget heuristic.
+
+This module owns the pure, side-effect-free split logic so both the
+CLI (``cli.py::_manual_compress``) and the gateway
+(``gateway/run.py::_handle_compress_command``) share one
+implementation. The slash-command surfaces handle compression of the
+*head* via the existing ``_compress_context`` pipeline (preserving all
+the session-rotation / lock / memory-notify machinery) and then
+re-append the verbatim *tail* returned here.
+
+Design notes / invariants honored:
+
+* **Role alternation.** The compressed head ends with summary/handoff
+  content (assistant- or user-role, possibly a trailing todo snapshot).
+  The verbatim tail must begin with a ``user`` message so the rejoined
+  history keeps the user↔assistant alternation that providers validate.
+  :func:`split_history_for_partial_compress` snaps the tail boundary
+  backwards to the nearest ``user`` turn so the rejoin is always legal.
+
+* **No silent context mutation.** This is a manual, user-invoked
+  action. It rotates the session exactly like ``/compress`` does (via
+  the caller), so the prompt-cache reset is explicit and expected, not
+  silent.
+
+* **Conservative defaults.** ``keep_last`` counts *exchanges* (a user
+  turn plus its following assistant/tool turns), defaulting to 2. The
+  split never compresses if doing so would leave nothing in the head.
+"""
+
+from __future__ import annotations
+
+from typing import Any, Dict, List, Optional, Tuple
+
+#: Default number of recent exchanges to preserve verbatim when the user
+#: runs ``/compress here`` without an explicit count.
+DEFAULT_KEEP_LAST = 2
+
+#: Hard ceiling so a fat-fingered ``/compress here 9999`` doesn't turn
+#: into a no-op surprise — clamp instead.
+MAX_KEEP_LAST = 100
+
+
+def parse_partial_compress_args(
+    raw_args: str,
+) -> Tuple[bool, int, Optional[str]]:
+    """Parse the argument string after ``/compress``.
+
+    Recognizes the boundary-aware forms:
+
+    * ``here``            → partial compress, keep ``DEFAULT_KEEP_LAST``
+    * ``here 4``          → partial compress, keep 4 exchanges
+    * ``--keep 4``        → partial compress, keep 4 exchanges
+    * ``up to here``      → alias for ``here`` (matches Claude Code's
+                            menu label "Summarize up to here")
+
+    Anything else is treated as a focus topic for the existing full
+    ``/compress <focus>`` behavior.
+
+    Returns ``(partial, keep_last, focus_topic)``:
+
+    * ``partial`` — True when a boundary-aware form was requested.
+    * ``keep_last`` — exchanges to preserve verbatim (only meaningful
+      when ``partial`` is True).
+    * ``focus_topic`` — focus string for full compression, or None.
+      Always None when ``partial`` is True (the two modes are exclusive;
+      a focused partial compress is not a documented Claude Code
+      behavior and would muddy the UX).
+    """
+    text = (raw_args or "").strip()
+    if not text:
+        return False, DEFAULT_KEEP_LAST, None
+
+    lowered = text.lower()
+
+    # Normalize the "up to here" alias to "here".
+    if lowered.startswith("up to here"):
+        lowered = lowered[len("up to ") :]
+        text = text[len("up to ") :]
+
+    tokens = lowered.split()
+
+    # Form: here [N]
+    if tokens and tokens[0] == "here":
+        keep = DEFAULT_KEEP_LAST
+        if len(tokens) >= 2:
+            keep = _coerce_keep(tokens[1])
+        return True, keep, None
+
+    # Form: --keep N  (or --keep=N)
+    if tokens and tokens[0] in ("--keep", "-k") and len(tokens) >= 2:
+        return True, _coerce_keep(tokens[1]), None
+    if tokens and tokens[0].startswith("--keep="):
+        return True, _coerce_keep(tokens[0].split("=", 1)[1]), None
+
+    # Otherwise: full compression with this as the focus topic.
+    return False, DEFAULT_KEEP_LAST, text or None
+
+
+def _coerce_keep(value: str) -> int:
+    """Parse a keep-count token, clamping to [1, MAX_KEEP_LAST]."""
+    try:
+        n = int(value)
+    except (TypeError, ValueError):
+        return DEFAULT_KEEP_LAST
+    if n < 1:
+        return 1
+    if n > MAX_KEEP_LAST:
+        return MAX_KEEP_LAST
+    return n
+
+
+def split_history_for_partial_compress(
+    history: List[Dict[str, Any]],
+    keep_last: int,
+) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
+    """Split ``history`` into ``(head, tail)`` for partial compression.
+
+    ``head`` is the earlier portion that will be summarized; ``tail`` is
+    the most recent ``keep_last`` exchanges, preserved verbatim.
+
+    An *exchange* is counted by ``user``-role messages: keeping N
+    exchanges means keeping everything from the Nth-most-recent ``user``
+    message onward. This guarantees the tail starts on a ``user`` turn,
+    so when the caller rejoins ``compressed_head + tail`` the
+    user↔assistant alternation stays valid (the compressed head's
+    trailing content is followed by a fresh user turn).
+
+    Returns ``(head, tail)``. If the split would leave the head empty
+    (not enough history to compress meaningfully), returns
+    ``(history, [])`` — signaling the caller to fall back to full
+    compression or report "nothing to do".
+    """
+    if keep_last < 1:
+        keep_last = 1
+
+    n = len(history)
+    if n == 0:
+        return [], []
+
+    # Walk backwards collecting the indices of the most recent `keep_last`
+    # user-message starts. The tail begins at the earliest such index.
+    user_starts: List[int] = []
+    for idx in range(n - 1, -1, -1):
+        if history[idx].get("role") == "user":
+            user_starts.append(idx)
+            if len(user_starts) >= keep_last:
+                break
+
+    if not user_starts:
+        # No user turns at all (degenerate) — nothing sensible to keep
+        # as a "recent exchange"; treat as full compression.
+        return list(history), []
+
+    boundary = user_starts[-1]  # earliest of the kept user starts
+
+    head = history[:boundary]
+    tail = history[boundary:]
+
+    # If everything is in the tail (nothing left to compress), signal the
+    # caller to fall back to full compression rather than producing a
+    # no-op that rotates the session for no benefit.
+    if not head:
+        return list(history), []
+
+    return head, tail
+
+
+def rejoin_compressed_head_and_tail(
+    compressed_head: List[Dict[str, Any]],
+    tail: List[Dict[str, Any]],
+) -> List[Dict[str, Any]]:
+    """Concatenate a compressed head with the verbatim tail, defending
+    the seam against an illegal user→user / assistant→assistant adjacency.
+
+    In normal operation the compressed head ends with the head's own
+    protected verbatim tail (the ``ContextCompressor`` always preserves a
+    recent window), which terminates on an ``assistant``/``tool`` turn —
+    so ``assistant → user`` at the seam is already valid. But the head
+    compressor's exact output shape is not contractually guaranteed (a
+    plugin context engine could return something that ends on a ``user``
+    turn, or a degenerate single-summary message). Rather than trust the
+    seam, this helper inspects the boundary and, if the last head message
+    and the first tail message share a ``user``/``assistant`` role, folds
+    the tail's first message content onto the head's last message so the
+    rejoined list never violates provider role-alternation rules.
+
+    ``tool`` messages are left alone — consecutive ``tool`` entries are
+    the one legal repetition (parallel tool results).
+    """
+    if not tail:
+        return list(compressed_head)
+    if not compressed_head:
+        return list(tail)
+
+    head = list(compressed_head)
+    rest = list(tail)
+
+    last = head[-1]
+    first = rest[0]
+    last_role = last.get("role")
+    first_role = first.get("role")
+
+    if last_role == first_role and last_role in ("user", "assistant"):
+        # Illegal adjacency. Merge the tail's first message text into the
+        # head's last message so alternation is preserved. Only string
+        # contents are merged inline; structured/multimodal contents fall
+        # back to dropping the redundant standalone (the content is
+        # preserved by concatenation when both are strings).
+        last_content = last.get("content")
+        first_content = first.get("content")
+        if isinstance(last_content, str) and isinstance(first_content, str):
+            merged = dict(last)
+            merged["content"] = f"{last_content}\n\n{first_content}"
+            head[-1] = merged
+            rest = rest[1:]
+        else:
+            # Can't safely string-merge multimodal content. Insert a
+            # minimal bridging turn so the seam alternates rather than
+            # losing data.
+            bridge_role = "assistant" if first_role == "user" else "user"
+            head.append({"role": bridge_role, "content": ""})
+
+    return head + rest
diff --git a/hermes_cli/plugins.py b/hermes_cli/plugins.py
index 854f3d9f309..b904b8a0125 100644
--- a/hermes_cli/plugins.py
+++ b/hermes_cli/plugins.py
@@ -34,7 +34,6 @@ so plugin-defined tools appear alongside the built-in tools.
 from __future__ import annotations
 
 import asyncio
-import importlib
 import importlib.metadata
 import importlib.util
 import inspect
diff --git a/hermes_cli/plugins_cmd.py b/hermes_cli/plugins_cmd.py
index d1b36b4fdc8..ddbd0402f2a 100644
--- a/hermes_cli/plugins_cmd.py
+++ b/hermes_cli/plugins_cmd.py
@@ -10,6 +10,7 @@ rendered with Rich Markdown.  Otherwise a default confirmation is shown.
 from __future__ import annotations
 
 import functools
+import json
 import logging
 import os
 import shutil
@@ -780,7 +781,29 @@ def _discover_all_plugins() -> list:
     return list(seen.values())
 
 
-def cmd_list() -> None:
+def _plugin_status(name: str, enabled: set, disabled: set) -> str:
+    """Return the user-facing activation state for a plugin name."""
+    if name in disabled:
+        return "disabled"
+    if name in enabled:
+        return "enabled"
+    return "not enabled"
+
+
+def _filter_plugin_entries(entries: list, args: Any, enabled: set, disabled: set) -> list:
+    """Apply ``hermes plugins list`` CLI filters."""
+    filtered = entries
+    if getattr(args, "no_bundled", False) or getattr(args, "user", False):
+        filtered = [entry for entry in filtered if entry[3] != "bundled"]
+    if getattr(args, "enabled", False):
+        filtered = [
+            entry for entry in filtered
+            if _plugin_status(entry[0], enabled, disabled) == "enabled"
+        ]
+    return filtered
+
+
+def cmd_list(args: Any | None = None) -> None:
     """List all plugins (bundled + user) with enabled/disabled state."""
     from rich.console import Console
     from rich.table import Table
@@ -794,6 +817,31 @@ def cmd_list() -> None:
 
     enabled = _get_enabled_set()
     disabled = _get_disabled_set()
+    entries = _filter_plugin_entries(entries, args, enabled, disabled)
+
+    if getattr(args, "json", False):
+        payload = [
+            {
+                "name": name,
+                "status": _plugin_status(name, enabled, disabled),
+                "version": str(version),
+                "description": description,
+                "source": source,
+            }
+            for name, version, description, source, _dir in entries
+        ]
+        print(json.dumps(payload, indent=2))
+        return
+
+    if getattr(args, "plain", False):
+        for name, version, _description, source, _dir in entries:
+            status = _plugin_status(name, enabled, disabled)
+            print(f"{status:12} {source:8} {str(version):8} {name}")
+        return
+
+    if not entries:
+        console.print("[dim]No plugins matched the selected filters.[/dim]")
+        return
 
     table = Table(title="Plugins", show_lines=False)
     table.add_column("Name", style="bold")
@@ -803,9 +851,10 @@ def cmd_list() -> None:
     table.add_column("Source", style="dim")
 
     for name, version, description, source, _dir in entries:
-        if name in disabled:
+        status_name = _plugin_status(name, enabled, disabled)
+        if status_name == "disabled":
             status = "[red]disabled[/red]"
-        elif name in enabled:
+        elif status_name == "enabled":
             status = "[green]enabled[/green]"
         else:
             status = "[yellow]not enabled[/yellow]"
@@ -814,6 +863,7 @@ def cmd_list() -> None:
     console.print()
     console.print(table)
     console.print()
+    console.print("[dim]Compact view:[/dim] hermes plugins list --plain --no-bundled")
     console.print("[dim]Interactive toggle:[/dim] hermes plugins")
     console.print("[dim]Enable/disable:[/dim] hermes plugins enable/disable <name>")
     console.print("[dim]Plugins are opt-in by default — only 'enabled' plugins load.[/dim]")
@@ -834,12 +884,35 @@ def _discover_memory_providers() -> list[tuple[str, str]]:
 
 
 def _discover_context_engines() -> list[tuple[str, str]]:
-    """Return [(name, description), ...] for available context engines."""
+    """Return [(name, description), ...] for available context engines.
+
+    Includes repo-shipped engines from ``plugins/context_engine/`` AND
+    plugin-registered engines (third-party engines installed as Hermes
+    plugins via ``ctx.register_context_engine``). Repo-shipped descriptions
+    win when a plugin-registered engine collides on name.
+    """
+    engines: list[tuple[str, str]] = []
+    seen: set[str] = set()
+
     try:
         from plugins.context_engine import discover_context_engines
-        return [(name, desc) for name, desc, _avail in discover_context_engines()]
+        for name, desc, _avail in discover_context_engines():
+            if name not in seen:
+                engines.append((name, desc))
+                seen.add(name)
     except Exception:
-        return []
+        pass
+
+    try:
+        from hermes_cli.plugins import discover_plugins, get_plugin_context_engine
+        discover_plugins()
+        plugin_engine = get_plugin_context_engine()
+        if plugin_engine and getattr(plugin_engine, "name", None) and plugin_engine.name not in seen:
+            engines.append((plugin_engine.name, "installed plugin"))
+    except Exception:
+        pass
+
+    return engines
 
 
 def _get_current_memory_provider() -> str:
@@ -1057,7 +1130,7 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
                 stdscr.addnstr(0, 0, "Plugins", max_x - 1, hattr)
                 stdscr.addnstr(
                     1, 0,
-                    "  \u2191\u2193 navigate  SPACE toggle  ENTER configure/confirm  ESC done",
+                    "  ↑↓/j/k navigate  PgUp/PgDn page  SPACE toggle  ENTER configure/confirm  ESC done",
                     max_x - 1, curses.A_DIM,
                 )
             except curses.error:
@@ -1097,7 +1170,9 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
                         pass
                     y += 1
 
-                for i in range(n_plugins):
+                plugin_start = scroll_offset
+                plugin_stop = min(n_plugins, scroll_offset + max(visible_rows, 0))
+                for i in range(plugin_start, plugin_stop):
                     if y >= max_y - 1:
                         break
                     check = "\u2713" if i in chosen else " "
@@ -1155,6 +1230,16 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
             elif key in {curses.KEY_DOWN, ord("j")}:
                 if total_items > 0:
                     cursor = (cursor + 1) % total_items
+            elif key in {curses.KEY_NPAGE, ord("f")}:
+                if total_items > 0:
+                    cursor = min(total_items - 1, cursor + max(1, max_y - 5))
+            elif key in {curses.KEY_PPAGE, ord("b")}:
+                if total_items > 0:
+                    cursor = max(0, cursor - max(1, max_y - 5))
+            elif key == curses.KEY_HOME:
+                cursor = 0
+            elif key == curses.KEY_END:
+                cursor = max(0, total_items - 1)
             elif key == ord(" "):
                 if cursor < n_plugins:
                     # Toggle general plugin
@@ -1596,7 +1681,7 @@ def plugins_command(args) -> None:
     elif action == "disable":
         cmd_disable(args.name)
     elif action in {"list", "ls"}:
-        cmd_list()
+        cmd_list(args)
     elif action is None:
         cmd_toggle()
     else:
diff --git a/hermes_cli/portal_cli.py b/hermes_cli/portal_cli.py
index aa658e41d21..bf257ae7253 100644
--- a/hermes_cli/portal_cli.py
+++ b/hermes_cli/portal_cli.py
@@ -13,7 +13,6 @@ from __future__ import annotations
 
 import sys
 import webbrowser
-from typing import Optional
 
 from hermes_cli.colors import Colors, color
 from hermes_cli.config import load_config
@@ -23,19 +22,6 @@ SUBSCRIPTION_URL = "https://portal.nousresearch.com/manage-subscription"
 DOCS_URL = "https://hermes-agent.nousresearch.com/docs/user-guide/features/tool-gateway"
 
 
-def _nous_portal_base_url() -> str:
-    """Resolve the Portal base URL from auth state or default."""
-    try:
-        from hermes_cli.auth import get_nous_auth_status
-        status = get_nous_auth_status() or {}
-        url = status.get("portal_base_url")
-        if isinstance(url, str) and url.strip():
-            return url.rstrip("/")
-    except Exception:
-        pass
-    return DEFAULT_PORTAL_URL
-
-
 def _cmd_status(args) -> int:
     """Show Portal auth + Tool Gateway routing summary."""
     from hermes_cli.auth import get_nous_auth_status
diff --git a/hermes_cli/profile_describer.py b/hermes_cli/profile_describer.py
index 0da67e8a3d3..f80d1f5451e 100644
--- a/hermes_cli/profile_describer.py
+++ b/hermes_cli/profile_describer.py
@@ -28,7 +28,6 @@ from __future__ import annotations
 
 import json
 import logging
-import os
 import re
 from dataclasses import dataclass
 from pathlib import Path
diff --git a/hermes_cli/profiles.py b/hermes_cli/profiles.py
index ec315c7fdb1..f490cbbfb99 100644
--- a/hermes_cli/profiles.py
+++ b/hermes_cli/profiles.py
@@ -329,16 +329,19 @@ def check_alias_collision(name: str) -> Optional[str]:
 
     # Check existing commands in PATH
     wrapper_dir = _get_wrapper_dir()
+    is_windows = sys.platform == "win32"
     try:
         result = subprocess.run(
-            ["which", canon], capture_output=True, text=True, timeout=5,
+            ["where" if is_windows else "which", canon],
+            capture_output=True, text=True, timeout=5,
         )
         if result.returncode == 0:
-            existing_path = result.stdout.strip()
+            existing_path = result.stdout.strip().splitlines()[0]
             # Allow overwriting our own wrappers
-            if existing_path == str(wrapper_dir / canon):
+            expected = wrapper_dir / (f"{canon}.bat" if is_windows else canon)
+            if existing_path == str(expected):
                 try:
-                    content = (wrapper_dir / canon).read_text()
+                    content = expected.read_text()
                     if "hermes -p" in content:
                         return None  # it's our wrapper, safe to overwrite
                 except Exception:
@@ -356,12 +359,18 @@ def _is_wrapper_dir_in_path() -> bool:
     return wrapper_dir in os.environ.get("PATH", "").split(os.pathsep)
 
 
-def create_wrapper_script(name: str) -> Optional[Path]:
+def create_wrapper_script(name: str, target: Optional[str] = None) -> Optional[Path]:
     """Create a shell wrapper script at ~/.local/bin/<name>.
 
+    The wrapper file is named after ``name`` (the alias). The profile it
+    activates is ``target`` if given, otherwise ``name`` — this lets a custom
+    alias name point at a differently-named profile without a post-hoc rewrite.
+
+    On Windows, creates a ``.bat`` file instead of a POSIX shell script.
     Returns the path to the created wrapper, or None if creation failed.
     """
     canon = normalize_profile_name(name)
+    profile = normalize_profile_name(target) if target else canon
     wrapper_dir = _get_wrapper_dir()
     try:
         wrapper_dir.mkdir(parents=True, exist_ok=True)
@@ -369,28 +378,47 @@ def create_wrapper_script(name: str) -> Optional[Path]:
         print(f"⚠ Could not create {wrapper_dir}: {e}")
         return None
 
-    wrapper_path = wrapper_dir / canon
-    try:
-        wrapper_path.write_text(f'#!/bin/sh\nexec hermes -p {canon} "$@"\n')
-        wrapper_path.chmod(wrapper_path.stat().st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH)
-        return wrapper_path
-    except OSError as e:
-        print(f"⚠ Could not create wrapper at {wrapper_path}: {e}")
-        return None
+    is_windows = sys.platform == "win32"
+    if is_windows:
+        wrapper_path = wrapper_dir / f"{canon}.bat"
+        try:
+            wrapper_path.write_text(f"@echo off\r\nhermes -p {profile} %*\r\n")
+            return wrapper_path
+        except OSError as e:
+            print(f"⚠ Could not create wrapper at {wrapper_path}: {e}")
+            return None
+    else:
+        wrapper_path = wrapper_dir / canon
+        try:
+            wrapper_path.write_text(f'#!/bin/sh\nexec hermes -p {profile} "$@"\n')
+            wrapper_path.chmod(wrapper_path.stat().st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH)
+            return wrapper_path
+        except OSError as e:
+            print(f"⚠ Could not create wrapper at {wrapper_path}: {e}")
+            return None
 
 
 def remove_wrapper_script(name: str) -> bool:
     """Remove the wrapper script for a profile. Returns True if removed."""
-    wrapper_path = _get_wrapper_dir() / normalize_profile_name(name)
-    if wrapper_path.exists():
-        try:
-            # Verify it's our wrapper before removing
-            content = wrapper_path.read_text()
-            if "hermes -p" in content:
-                wrapper_path.unlink()
-                return True
-        except Exception:
-            pass
+    wrapper_dir = _get_wrapper_dir()
+    canon = normalize_profile_name(name)
+    is_windows = sys.platform == "win32"
+
+    # Check both the extensionless path (POSIX) and .bat (Windows)
+    candidates = [wrapper_dir / canon]
+    if is_windows:
+        candidates.insert(0, wrapper_dir / f"{canon}.bat")
+
+    for wrapper_path in candidates:
+        if wrapper_path.exists():
+            try:
+                # Verify it's our wrapper before removing
+                content = wrapper_path.read_text()
+                if "hermes -p" in content:
+                    wrapper_path.unlink()
+                    return True
+            except Exception:
+                pass
     return False
 
 
@@ -940,7 +968,6 @@ def delete_profile(name: str, yes: bool = False) -> Path:
             ``sys.exc_info()`` tuple).
             """
             import stat as _stat
-            import sys as _sys
 
             # Normalise the two callback signatures:
             #   onexc(func, path, exc_instance)   — 3.12+
diff --git a/hermes_cli/proxy/adapters/base.py b/hermes_cli/proxy/adapters/base.py
index db778e18fa9..65107b6f180 100644
--- a/hermes_cli/proxy/adapters/base.py
+++ b/hermes_cli/proxy/adapters/base.py
@@ -69,11 +69,11 @@ class UpstreamAdapter(ABC):
 
     @abstractmethod
     def get_credential(self) -> UpstreamCredential:
-        """Return a fresh credential, refreshing/minting if necessary.
+        """Return a fresh credential, refreshing or rotating if necessary.
 
         Implementations should:
           - refresh the access token if it's near expiry
-          - mint/rotate the upstream bearer key if it's near expiry
+          - rotate the upstream bearer key if it's near expiry
           - persist any refreshed state back to disk
 
         Raises:
@@ -90,8 +90,7 @@ class UpstreamAdapter(ABC):
         """Return an alternate credential after an upstream auth failure.
 
         The default is no retry. Providers can override this for one-shot
-        fallback paths, such as switching from a preferred token type to a
-        legacy bearer after the upstream rejects the first request.
+        fallback paths after the upstream rejects the first request.
         """
         _ = failed_credential, status_code
         return None
diff --git a/hermes_cli/proxy/adapters/nous_portal.py b/hermes_cli/proxy/adapters/nous_portal.py
index 57c0a8824cf..4759d8dd22b 100644
--- a/hermes_cli/proxy/adapters/nous_portal.py
+++ b/hermes_cli/proxy/adapters/nous_portal.py
@@ -1,13 +1,8 @@
 """Nous Portal upstream adapter.
 
 Reads the user's Nous OAuth state from ``~/.hermes/auth.json`` through the
-shared runtime resolver, refreshes the access token and resolves the
-``agent_key`` compatibility credential when needed, then exposes the upstream
-base URL plus bearer for the proxy server to forward to.
-
-The ``agent_key`` field may hold either a NAS invoke JWT or the legacy
-opaque session key. The refresh helper handles both — see
-:func:`hermes_cli.auth.resolve_nous_runtime_credentials`.
+shared runtime resolver, validates or refreshes the inference JWT, then exposes
+the upstream base URL plus bearer for the proxy server to forward to.
 """
 
 from __future__ import annotations
@@ -19,8 +14,6 @@ from typing import Any, Dict, FrozenSet, Optional
 from hermes_cli.auth import (
     AuthError,
     DEFAULT_NOUS_INFERENCE_URL,
-    NOUS_INFERENCE_AUTH_MODE_AUTO,
-    NOUS_INFERENCE_AUTH_MODE_LEGACY,
     _load_auth_store,
     _auth_store_lock,
     _is_terminal_nous_refresh_error,
@@ -72,17 +65,15 @@ class NousPortalAdapter(UpstreamAdapter):
         state = self._read_state()
         if state is None:
             return False
-        # We need either a usable agent_key OR (refresh_token + access_token)
-        # to recover. The refresh helper will mint/refresh as needed.
+        # We need either a usable inference JWT OR (refresh_token + access_token)
+        # to recover. The refresh helper validates and refreshes as needed.
         return bool(
             state.get("agent_key")
             or (state.get("refresh_token") and state.get("access_token"))
         )
 
     def get_credential(self) -> UpstreamCredential:
-        return self._get_credential(
-            inference_auth_mode=NOUS_INFERENCE_AUTH_MODE_AUTO,
-        )
+        return self._get_credential()
 
     def get_retry_credential(
         self,
@@ -90,16 +81,19 @@ class NousPortalAdapter(UpstreamAdapter):
         failed_credential: UpstreamCredential,
         status_code: int,
     ) -> Optional[UpstreamCredential]:
+        _ = failed_credential
         if status_code != 401:
             return None
-        if failed_credential.bearer.count(".") != 2:
-            return None
-        logger.info("proxy: Nous upstream rejected bearer; retrying with legacy session key")
+        logger.info("proxy: Nous upstream rejected bearer; force-refreshing invoke JWT")
         return self._get_credential(
-            inference_auth_mode=NOUS_INFERENCE_AUTH_MODE_LEGACY,
+            force_refresh=True,
         )
 
-    def _get_credential(self, *, inference_auth_mode: str) -> UpstreamCredential:
+    def _get_credential(
+        self,
+        *,
+        force_refresh: bool = False,
+    ) -> UpstreamCredential:
         with self._lock:
             state = self._read_state()
             if state is None:
@@ -109,7 +103,7 @@ class NousPortalAdapter(UpstreamAdapter):
 
             try:
                 refreshed = resolve_nous_runtime_credentials(
-                    inference_auth_mode=inference_auth_mode,
+                    force_refresh=force_refresh,
                 )
             except AuthError as exc:
                 if _is_terminal_nous_refresh_error(exc):
@@ -131,10 +125,10 @@ class NousPortalAdapter(UpstreamAdapter):
                     f"Failed to refresh Nous Portal credentials: {exc}"
                 ) from exc
 
-            agent_key = refreshed.get("api_key")
-            if not agent_key:
+            runtime_key = refreshed.get("api_key")
+            if not runtime_key:
                 raise RuntimeError(
-                    "Nous Portal refresh did not return a usable agent_key. "
+                    "Nous Portal refresh did not return a usable inference JWT. "
                     "Try `hermes auth add nous` to re-authenticate."
                 )
 
@@ -145,7 +139,7 @@ class NousPortalAdapter(UpstreamAdapter):
             base_url = base_url.rstrip("/")
 
             return UpstreamCredential(
-                bearer=agent_key,
+                bearer=runtime_key,
                 base_url=base_url,
                 expires_at=refreshed.get("expires_at"),
             )
diff --git a/hermes_cli/proxy/adapters/xai.py b/hermes_cli/proxy/adapters/xai.py
index 30a640df750..d85db8630ab 100644
--- a/hermes_cli/proxy/adapters/xai.py
+++ b/hermes_cli/proxy/adapters/xai.py
@@ -79,7 +79,7 @@ class XAIGrokAdapter(UpstreamAdapter):
         failed_credential: UpstreamCredential,
         status_code: int,
     ) -> Optional[UpstreamCredential]:
-        if status_code != 401:
+        if status_code not in {401, 429}:
             return None
 
         with self._lock:
@@ -87,16 +87,25 @@ class XAIGrokAdapter(UpstreamAdapter):
             if pool is None:
                 return None
 
-            refreshed = pool.try_refresh_current()
-            if refreshed is None:
+            if status_code == 429:
+                # Mark the rate-limited key with its 1-hour cooldown and rotate
+                # to the next available credential. Returns None when the pool
+                # has no other key to offer — the 429 will flow back to the client.
                 refreshed = pool.mark_exhausted_and_rotate(status_code=status_code)
+            else:
+                refreshed = pool.try_refresh_current()
+                if refreshed is None:
+                    refreshed = pool.mark_exhausted_and_rotate(status_code=status_code)
             if refreshed is None:
                 return None
 
             retry_cred = self._credential_from_entry(refreshed)
             if retry_cred.bearer == failed_credential.bearer:
                 return None
-            logger.info("proxy: xAI upstream rejected bearer; retrying with refreshed pool credential")
+            logger.info(
+                "proxy: xAI upstream returned %s; retrying with rotated pool credential",
+                status_code,
+            )
             return retry_cred
 
     def _load_pool(self) -> Optional[CredentialPool]:
diff --git a/hermes_cli/proxy/server.py b/hermes_cli/proxy/server.py
index a72f75d67ee..17e8615fcbd 100644
--- a/hermes_cli/proxy/server.py
+++ b/hermes_cli/proxy/server.py
@@ -12,7 +12,6 @@ or rewrite request/response bodies. It's a credential-attaching forwarder.
 from __future__ import annotations
 
 import asyncio
-import json
 import logging
 import signal
 from typing import Optional
@@ -105,17 +104,6 @@ def create_app(adapter: UpstreamAdapter) -> "web.Application":
             }
         )
 
-    async def handle_models_fallback(request: "web.Request") -> "web.Response":
-        # Most clients hit /v1/models on startup. If the upstream doesn't
-        # serve /models, synthesize a minimal response so clients don't
-        # crash. The actual forwarding path handles /models when allowed.
-        return web.json_response(
-            {
-                "object": "list",
-                "data": [],
-            }
-        )
-
     async def handle_proxy(request: "web.Request") -> "web.StreamResponse":
         # Extract the path *after* /v1
         rel_path = request.match_info.get("tail", "")
@@ -206,7 +194,7 @@ def create_app(adapter: UpstreamAdapter) -> "web.Application":
             return session_or_response
         session = session_or_response
 
-        if upstream_resp.status == 401:
+        if upstream_resp.status in {401, 429}:
             try:
                 retry_cred = adapter.get_retry_credential(
                     failed_credential=cred,
diff --git a/hermes_cli/psutil_android.py b/hermes_cli/psutil_android.py
new file mode 100644
index 00000000000..c029324542c
--- /dev/null
+++ b/hermes_cli/psutil_android.py
@@ -0,0 +1,108 @@
+"""Helpers for the temporary psutil-on-Android compatibility installer."""
+
+from __future__ import annotations
+
+import shutil
+import tarfile
+from pathlib import Path, PurePosixPath
+
+# Pin a version we know patches cleanly. Update when a newer psutil
+# changes the marker line shape and we need to follow upstream.
+PSUTIL_URL = (
+    "https://files.pythonhosted.org/packages/aa/c6/"
+    "d1ddf4abb55e93cebc4f2ed8b5d6dbad109ecb8d63748dd2b20ab5e57ebe/"
+    "psutil-7.2.2.tar.gz"
+)
+
+MARKER = 'LINUX = sys.platform.startswith("linux")'
+REPLACEMENT = 'LINUX = sys.platform.startswith(("linux", "android"))'
+
+
+class PsutilAndroidInstallError(RuntimeError):
+    """Raised when the pinned psutil sdist is missing or unsafe."""
+
+
+def _normalize_member_parts(member_name: str) -> tuple[str, ...]:
+    path = PurePosixPath(member_name)
+    parts = tuple(part for part in path.parts if part not in ("", "."))
+    if path.is_absolute() or ".." in parts or not parts:
+        raise PsutilAndroidInstallError(
+            f"Unsafe archive member path: {member_name!r}"
+        )
+    return parts
+
+
+def _safe_extract_tar_gz(archive: Path, destination: Path) -> None:
+    """Extract a tar.gz without allowing traversal or link members."""
+    with tarfile.open(archive, "r:gz") as tf:
+        for member in tf.getmembers():
+            parts = _normalize_member_parts(member.name)
+            target = destination.joinpath(*parts)
+
+            if member.isdir():
+                target.mkdir(parents=True, exist_ok=True)
+                continue
+
+            if not member.isfile():
+                raise PsutilAndroidInstallError(
+                    f"Unsupported archive member type: {member.name}"
+                )
+
+            target.parent.mkdir(parents=True, exist_ok=True)
+            extracted = tf.extractfile(member)
+            if extracted is None:
+                raise PsutilAndroidInstallError(
+                    f"Cannot read archive member: {member.name}"
+                )
+
+            with extracted, open(target, "wb") as dst:
+                shutil.copyfileobj(extracted, dst)
+
+            try:
+                target.chmod(member.mode & 0o777)
+            except OSError:
+                pass
+
+
+def prepare_patched_psutil_sdist(archive: Path, destination: Path) -> Path:
+    """Safely extract the pinned psutil sdist and patch it for Android."""
+    _safe_extract_tar_gz(archive, destination)
+
+    src_roots = sorted(
+        (
+            path for path in destination.iterdir()
+            if path.is_dir() and path.name.startswith("psutil-")
+        ),
+        key=lambda path: path.name,
+    )
+    if not src_roots:
+        raise PsutilAndroidInstallError(
+            "psutil sdist did not contain a psutil-* directory"
+        )
+
+    src_root = src_roots[0]
+    common_py = src_root / "psutil" / "_common.py"
+    if not common_py.is_file():
+        raise PsutilAndroidInstallError(
+            f"psutil sdist did not contain {common_py.relative_to(src_root)!s}"
+        )
+    try:
+        content = common_py.read_text(encoding="utf-8")
+    except OSError as exc:
+        raise PsutilAndroidInstallError(
+            f"Failed to read {common_py.relative_to(src_root)!s}"
+        ) from exc
+    if MARKER not in content:
+        raise PsutilAndroidInstallError(
+            "psutil Android compatibility patch marker not found"
+        )
+    try:
+        common_py.write_text(
+            content.replace(MARKER, REPLACEMENT),
+            encoding="utf-8",
+        )
+    except OSError as exc:
+        raise PsutilAndroidInstallError(
+            f"Failed to write {common_py.relative_to(src_root)!s}"
+        ) from exc
+    return src_root
diff --git a/hermes_cli/pt_input_extras.py b/hermes_cli/pt_input_extras.py
index 008c931cfb7..16a0f17ea43 100644
--- a/hermes_cli/pt_input_extras.py
+++ b/hermes_cli/pt_input_extras.py
@@ -81,3 +81,40 @@ def install_ctrl_enter_alias() -> int:
             ANSI_SEQUENCES[seq] = alt_enter
             changed += 1
     return changed
+
+
+def install_ignored_terminal_sequences() -> int:
+    """Map terminal-emitted noise sequences to ``Keys.Ignore`` so they
+    are consumed by the VT100 parser before they reach key bindings or
+    the input buffer.
+
+    Currently covers focus reports:
+      - ``\\x1b[I`` — terminal regained focus (focus in)
+      - ``\\x1b[O`` — terminal lost focus (focus out)
+
+    Ghostty, iTerm2, and some xterm builds can emit these sequences when
+    the user switches tabs / windows or when a multiplexer toggles focus
+    tracking upstream. prompt_toolkit does not map these by default, so
+    its parser falls back to literal key presses (ESC, ``[``, ``I``/``O``)
+    and inserts ``[I``/``[O`` into the prompt buffer after the ESC byte
+    is handled.
+
+    Registering them as ``Keys.Ignore`` is parser-level — strictly
+    cleaner than post-hoc regex stripping in the input sanitizer because
+    the bytes never reach the buffer. ``setdefault`` is used so any user
+    or downstream registration wins.
+
+    Returns the number of sequences whose mapping was changed.
+    """
+    try:
+        from prompt_toolkit.input.ansi_escape_sequences import ANSI_SEQUENCES
+        from prompt_toolkit.keys import Keys
+    except Exception:
+        return 0
+
+    changed = 0
+    for seq in ("\x1b[I", "\x1b[O"):
+        if seq not in ANSI_SEQUENCES:
+            ANSI_SEQUENCES[seq] = Keys.Ignore
+            changed += 1
+    return changed
diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py
index c40316e02cc..1edb8e99e47 100644
--- a/hermes_cli/runtime_provider.py
+++ b/hermes_cli/runtime_provider.py
@@ -1115,14 +1115,20 @@ def _resolve_explicit_runtime(
             explicit_base_url
             or str(state.get("inference_base_url") or auth_mod.DEFAULT_NOUS_INFERENCE_URL).strip().rstrip("/")
         )
-        # Only use the agent_key compatibility field for inference. It may be
-        # either a NAS invoke JWT or a legacy opaque session key; raw OAuth
-        # access_token fallback is handled by resolve_nous_runtime_credentials().
-        api_key = explicit_api_key or str(state.get("agent_key") or "").strip()
+        # Only use the agent_key compatibility field for inference when it
+        # contains a NAS invoke JWT; raw OAuth access_token fallback is handled
+        # by resolve_nous_runtime_credentials().
+        api_key = explicit_api_key or (
+            str(state.get("agent_key") or "").strip()
+            if _agent_key_is_usable(
+                state,
+                max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
+            )
+            else ""
+        )
         expires_at = state.get("agent_key_expires_at") or state.get("expires_at")
         if not api_key:
             creds = resolve_nous_runtime_credentials(
-                min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
                 timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
             )
             api_key = creds.get("api_key", "")
@@ -1309,12 +1315,11 @@ def resolve_runtime_provider(
                 or getattr(entry, "access_token", "")
             )
         # For Nous, the pool entry's runtime_api_key is the agent_key
-        # compatibility field: either an invoke JWT or legacy opaque key.
-        # The pool doesn't
+        # compatibility field. It must be an invoke JWT. The pool doesn't
         # refresh it during selection (that would trigger network calls in
         # non-runtime contexts like `hermes auth list`).  If the key is
         # expired, clear pool_api_key so we fall through to
-        # resolve_nous_runtime_credentials() which handles refresh + fallback.
+        # resolve_nous_runtime_credentials() which handles refresh.
         if provider == "nous" and entry is not None and pool_api_key:
             min_ttl = max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800")))
             nous_state = {
@@ -1338,7 +1343,6 @@ def resolve_runtime_provider(
     if provider == "nous":
         try:
             creds = resolve_nous_runtime_credentials(
-                min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
                 timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
             )
             return {
diff --git a/hermes_cli/secrets_cli.py b/hermes_cli/secrets_cli.py
index fafb37f576a..5ef8b15aef2 100644
--- a/hermes_cli/secrets_cli.py
+++ b/hermes_cli/secrets_cli.py
@@ -14,9 +14,8 @@ import argparse
 import json
 import os
 import subprocess
-import sys
 from pathlib import Path
-from typing import List, Optional, Tuple
+from typing import List, Optional
 
 from rich.console import Console
 from rich.panel import Panel
diff --git a/hermes_cli/security_advisories.py b/hermes_cli/security_advisories.py
index 311383eab4d..ef058a5a422 100644
--- a/hermes_cli/security_advisories.py
+++ b/hermes_cli/security_advisories.py
@@ -36,7 +36,7 @@ from __future__ import annotations
 import logging
 import os
 import sys
-from dataclasses import dataclass, field
+from dataclasses import dataclass
 from pathlib import Path
 from typing import Iterable, Optional
 
@@ -104,7 +104,9 @@ ADVISORIES: tuple[Advisory, ...] = (
             "them to a hardcoded webhook. If you ran any Python process that "
             "imported mistralai 2.4.6 — including hermes when configured "
             "with provider=mistral for TTS or STT — assume those credentials "
-            "are exposed."
+            "are exposed. PyPI has since removed 2.4.6 and the project ships "
+            "clean releases again (2.4.7, 2.4.8); this advisory only fires if "
+            "the compromised 2.4.6 is still installed."
         ),
         url="https://socket.dev/blog/mini-shai-hulud-worm-pypi",
         compromised=(
diff --git a/hermes_cli/security_audit.py b/hermes_cli/security_audit.py
index 82d414e0b23..f29c61f3332 100644
--- a/hermes_cli/security_audit.py
+++ b/hermes_cli/security_audit.py
@@ -28,7 +28,7 @@ import urllib.error
 import urllib.request
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Any, Iterable, Optional
+from typing import Iterable, Optional
 
 from hermes_constants import get_hermes_home
 
diff --git a/hermes_cli/service_manager.py b/hermes_cli/service_manager.py
index b894ca764a8..1d0ce5d0d72 100644
--- a/hermes_cli/service_manager.py
+++ b/hermes_cli/service_manager.py
@@ -566,8 +566,11 @@ class S6ServiceManager:
           1. Sources HERMES_HOME (and any extra env) via with-contenv —
              so e.g. ``-e HERMES_HOME=/data/hermes`` is honored at run
              time, not Python-substituted at registration time (OQ8-C).
-          2. Activates the bundled venv.
-          3. Drops to the hermes user and exec's
+          2. Resets ``HOME`` to ``/opt/data`` before the privilege drop
+             so with-contenv's root HOME does not leak into the
+             unprivileged gateway process.
+          3. Activates the bundled venv.
+          4. Drops to the hermes user and exec's
              ``hermes -p <profile> gateway run`` (or just ``hermes
              gateway run`` for the default profile — see below).
 
@@ -597,6 +600,7 @@ class S6ServiceManager:
             "#!/command/with-contenv sh",
             "# shellcheck shell=sh",
             "set -e",
+            "export HOME=/opt/data",
             "cd /opt/data",
             ". /opt/hermes/.venv/bin/activate",
         ]
@@ -628,6 +632,38 @@ class S6ServiceManager:
         — so a container started with ``-e HERMES_HOME=/data/hermes``
         gets its logs under /data/hermes/logs/..., not the build-time
         default.
+
+        Output routing — the script is two action directives, applied
+        per line, in order:
+
+          1. ``1`` (forward to stdout) — propagates the line up the
+             s6-supervise pipeline to /init's stdout, which is the
+             container's stdout, which is ``docker logs``. Without
+             this, supervised stdout would be terminated inside
+             s6-log and never reach the container's log stream;
+             users would have to ``docker exec`` and ``tail`` the
+             file just to see startup banners. (Python's ``logging``
+             module defaults to stderr, which s6-supervise leaves
+             unfiltered — so warnings/errors already reach docker
+             logs. This change is specifically about the rich-console
+             banner output and other plain stdout writes.)
+          2. ``T <log_dir>`` — also write a timestamped copy to the
+             rotated log directory (``current`` + archived ``@*.s``
+             files). This is what ``hermes logs`` reads and what
+             persists across container restarts via the volume mount.
+
+        ``T`` is non-sticky: it only prefixes lines for the next
+        action directive. We deliberately put ``T`` between ``1``
+        and the log dir (not before ``1``) so:
+
+          * ``docker logs`` shows raw lines — Python's logging
+            formatter has its own timestamps, and ``docker logs
+            --timestamps`` adds a third layer when desired. No
+            double-stamping in the most common reading path.
+          * The persisted file gets s6-log's own ISO 8601 timestamp
+            so even output that lacked a Python-logger timestamp
+            (rich banners, third-party libs' raw prints) is
+            correlatable in ``current``.
         """
         import shlex
         prof = shlex.quote(profile)
@@ -638,7 +674,7 @@ class S6ServiceManager:
             f'log_dir="$HERMES_HOME/logs/gateways/{prof}"\n'
             f'mkdir -p "$log_dir"\n'
             f'chown -R hermes:hermes "$log_dir" 2>/dev/null || true\n'
-            f'exec s6-setuidgid hermes s6-log n10 s1000000 T "$log_dir"\n'
+            f'exec s6-setuidgid hermes s6-log 1 n10 s1000000 T "$log_dir"\n'
         )
 
     # -- lifecycle ---------------------------------------------------------
diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py
index d5f77d4c540..fe650918705 100644
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@@ -12,7 +12,6 @@ Config files are stored in ~/.hermes/ for easy access.
 """
 
 import importlib.util
-import json
 import logging
 import os
 import re
@@ -455,22 +454,25 @@ def _print_setup_summary(config: dict, hermes_home):
     # Video generation — opt-in via `hermes tools` → Video Generation.
     # Only show the row when a plugin reports available so we don't badger
     # users who don't care about video gen with a "missing" status line.
-    try:
-        from agent.video_gen_registry import list_providers as _list_video_providers
-        from hermes_cli.plugins import _ensure_plugins_discovered as _ensure_plugins
-        _ensure_plugins()
-        _video_backend = None
-        for _vp in _list_video_providers():
-            try:
-                if _vp.is_available():
-                    _video_backend = _vp.display_name
-                    break
-            except Exception:
-                continue
-    except Exception:
-        _video_backend = None
-    if _video_backend:
-        tool_status.append((f"Video Generation ({_video_backend})", True, None))
+    if subscription_features.video_gen.managed_by_nous:
+        tool_status.append(("Video Generation (FAL via Nous subscription)", True, None))
+    else:
+        try:
+            from agent.video_gen_registry import list_providers as _list_video_providers
+            from hermes_cli.plugins import _ensure_plugins_discovered as _ensure_plugins
+            _ensure_plugins()
+            _video_backend = None
+            for _vp in _list_video_providers():
+                try:
+                    if _vp.is_available():
+                        _video_backend = _vp.display_name
+                        break
+                except Exception:
+                    continue
+        except Exception:
+            _video_backend = None
+        if _video_backend:
+            tool_status.append((f"Video Generation ({_video_backend})", True, None))
 
     # TTS — show configured provider
     tts_provider = cfg_get(config, "tts", "provider", default="edge")
@@ -781,7 +783,6 @@ def setup_model_provider(config: dict, *, quick: bool = False):
                         timeout=15.0,
                         insecure=False,
                         ca_bundle=None,
-                        min_key_ttl_seconds=5 * 60,
                     )
                 )
                 pool = load_pool(selected_provider)
@@ -2974,7 +2975,6 @@ def _run_portal_one_shot(config: dict) -> None:
             timeout=None,
             insecure=False,
             ca_bundle=None,
-            min_key_ttl_seconds=5 * 60,
         )
         try:
             auth_add_command(ns)
diff --git a/hermes_cli/skills_hub.py b/hermes_cli/skills_hub.py
index b617b69f384..4fe2a4dc7d8 100644
--- a/hermes_cli/skills_hub.py
+++ b/hermes_cli/skills_hub.py
@@ -58,7 +58,9 @@ def _resolve_short_name(name: str, sources, console: Console) -> str:
         table = Table()
         table.add_column("Source", style="dim")
         table.add_column("Trust", style="dim")
-        table.add_column("Identifier", style="bold cyan")
+        # overflow="fold" keeps the full slug visible (wraps instead of ellipsis-truncating)
+        # so users can copy it for `hermes skills install`.
+        table.add_column("Identifier", style="bold cyan", overflow="fold", no_wrap=False)
         for r in exact:
             trust_style = {"builtin": "bright_cyan", "trusted": "green", "community": "yellow"}.get(r.trust_level, "dim")
             trust_label = "official" if r.source == "official" else r.trust_level
@@ -244,15 +246,39 @@ def _prompt_for_category(c: Console, existing: List[str]) -> str:
 
 
 def do_search(query: str, source: str = "all", limit: int = 10,
-              console: Optional[Console] = None) -> None:
-    """Search registries and display results as a Rich table."""
+              console: Optional[Console] = None, as_json: bool = False) -> None:
+    """Search registries and display results as a Rich table.
+
+    When ``as_json=True`` writes a JSON array of result records to stdout
+    (one object per skill: ``name``, ``identifier``, ``source``,
+    ``trust_level``, ``description``) and skips the table render. This is
+    the scripting / copy-paste handle: the full identifier is always
+    intact, even for browse-sh slugs that the table would otherwise wrap.
+    """
     from tools.skills_hub import GitHubAuth, create_source_router, unified_search
 
     c = console or _console
-    c.print(f"\n[bold]Searching for:[/] {query}")
 
     auth = GitHubAuth()
     sources = create_source_router(auth)
+    if as_json:
+        # Avoid Rich status spinner contaminating stdout — JSON consumers
+        # expect a clean parseable stream.
+        results = unified_search(query, sources, source_filter=source, limit=limit)
+        payload = [
+            {
+                "name": r.name,
+                "identifier": r.identifier,
+                "source": r.source,
+                "trust_level": r.trust_level,
+                "description": r.description,
+            }
+            for r in results
+        ]
+        print(json.dumps(payload, indent=2))
+        return
+
+    c.print(f"\n[bold]Searching for:[/] {query}")
     with c.status("[bold]Searching registries..."):
         results = unified_search(query, sources, source_filter=source, limit=limit)
 
@@ -265,7 +291,11 @@ def do_search(query: str, source: str = "all", limit: int = 10,
     table.add_column("Description", max_width=60)
     table.add_column("Source", style="dim")
     table.add_column("Trust", style="dim")
-    table.add_column("Identifier", style="dim")
+    # overflow="fold" keeps the full slug visible (wraps instead of
+    # ellipsis-truncating). Browse.sh slugs end in a `-XXXXXX` hash that
+    # is part of the actual identifier — truncating it makes copy-paste
+    # into `hermes skills install` fail.
+    table.add_column("Identifier", style="dim", overflow="fold", no_wrap=False)
 
     for r in results:
         trust_style = {"builtin": "bright_cyan", "trusted": "green", "community": "yellow"}.get(r.trust_level, "dim")
@@ -280,7 +310,8 @@ def do_search(query: str, source: str = "all", limit: int = 10,
 
     c.print(table)
     c.print("[dim]Use: hermes skills inspect <identifier> to preview, "
-            "hermes skills install <identifier> to install[/]\n")
+            "hermes skills install <identifier> to install "
+            "(--json for scripting)[/]\n")
 
 
 def do_browse(page: int = 1, page_size: int = 20, source: str = "all",
@@ -1390,7 +1421,8 @@ def skills_command(args) -> None:
     if action == "browse":
         do_browse(page=args.page, page_size=args.size, source=args.source)
     elif action == "search":
-        do_search(args.query, source=args.source, limit=args.limit)
+        do_search(args.query, source=args.source, limit=args.limit,
+                  as_json=getattr(args, "json", False))
     elif action == "install":
         do_install(args.identifier, category=args.category, force=args.force,
                    skip_confirm=getattr(args, "yes", False),
@@ -1511,10 +1543,11 @@ def handle_skills_slash(cmd: str, console: Optional[Console] = None) -> None:
 
     elif action == "search":
         if not args:
-            c.print("[bold red]Usage:[/] /skills search <query> [--source skills-sh|well-known|github|official] [--limit N]\n")
+            c.print("[bold red]Usage:[/] /skills search <query> [--source skills-sh|well-known|github|official] [--limit N] [--json]\n")
             return
         source = "all"
         limit = 10
+        as_json = False
         query_parts = []
         i = 0
         while i < len(args):
@@ -1527,10 +1560,14 @@ def handle_skills_slash(cmd: str, console: Optional[Console] = None) -> None:
                 except ValueError:
                     pass
                 i += 2
+            elif args[i] == "--json":
+                as_json = True
+                i += 1
             else:
                 query_parts.append(args[i])
                 i += 1
-        do_search(" ".join(query_parts), source=source, limit=limit, console=c)
+        do_search(" ".join(query_parts), source=source, limit=limit,
+                  console=c, as_json=as_json)
 
     elif action == "install":
         if not args:
diff --git a/hermes_cli/status.py b/hermes_cli/status.py
index bae5430205b..f1d2f5f9ff7 100644
--- a/hermes_cli/status.py
+++ b/hermes_cli/status.py
@@ -7,7 +7,6 @@ Shows the status of all Hermes Agent components.
 import os
 import sys
 import subprocess  # noqa: F401 — re-exported for tests that monkeypatch status.subprocess to guard against regressions
-import importlib.util
 from pathlib import Path
 
 PROJECT_ROOT = Path(__file__).parent.parent.resolve()
@@ -16,6 +15,10 @@ from hermes_cli.auth import AuthError, resolve_provider
 from hermes_cli.colors import Colors, color
 from hermes_cli.config import get_env_path, get_env_value, get_hermes_home, load_config
 from hermes_cli.models import provider_label
+from hermes_cli.nous_account import (
+    format_nous_portal_entitlement_message,
+    get_nous_portal_account_info,
+)
 from hermes_cli.nous_subscription import get_nous_subscription_features
 from hermes_cli.runtime_provider import resolve_requested_provider
 from hermes_constants import OPENROUTER_MODELS_URL
@@ -193,26 +196,57 @@ def show_status(args):
         qwen_status = {}
         minimax_status = {}
 
-    nous_logged_in = bool(nous_status.get("logged_in"))
+    nous_account_info = None
+    if (
+        nous_status.get("logged_in")
+        or nous_status.get("access_token")
+        or nous_status.get("portal_base_url")
+        or nous_status.get("inference_credential_present")
+        or nous_status.get("error_code")
+    ):
+        try:
+            nous_account_info = get_nous_portal_account_info()
+        except Exception:
+            nous_account_info = None
+
+    nous_logged_in = bool(
+        nous_status.get("logged_in")
+        or (nous_account_info and nous_account_info.logged_in)
+    )
+    nous_inference_present = bool(
+        nous_status.get("inference_credential_present")
+        or (nous_account_info and nous_account_info.inference_credential_present)
+    )
     nous_error = nous_status.get("error")
-    nous_label = "logged in" if nous_logged_in else "not logged in (run: hermes auth add nous --type oauth)"
+    if nous_logged_in:
+        nous_label = "logged in"
+    elif nous_inference_present:
+        nous_label = "not logged in (Nous inference key configured)"
+    else:
+        nous_label = "not logged in (run: hermes auth add nous --type oauth)"
     print(
         f"  {'Nous Portal':<12}  {check_mark(nous_logged_in)} "
         f"{nous_label}"
     )
     portal_url = nous_status.get("portal_base_url") or "(unknown)"
+    inference_url = (
+        nous_status.get("inference_base_url")
+        or (nous_account_info.inference_base_url if nous_account_info else None)
+    )
     access_exp = _format_iso_timestamp(nous_status.get("access_expires_at"))
     key_exp = _format_iso_timestamp(nous_status.get("agent_key_expires_at"))
     refresh_label = "yes" if nous_status.get("has_refresh_token") else "no"
     if nous_logged_in or portal_url != "(unknown)" or nous_error:
         print(f"    Portal URL: {portal_url}")
+    if nous_inference_present and inference_url:
+        print(f"    Inference:  {inference_url}")
     if nous_logged_in or nous_status.get("access_expires_at"):
         print(f"    Access exp: {access_exp}")
-    if nous_logged_in or nous_status.get("agent_key_expires_at"):
+    if nous_logged_in or nous_inference_present or nous_status.get("agent_key_expires_at"):
         print(f"    Key exp:    {key_exp}")
     if nous_logged_in or nous_status.get("has_refresh_token"):
         print(f"    Refresh:    {refresh_label}")
-    if nous_error and not nous_logged_in:
+    if nous_error:
         print(f"    Error:      {nous_error}")
 
     codex_logged_in = bool(codex_status.get("logged_in"))
@@ -303,18 +337,18 @@ def show_status(args):
             else:
                 state = "not configured"
             print(f"  {feature.label:<15} {check_mark(feature.available or feature.active or feature.managed_by_nous)} {state}")
-    elif nous_logged_in:
-        # Logged into Nous but on the free tier — show upgrade nudge
+    elif nous_logged_in or nous_inference_present:
+        # Nous OAuth without entitlement, or an opaque inference key without
+        # Portal account information, cannot enable the Tool Gateway.
         print()
         print(color("◆ Nous Tool Gateway", Colors.CYAN, Colors.BOLD))
-        print("  Your free-tier Nous account does not include Tool Gateway access.")
-        print("  Upgrade your subscription to unlock managed web, image, TTS, and browser tools.")
-        try:
-            portal_url = nous_status.get("portal_base_url", "").rstrip("/")
-            if portal_url:
-                print(f"  Upgrade: {portal_url}")
-        except Exception:
-            pass
+        message = format_nous_portal_entitlement_message(
+            nous_account_info,
+            capability="managed web, image, TTS, browser, and Modal tools",
+        )
+        if message:
+            for line in message.splitlines():
+                print(f"  {line}")
 
     # =========================================================================
     # API-Key Providers
diff --git a/hermes_cli/stdio.py b/hermes_cli/stdio.py
index a1733f0fe0b..b8caf2b052e 100644
--- a/hermes_cli/stdio.py
+++ b/hermes_cli/stdio.py
@@ -216,7 +216,6 @@ def _augment_path_with_known_tools() -> None:
     if not is_windows():
         return
 
-    import shutil as _shutil
 
     local_appdata = os.environ.get("LOCALAPPDATA", "")
     if not local_appdata:
diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py
index 0274316d12c..06ff1843ee2 100644
--- a/hermes_cli/tools_config.py
+++ b/hermes_cli/tools_config.py
@@ -28,7 +28,8 @@ from hermes_cli.nous_subscription import (
     apply_nous_managed_defaults,
     get_nous_subscription_features,
 )
-from tools.tool_backend_helpers import fal_key_is_configured, managed_nous_tools_enabled
+from hermes_cli.nous_account import format_nous_portal_entitlement_message
+from tools.tool_backend_helpers import fal_key_is_configured
 from utils import base_url_hostname, is_truthy_value
 
 logger = logging.getLogger(__name__)
@@ -67,6 +68,7 @@ CONFIGURABLE_TOOLSETS = [
     ("skills",          "📚 Skills",                    "list, view, manage"),
     ("todo",            "📋 Task Planning",             "todo"),
     ("memory",          "💾 Memory",                    "persistent memory across sessions"),
+    ("context_engine",  "🧩 Context Engine",            "runtime tools from the active context engine"),
     ("session_search",  "🔎 Session Search",            "search past conversations"),
     ("clarify",         "❓ Clarifying Questions",      "clarify"),
     ("delegation",      "👥 Task Delegation",           "delegate_task"),
@@ -242,9 +244,16 @@ TOOL_CATEGORIES = {
                 ],
                 "tts_provider": "elevenlabs",
             },
-            # Mistral (Voxtral TTS) temporarily hidden — `mistralai` PyPI
-            # package is currently quarantined (malicious 2.4.6 release on
-            # 2026-05-12). Restore this entry once PyPI un-quarantines.
+            # Mistral Voxtral TTS — `mistralai` SDK lazy-installs on first use.
+            {
+                "name": "Mistral (Voxtral TTS)",
+                "badge": "paid",
+                "tag": "Multilingual, native Opus",
+                "env_vars": [
+                    {"key": "MISTRAL_API_KEY", "prompt": "Mistral API key", "url": "https://console.mistral.ai/"},
+                ],
+                "tts_provider": "mistral",
+            },
             {
                 "name": "Google Gemini TTS",
                 "badge": "preview",
@@ -337,11 +346,26 @@ TOOL_CATEGORIES = {
     "video_gen": {
         "name": "Video Generation",
         "icon": "🎬",
-        # Providers list is intentionally empty — every video gen backend
-        # is a plugin, surfaced by ``_plugin_video_gen_providers()`` and
-        # injected by ``_visible_providers``. Mirrors the design we'll
-        # converge image_gen toward.
-        "providers": [],
+        # "Nous Subscription" row mirrors the image_gen pattern — managed
+        # FAL video generation billed via the Nous Portal.  Plugin-backed
+        # provider rows (FAL BYOK, xAI, …) are injected at runtime by
+        # ``_plugin_video_gen_providers()`` in ``_visible_providers``.
+        "providers": [
+            {
+                "name": "Nous Subscription",
+                "badge": "subscription",
+                "tag": "Managed FAL video generation billed to your subscription",
+                "env_vars": [],
+                "requires_nous_auth": True,
+                "managed_nous_feature": "video_gen",
+                "override_env_vars": ["FAL_KEY"],
+                # The underlying plugin backend — when the user picks
+                # "Nous Subscription" we set video_gen.provider = "fal"
+                # and video_gen.use_gateway = True so the FAL plugin
+                # routes through the managed queue gateway.
+                "video_gen_plugin_name": "fal",
+            },
+        ],
     },
     "x_search": {
         "name": "X (Twitter) Search",
@@ -1335,6 +1359,24 @@ def _get_platform_tools(
                 enabled_toolsets.add(pts)
             # else: known but not in config = user disabled it
 
+    # Context-engine tools are runtime-provided by the active engine, so they
+    # are not part of any static platform composite. When a non-default engine
+    # is selected, keep its recovery/status tools available even after a user
+    # saves an explicit platform toolset list. Preserve the explicit empty-list
+    # contract: selecting no configurable tools means no context-engine tools
+    # either unless the user adds ``context_engine`` manually later.
+    context_cfg = config.get("context") or {}
+    if not isinstance(context_cfg, dict):
+        context_cfg = {}
+    context_engine_name = str(context_cfg.get("engine") or "compressor").strip().lower()
+    explicit_empty_selection = (
+        platform in platform_toolsets
+        and isinstance(platform_toolsets.get(platform), list)
+        and not toolset_names
+    )
+    if context_engine_name and context_engine_name != "compressor" and not explicit_empty_selection:
+        enabled_toolsets.add("context_engine")
+
     # Preserve any explicit non-configurable toolset entries (for example,
     # custom toolsets or MCP server names saved in platform_toolsets).
     explicit_passthrough = {
@@ -1440,7 +1482,12 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
     save_config(config)
 
 
-def _toolset_has_keys(ts_key: str, config: dict = None) -> bool:
+def _toolset_has_keys(
+    ts_key: str,
+    config: dict = None,
+    *,
+    force_fresh: bool = False,
+) -> bool:
     """Check if a toolset's required API keys are configured."""
     if config is None:
         config = load_config()
@@ -1454,8 +1501,8 @@ def _toolset_has_keys(ts_key: str, config: dict = None) -> bool:
         except Exception:
             return False
 
-    if ts_key in {"web", "image_gen", "tts", "browser"}:
-        features = get_nous_subscription_features(config)
+    if ts_key in {"web", "image_gen", "video_gen", "tts", "browser"}:
+        features = get_nous_subscription_features(config, force_fresh=force_fresh)
         feature = features.features.get(ts_key)
         if feature and (feature.available or feature.managed_by_nous):
             return True
@@ -1463,7 +1510,7 @@ def _toolset_has_keys(ts_key: str, config: dict = None) -> bool:
     # Check TOOL_CATEGORIES first (provider-aware)
     cat = TOOL_CATEGORIES.get(ts_key)
     if cat:
-        for provider in _visible_providers(cat, config):
+        for provider in _visible_providers(cat, config, force_fresh=force_fresh):
             env_vars = provider.get("env_vars", [])
             if not env_vars:
                 return True  # No-key provider (e.g. Local Browser, Edge TTS)
@@ -1534,7 +1581,13 @@ def _estimate_tool_tokens() -> Dict[str, int]:
     return _tool_token_cache
 
 
-def _prompt_toolset_checklist(platform_label: str, enabled: Set[str], platform: str = "cli") -> Set[str]:
+def _prompt_toolset_checklist(
+    platform_label: str,
+    enabled: Set[str],
+    platform: str = "cli",
+    *,
+    force_fresh: bool = True,
+) -> Set[str]:
     """Multi-select checklist of toolsets. Returns set of selected toolset keys."""
     from hermes_cli.curses_ui import curses_checklist
     from toolsets import resolve_toolset
@@ -1552,7 +1605,10 @@ def _prompt_toolset_checklist(platform_label: str, enabled: Set[str], platform:
     labels = []
     for ts_key, ts_label, ts_desc in effective:
         suffix = ""
-        if not _toolset_has_keys(ts_key) and (TOOL_CATEGORIES.get(ts_key) or TOOLSET_ENV_REQUIREMENTS.get(ts_key)):
+        if (
+            not _toolset_has_keys(ts_key, force_fresh=force_fresh)
+            and (TOOL_CATEGORIES.get(ts_key) or TOOLSET_ENV_REQUIREMENTS.get(ts_key))
+        ):
             suffix = "  [no API key]"
         labels.append(f"{ts_label}  ({ts_desc}){suffix}")
 
@@ -1588,7 +1644,12 @@ def _prompt_toolset_checklist(platform_label: str, enabled: Set[str], platform:
 
 # ─── Provider-Aware Configuration ────────────────────────────────────────────
 
-def _configure_toolset(ts_key: str, config: dict):
+def _configure_toolset(
+    ts_key: str,
+    config: dict,
+    *,
+    force_fresh: bool = True,
+):
     """Configure a toolset - provider selection + API keys.
     
     Uses TOOL_CATEGORIES for provider-aware config, falls back to simple
@@ -1597,7 +1658,7 @@ def _configure_toolset(ts_key: str, config: dict):
     cat = TOOL_CATEGORIES.get(ts_key)
 
     if cat:
-        _configure_tool_category(ts_key, cat, config)
+        _configure_tool_category(ts_key, cat, config, force_fresh=force_fresh)
     else:
         # Simple fallback for vision, moa, etc.
         _configure_simple_requirements(ts_key)
@@ -1850,12 +1911,22 @@ def _plugin_tts_providers() -> list[dict]:
     return rows
 
 
-def _visible_providers(cat: dict, config: dict) -> list[dict]:
+def _visible_providers(
+    cat: dict,
+    config: dict,
+    *,
+    force_fresh: bool = False,
+) -> list[dict]:
     """Return provider entries visible for the current auth/config state."""
-    features = get_nous_subscription_features(config)
+    features = get_nous_subscription_features(config, force_fresh=force_fresh)
+    managed_available = bool(
+        features.account_info
+        and features.account_info.logged_in
+        and features.account_info.paid_service_access is True
+    )
     visible = []
     for provider in cat.get("providers", []):
-        if provider.get("managed_nous_feature") and not managed_nous_tools_enabled():
+        if provider.get("managed_nous_feature") and not managed_available:
             continue
         if provider.get("requires_nous_auth") and not features.nous_auth_present:
             continue
@@ -1896,6 +1967,31 @@ def _visible_providers(cat: dict, config: dict) -> list[dict]:
     return visible
 
 
+def _hidden_nous_gateway_message(
+    cat: dict,
+    config: dict,
+    capability: str,
+    *,
+    force_fresh: bool = False,
+) -> str:
+    """Return a reason when a category's Nous provider is hidden."""
+    features = get_nous_subscription_features(config, force_fresh=force_fresh)
+    managed_available = bool(
+        features.account_info
+        and features.account_info.logged_in
+        and features.account_info.paid_service_access is True
+    )
+    if managed_available:
+        return ""
+    if not any(p.get("managed_nous_feature") for p in cat.get("providers", [])):
+        return ""
+    message = format_nous_portal_entitlement_message(
+        features.account_info,
+        capability=capability,
+    )
+    return message or ""
+
+
 _POST_SETUP_INSTALLED: dict = {
     # post_setup_key -> predicate(): True when the install side-effect
     # is already satisfied. Used by `_toolset_needs_configuration_prompt`
@@ -1927,17 +2023,22 @@ def _post_setup_already_installed(post_setup_key: str) -> bool:
         return True
 
 
-def _toolset_needs_configuration_prompt(ts_key: str, config: dict) -> bool:
+def _toolset_needs_configuration_prompt(
+    ts_key: str,
+    config: dict,
+    *,
+    force_fresh: bool = False,
+) -> bool:
     """Return True when enabling this toolset should open provider setup."""
     cat = TOOL_CATEGORIES.get(ts_key)
     if not cat:
-        return not _toolset_has_keys(ts_key, config)
+        return not _toolset_has_keys(ts_key, config, force_fresh=force_fresh)
 
     # If any visible provider has a registered post_setup install-state
     # check that hasn't been satisfied (e.g. cua-driver binary not on
     # PATH yet), force the configuration flow so `_configure_provider`
     # invokes `_run_post_setup` and the install actually runs.
-    for provider in _visible_providers(cat, config):
+    for provider in _visible_providers(cat, config, force_fresh=force_fresh):
         post_setup = provider.get("post_setup")
         if post_setup and not _post_setup_already_installed(post_setup):
             return True
@@ -1988,14 +2089,26 @@ def _toolset_needs_configuration_prompt(ts_key: str, config: dict) -> bool:
             pass
         return True
 
-    return not _toolset_has_keys(ts_key, config)
+    return not _toolset_has_keys(ts_key, config, force_fresh=force_fresh)
 
 
-def _configure_tool_category(ts_key: str, cat: dict, config: dict):
+def _configure_tool_category(
+    ts_key: str,
+    cat: dict,
+    config: dict,
+    *,
+    force_fresh: bool = True,
+):
     """Configure a tool category with provider selection."""
     icon = cat.get("icon", "")
     name = cat["name"]
-    providers = _visible_providers(cat, config)
+    providers = _visible_providers(cat, config, force_fresh=force_fresh)
+    hidden_nous_message = _hidden_nous_gateway_message(
+        cat,
+        config,
+        f"the Nous Subscription provider for {name}",
+        force_fresh=force_fresh,
+    )
 
     # Check Python version requirement
     if cat.get("requires_python"):
@@ -2016,7 +2129,10 @@ def _configure_tool_category(ts_key: str, cat: dict, config: dict):
         # For single-provider tools, show a note if available
         if cat.get("setup_note"):
             _print_info(f"  {cat['setup_note']}")
-        _configure_provider(provider, config)
+        if hidden_nous_message:
+            for line in hidden_nous_message.splitlines():
+                _print_warning(f"  {line}")
+        _configure_provider(provider, config, force_fresh=force_fresh)
     else:
         # Multiple providers - let user choose
         print()
@@ -2025,6 +2141,9 @@ def _configure_tool_category(ts_key: str, cat: dict, config: dict):
         print(color(f"  --- {icon} {name} - {title} ---", Colors.CYAN))
         if cat.get("setup_note"):
             _print_info(f"  {cat['setup_note']}")
+        if hidden_nous_message:
+            for line in hidden_nous_message.splitlines():
+                _print_warning(f"  {line}")
         print()
 
         # Plain text labels only (no ANSI codes in menu items)
@@ -2033,7 +2152,10 @@ def _configure_tool_category(ts_key: str, cat: dict, config: dict):
         # obvious which options cost extra vs. cost nothing on top of Nous.
         try:
             _nous_logged_in = bool(
-                get_nous_subscription_features(config).nous_auth_present
+                get_nous_subscription_features(
+                    config,
+                    force_fresh=force_fresh,
+                ).nous_auth_present
             )
         except Exception:
             _nous_logged_in = False
@@ -2045,7 +2167,7 @@ def _configure_tool_category(ts_key: str, cat: dict, config: dict):
             configured = ""
             env_vars = p.get("env_vars", [])
             if not env_vars or all(get_env_value(v["key"]) for v in env_vars):
-                if _is_provider_active(p, config):
+                if _is_provider_active(p, config, force_fresh=force_fresh):
                     configured = " [active]"
                 elif not env_vars:
                     configured = ""
@@ -2065,7 +2187,11 @@ def _configure_tool_category(ts_key: str, cat: dict, config: dict):
         provider_choices.append("Skip — keep defaults / configure later")
 
         # Detect current provider as default
-        default_idx = _detect_active_provider_index(providers, config)
+        default_idx = _detect_active_provider_index(
+            providers,
+            config,
+            force_fresh=force_fresh,
+        )
 
         provider_idx = _prompt_choice(f"  {title}:", provider_choices, default_idx)
 
@@ -2074,10 +2200,15 @@ def _configure_tool_category(ts_key: str, cat: dict, config: dict):
             _print_info(f"  Skipped {name}")
             return
 
-        _configure_provider(providers[provider_idx], config)
+        _configure_provider(providers[provider_idx], config, force_fresh=force_fresh)
 
 
-def _is_provider_active(provider: dict, config: dict) -> bool:
+def _is_provider_active(
+    provider: dict,
+    config: dict,
+    *,
+    force_fresh: bool = False,
+) -> bool:
     """Check if a provider entry matches the currently active config."""
     plugin_name = provider.get("image_gen_plugin_name")
     if plugin_name:
@@ -2085,13 +2216,13 @@ def _is_provider_active(provider: dict, config: dict) -> bool:
         return isinstance(image_cfg, dict) and image_cfg.get("provider") == plugin_name
 
     video_plugin_name = provider.get("video_gen_plugin_name")
-    if video_plugin_name:
+    if video_plugin_name and not provider.get("managed_nous_feature"):
         video_cfg = config.get("video_gen", {})
         return isinstance(video_cfg, dict) and video_cfg.get("provider") == video_plugin_name
 
     managed_feature = provider.get("managed_nous_feature")
     if managed_feature:
-        features = get_nous_subscription_features(config)
+        features = get_nous_subscription_features(config, force_fresh=force_fresh)
         feature = features.features.get(managed_feature)
         if feature is None:
             return False
@@ -2104,6 +2235,15 @@ def _is_provider_active(provider: dict, config: dict) -> bool:
                 if image_cfg.get("use_gateway") is not None and not is_truthy_value(image_cfg.get("use_gateway"), default=False):
                     return False
             return feature.managed_by_nous
+        if managed_feature == "video_gen":
+            video_cfg = config.get("video_gen", {})
+            if isinstance(video_cfg, dict):
+                configured_provider = video_cfg.get("provider")
+                if configured_provider not in {None, "", "fal"}:
+                    return False
+                if video_cfg.get("use_gateway") is not None and not is_truthy_value(video_cfg.get("use_gateway"), default=False):
+                    return False
+            return feature.managed_by_nous
         if provider.get("tts_provider"):
             return (
                 feature.managed_by_nous
@@ -2138,10 +2278,15 @@ def _is_provider_active(provider: dict, config: dict) -> bool:
     return False
 
 
-def _detect_active_provider_index(providers: list, config: dict) -> int:
+def _detect_active_provider_index(
+    providers: list,
+    config: dict,
+    *,
+    force_fresh: bool = False,
+) -> int:
     """Return the index of the currently active provider, or 0."""
     for i, p in enumerate(providers):
-        if _is_provider_active(p, config):
+        if _is_provider_active(p, config, force_fresh=force_fresh):
             return i
         # Fallback: env vars present → likely configured
         env_vars = p.get("env_vars", [])
@@ -2432,27 +2577,41 @@ def _configure_videogen_model_for_plugin(plugin_name: str, config: dict) -> None
     _print_success(f"  Model set to: {chosen}")
 
 
-def _select_plugin_video_gen_provider(plugin_name: str, config: dict) -> None:
+def _select_plugin_video_gen_provider(plugin_name: str, config: dict, *, use_gateway: bool = False) -> None:
     """Persist a plugin-backed video generation provider selection."""
     vid_cfg = config.setdefault("video_gen", {})
     if not isinstance(vid_cfg, dict):
         vid_cfg = {}
         config["video_gen"] = vid_cfg
     vid_cfg["provider"] = plugin_name
-    vid_cfg["use_gateway"] = False
+    vid_cfg["use_gateway"] = use_gateway
     _print_success(f"  video_gen.provider set to: {plugin_name}")
     _configure_videogen_model_for_plugin(plugin_name, config)
 
 
-def _configure_provider(provider: dict, config: dict):
+def _configure_provider(
+    provider: dict,
+    config: dict,
+    *,
+    force_fresh: bool = True,
+):
     """Configure a single provider - prompt for API keys and set config."""
     env_vars = provider.get("env_vars", [])
     managed_feature = provider.get("managed_nous_feature")
 
     if provider.get("requires_nous_auth"):
-        features = get_nous_subscription_features(config)
-        if not features.nous_auth_present:
-            _print_warning("  Nous Subscription is only available after logging into Nous Portal.")
+        features = get_nous_subscription_features(config, force_fresh=force_fresh)
+        entitled = bool(
+            features.account_info and features.account_info.paid_service_access is True
+        )
+        if not features.nous_auth_present or not entitled:
+            message = format_nous_portal_entitlement_message(
+                features.account_info,
+                capability=f"{provider.get('name', 'Nous Subscription')}",
+            )
+            _print_warning(
+                f"  {message or 'Nous Subscription is only available after logging into Nous Portal.'}"
+            )
             return
 
     # Set TTS provider in config if applicable
@@ -2510,7 +2669,7 @@ def _configure_provider(provider: dict, config: dict):
         # registry.
         video_plugin = provider.get("video_gen_plugin_name")
         if video_plugin:
-            _select_plugin_video_gen_provider(video_plugin, config)
+            _select_plugin_video_gen_provider(video_plugin, config, use_gateway=bool(managed_feature))
             return
         # Imagegen backends prompt for model selection after backend pick.
         backend = provider.get("imagegen_backend")
@@ -2542,7 +2701,10 @@ def _configure_provider(provider: dict, config: dict):
                     _has_managed_sibling = True
                     break
             if _has_managed_sibling:
-                _features = get_nous_subscription_features(config)
+                _features = get_nous_subscription_features(
+                    config,
+                    force_fresh=force_fresh,
+                )
                 _show_portal_hint = not _features.nous_auth_present
         except Exception:
             _show_portal_hint = False
@@ -2586,7 +2748,7 @@ def _configure_provider(provider: dict, config: dict):
             return
         video_plugin = provider.get("video_gen_plugin_name")
         if video_plugin:
-            _select_plugin_video_gen_provider(video_plugin, config)
+            _select_plugin_video_gen_provider(video_plugin, config, use_gateway=bool(managed_feature))
             return
         # Imagegen backends prompt for model selection after env vars are in.
         backend = provider.get("imagegen_backend")
@@ -2660,7 +2822,11 @@ def _configure_simple_requirements(ts_key: str):
             _print_warning("    Skipped")
 
 
-def _reconfigure_tool(config: dict):
+def _reconfigure_tool(
+    config: dict,
+    *,
+    force_fresh: bool = True,
+):
     """Let user reconfigure an existing tool's provider or API key."""
     # Build list of configurable tools that are currently set up
     configurable = []
@@ -2668,7 +2834,10 @@ def _reconfigure_tool(config: dict):
         cat = TOOL_CATEGORIES.get(ts_key)
         reqs = TOOLSET_ENV_REQUIREMENTS.get(ts_key)
         if cat or reqs:
-            if _toolset_has_keys(ts_key, config) or _toolset_enabled_for_reconfigure(ts_key, config):
+            if (
+                _toolset_has_keys(ts_key, config, force_fresh=force_fresh)
+                or _toolset_enabled_for_reconfigure(ts_key, config)
+            ):
                 configurable.append((ts_key, ts_label))
 
     if not configurable:
@@ -2687,7 +2856,12 @@ def _reconfigure_tool(config: dict):
     cat = TOOL_CATEGORIES.get(ts_key)
 
     if cat:
-        _configure_tool_category_for_reconfig(ts_key, cat, config)
+        _configure_tool_category_for_reconfig(
+            ts_key,
+            cat,
+            config,
+            force_fresh=force_fresh,
+        )
     else:
         _reconfigure_simple_requirements(ts_key)
 
@@ -2716,20 +2890,38 @@ def _toolset_enabled_for_reconfigure(ts_key: str, config: dict) -> bool:
     return False
 
 
-def _configure_tool_category_for_reconfig(ts_key: str, cat: dict, config: dict):
+def _configure_tool_category_for_reconfig(
+    ts_key: str,
+    cat: dict,
+    config: dict,
+    *,
+    force_fresh: bool = True,
+):
     """Reconfigure a tool category - provider selection + API key update."""
     icon = cat.get("icon", "")
     name = cat["name"]
-    providers = _visible_providers(cat, config)
+    providers = _visible_providers(cat, config, force_fresh=force_fresh)
+    hidden_nous_message = _hidden_nous_gateway_message(
+        cat,
+        config,
+        f"the Nous Subscription provider for {name}",
+        force_fresh=force_fresh,
+    )
 
     if len(providers) == 1:
         provider = providers[0]
         print()
         print(color(f"  --- {icon} {name} ({provider['name']}) ---", Colors.CYAN))
-        _reconfigure_provider(provider, config)
+        if hidden_nous_message:
+            for line in hidden_nous_message.splitlines():
+                _print_warning(f"  {line}")
+        _reconfigure_provider(provider, config, force_fresh=force_fresh)
     else:
         print()
         print(color(f"  --- {icon} {name} - Choose a provider ---", Colors.CYAN))
+        if hidden_nous_message:
+            for line in hidden_nous_message.splitlines():
+                _print_warning(f"  {line}")
         print()
 
         provider_choices = []
@@ -2739,7 +2931,7 @@ def _configure_tool_category_for_reconfig(ts_key: str, cat: dict, config: dict):
             configured = ""
             env_vars = p.get("env_vars", [])
             if not env_vars or all(get_env_value(v["key"]) for v in env_vars):
-                if _is_provider_active(p, config):
+                if _is_provider_active(p, config, force_fresh=force_fresh):
                     configured = " [active]"
                 elif not env_vars:
                     configured = ""
@@ -2747,21 +2939,43 @@ def _configure_tool_category_for_reconfig(ts_key: str, cat: dict, config: dict):
                     configured = " [configured]"
             provider_choices.append(f"{p['name']}{badge}{tag}{configured}")
 
-        default_idx = _detect_active_provider_index(providers, config)
+        default_idx = _detect_active_provider_index(
+            providers,
+            config,
+            force_fresh=force_fresh,
+        )
 
         provider_idx = _prompt_choice("  Select provider:", provider_choices, default_idx)
-        _reconfigure_provider(providers[provider_idx], config)
+        _reconfigure_provider(
+            providers[provider_idx],
+            config,
+            force_fresh=force_fresh,
+        )
 
 
-def _reconfigure_provider(provider: dict, config: dict):
+def _reconfigure_provider(
+    provider: dict,
+    config: dict,
+    *,
+    force_fresh: bool = True,
+):
     """Reconfigure a provider - update API keys."""
     env_vars = provider.get("env_vars", [])
     managed_feature = provider.get("managed_nous_feature")
 
     if provider.get("requires_nous_auth"):
-        features = get_nous_subscription_features(config)
-        if not features.nous_auth_present:
-            _print_warning("  Nous Subscription is only available after logging into Nous Portal.")
+        features = get_nous_subscription_features(config, force_fresh=force_fresh)
+        entitled = bool(
+            features.account_info and features.account_info.paid_service_access is True
+        )
+        if not features.nous_auth_present or not entitled:
+            message = format_nous_portal_entitlement_message(
+                features.account_info,
+                capability=f"{provider.get('name', 'Nous Subscription')}",
+            )
+            _print_warning(
+                f"  {message or 'Nous Subscription is only available after logging into Nous Portal.'}"
+            )
             return
 
     if provider.get("tts_provider"):
@@ -2815,7 +3029,7 @@ def _reconfigure_provider(provider: dict, config: dict):
         # Plugin-registered video_gen provider — same flow, different registry.
         video_plugin = provider.get("video_gen_plugin_name")
         if video_plugin:
-            _select_plugin_video_gen_provider(video_plugin, config)
+            _select_plugin_video_gen_provider(video_plugin, config, use_gateway=bool(managed_feature))
             return
         # Imagegen backends prompt for model selection on reconfig too.
         backend = provider.get("imagegen_backend")
@@ -2855,7 +3069,7 @@ def _reconfigure_provider(provider: dict, config: dict):
     # Plugin-registered video_gen provider — same flow, different registry.
     video_plugin = provider.get("video_gen_plugin_name")
     if video_plugin:
-        _select_plugin_video_gen_provider(video_plugin, config)
+        _select_plugin_video_gen_provider(video_plugin, config, use_gateway=bool(managed_feature))
         return
 
     backend = provider.get("imagegen_backend")
@@ -2962,11 +3176,11 @@ def tools_command(args=None, first_install: bool = False, config: dict = None):
             auto_configured = apply_nous_managed_defaults(
                 config,
                 enabled_toolsets=new_enabled,
+                force_fresh=True,
             )
-            if managed_nous_tools_enabled():
-                for ts_key in sorted(auto_configured):
-                    label = next((l for k, l, _ in CONFIGURABLE_TOOLSETS if k == ts_key), ts_key)
-                    print(color(f"  ✓ {label}: using your Nous subscription defaults", Colors.GREEN))
+            for ts_key in sorted(auto_configured):
+                label = next((l for k, l, _ in CONFIGURABLE_TOOLSETS if k == ts_key), ts_key)
+                print(color(f"  ✓ {label}: using your Nous subscription defaults", Colors.GREEN))
 
             # Walk through ALL selected tools that have provider options or
             # need API keys.  This ensures browser (Local vs Browserbase),
@@ -3034,7 +3248,7 @@ def tools_command(args=None, first_install: bool = False, config: dict = None):
 
         # "Reconfigure" selected
         if idx == _reconfig_idx:
-            _reconfigure_tool(config)
+            _reconfigure_tool(config, force_fresh=True)
             print()
             continue
 
@@ -3050,7 +3264,11 @@ def tools_command(args=None, first_install: bool = False, config: dict = None):
             all_current = set()
             for pk in platform_keys:
                 all_current |= _get_platform_tools(config, pk, include_default_mcp_servers=False)
-            new_enabled = _prompt_toolset_checklist("All platforms", all_current)
+            new_enabled = _prompt_toolset_checklist(
+                "All platforms",
+                all_current,
+                force_fresh=True,
+            )
             if new_enabled != all_current:
                 for pk in platform_keys:
                     prev = _get_platform_tools(config, pk, include_default_mcp_servers=False)
@@ -3068,7 +3286,11 @@ def tools_command(args=None, first_install: bool = False, config: dict = None):
                     # Configure API keys for newly enabled tools
                     for ts_key in sorted(added):
                         if (TOOL_CATEGORIES.get(ts_key) or TOOLSET_ENV_REQUIREMENTS.get(ts_key)):
-                            if _toolset_needs_configuration_prompt(ts_key, config):
+                            if _toolset_needs_configuration_prompt(
+                                ts_key,
+                                config,
+                                force_fresh=True,
+                            ):
                                 _configure_toolset(ts_key, config)
                     _save_platform_tools(config, pk, new_enabled)
                 save_config(config)
@@ -3090,7 +3312,11 @@ def tools_command(args=None, first_install: bool = False, config: dict = None):
         current_enabled = _get_platform_tools(config, pkey, include_default_mcp_servers=False)
 
         # Show checklist
-        new_enabled = _prompt_toolset_checklist(pinfo["label"], current_enabled)
+        new_enabled = _prompt_toolset_checklist(
+            pinfo["label"],
+            current_enabled,
+            force_fresh=True,
+        )
 
         if new_enabled != current_enabled:
             added = new_enabled - current_enabled
@@ -3108,7 +3334,11 @@ def tools_command(args=None, first_install: bool = False, config: dict = None):
             # Configure newly enabled toolsets that need API keys
             for ts_key in sorted(added):
                 if (TOOL_CATEGORIES.get(ts_key) or TOOLSET_ENV_REQUIREMENTS.get(ts_key)):
-                    if _toolset_needs_configuration_prompt(ts_key, config):
+                    if _toolset_needs_configuration_prompt(
+                        ts_key,
+                        config,
+                        force_fresh=True,
+                    ):
                         _configure_toolset(ts_key, config)
 
             _save_platform_tools(config, pkey, new_enabled)
diff --git a/hermes_cli/uninstall.py b/hermes_cli/uninstall.py
index 028b66575ff..430abd47b9b 100644
--- a/hermes_cli/uninstall.py
+++ b/hermes_cli/uninstall.py
@@ -117,6 +117,49 @@ def remove_wrapper_script():
     return removed
 
 
+def remove_node_symlinks(hermes_home: Path) -> list:
+    """Remove the node/npm/npx symlinks the installer drops in ~/.local/bin.
+
+    The POSIX installer (``scripts/install.sh`` / ``scripts/lib/node-bootstrap.sh``)
+    creates::
+
+        ~/.local/bin/node -> $HERMES_HOME/node/bin/node
+        ~/.local/bin/npm  -> $HERMES_HOME/node/bin/npm
+        ~/.local/bin/npx  -> $HERMES_HOME/node/bin/npx
+
+    and prepends ``~/.local/bin`` to PATH, so these shadow an existing Node
+    manager such as nvm.  Symmetrically remove them on uninstall, but *only*
+    when the link still resolves into this Hermes home's ``node`` directory.
+    A link the user has since repointed at nvm (or anything else outside
+    Hermes) is left untouched so we never break unrelated tooling.
+    """
+    node_dir = (hermes_home / "node").resolve()
+    removed = []
+
+    for name in ("node", "npm", "npx"):
+        link = Path.home() / ".local" / "bin" / name
+        try:
+            # Only act on symlinks — never delete a real binary the user put here.
+            if not link.is_symlink():
+                continue
+
+            # Resolve the link target and confirm it points into our node dir.
+            # os.readlink + manual join handles broken (dangling) links too;
+            # Path.resolve() on a dangling link still returns the target path.
+            target = Path(os.readlink(link))
+            if not target.is_absolute():
+                target = (link.parent / target)
+            target = target.resolve()
+
+            if target == node_dir or node_dir in target.parents:
+                link.unlink()
+                removed.append(link)
+        except Exception as e:
+            log_warn(f"Could not remove {link}: {e}")
+
+    return removed
+
+
 def uninstall_gateway_service():
     """Stop and uninstall the gateway service (systemd, launchd, Windows
     Scheduled Task / Startup folder) and kill any standalone gateway processes.
@@ -594,6 +637,17 @@ def run_uninstall(args):
             log_success(f"Removed {wrapper}")
     else:
         log_info("No wrapper script found")
+
+    # 3b. Remove node/npm/npx symlinks the installer left in ~/.local/bin
+    #     (only when they still point into this Hermes home's node dir, so we
+    #     never clobber an existing nvm / user-managed Node).
+    log_info("Removing Hermes-managed node/npm/npx symlinks...")
+    removed_node_links = remove_node_symlinks(hermes_home)
+    if removed_node_links:
+        for link in removed_node_links:
+            log_success(f"Removed {link}")
+    else:
+        log_info("No Hermes-managed node/npm/npx symlinks found")
     
     # 4. Remove installation directory (code)
     log_info("Removing installation directory...")
diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py
index be05fc4be7b..40dd9e2efff 100644
--- a/hermes_cli/web_server.py
+++ b/hermes_cli/web_server.py
@@ -66,15 +66,8 @@ except ImportError:
     # them out of every other install path. After install, re-import.
     try:
         from tools.lazy_deps import ensure as _lazy_ensure
-
         _lazy_ensure("tool.dashboard", prompt=False)
-        from fastapi import (
-            FastAPI,
-            HTTPException,
-            Request,
-            WebSocket,
-            WebSocketDisconnect,
-        )
+        from fastapi import FastAPI, HTTPException, Request, WebSocket, WebSocketDisconnect
         from fastapi.middleware.cors import CORSMiddleware
         from fastapi.responses import FileResponse, HTMLResponse, JSONResponse, Response
         from fastapi.staticfiles import StaticFiles
@@ -85,11 +78,7 @@ except ImportError:
             f"Install with: {sys.executable} -m pip install 'fastapi' 'uvicorn[standard]'"
         )
 
-WEB_DIST = (
-    Path(os.environ["HERMES_WEB_DIST"])
-    if "HERMES_WEB_DIST" in os.environ
-    else Path(__file__).parent / "web_dist"
-)
+WEB_DIST = Path(os.environ["HERMES_WEB_DIST"]) if "HERMES_WEB_DIST" in os.environ else Path(__file__).parent / "web_dist"
 _log = logging.getLogger(__name__)
 
 app = FastAPI(title="Hermes Agent", version=__version__)
@@ -98,12 +87,8 @@ app = FastAPI(title="Hermes Agent", version=__version__)
 # Session token for protecting sensitive endpoints (reveal).
 # Generated fresh on every server start — dies when the process exits.
 # Injected into the SPA HTML so only the legitimate web UI can use it.
-# Native desktop shells can pre-seed the token because they own the local
-# child process and do not need to scrape index.html before opening /api/ws.
 # ---------------------------------------------------------------------------
-_SESSION_TOKEN = os.environ.get(
-    "HERMES_DASHBOARD_SESSION_TOKEN"
-) or secrets.token_urlsafe(32)
+_SESSION_TOKEN = secrets.token_urlsafe(32)
 _SESSION_HEADER_NAME = "X-Hermes-Session-Token"
 
 # In-browser Chat tab (/chat, /api/pty, …).  Off unless ``hermes dashboard --tui``
@@ -118,39 +103,30 @@ _REVEAL_WINDOW_SECONDS = 30
 # CORS: restrict to localhost origins only.  The web UI is intended to run
 # locally; binding to 0.0.0.0 with allow_origins=["*"] would let any website
 # read/modify config and secrets.
-#
-# Electron renderers load index.html via file:// URLs.  Chromium sets the
-# Origin header to "null" for such windows on the WebSocket upgrade request,
-# which Starlette's CORSMiddleware rejects with HTTP 403 before the
-# /api/ws route handler ever runs.  We allow "null" explicitly so the
-# packaged desktop app can connect; security is preserved because:
-#   1. The gateway binds to 127.0.0.1 by default, so a malicious file://
-#      page on another machine can't reach it.
-#   2. Every authenticated /api/ endpoint past the CORS layer is gated by
-#      the per-process session token, so even a local file:// page with
-#      Origin: null cannot make authenticated requests without already
-#      knowing the secret.
 
 app.add_middleware(
     CORSMiddleware,
-    allow_origin_regex=r"^(https?://(localhost|127\.0\.0\.1)(:\d+)?|null)$",
+    allow_origin_regex=r"^https?://(localhost|127\.0\.0\.1)(:\d+)?$",
     allow_methods=["*"],
     allow_headers=["*"],
 )
 
 # ---------------------------------------------------------------------------
 # Endpoints that do NOT require the session token.  Everything else under
-# /api/ is gated by the auth middleware below.  Keep this list minimal —
-# only truly non-sensitive, read-only endpoints belong here.
+# /api/ is gated by the auth middleware below.
+#
+# This list is defined in ``hermes_cli.dashboard_auth.public_paths`` so the
+# OAuth gate middleware can honour the same allowlist — keeping the two
+# gates in lockstep avoids drift like the wildcard-subdomain regression
+# where ``/api/status`` was public under the legacy gate but 401'd under
+# the OAuth gate (breaking the portal's liveness probe).
+#
+# Keep the upstream list minimal — only truly non-sensitive, read-only
+# endpoints belong there.
 # ---------------------------------------------------------------------------
-_PUBLIC_API_PATHS: frozenset = frozenset({
-    "/api/status",
-    "/api/config/defaults",
-    "/api/config/schema",
-    "/api/model/info",
-    "/api/dashboard/themes",
-    "/api/dashboard/plugins",
-})
+from hermes_cli.dashboard_auth.public_paths import (
+    PUBLIC_API_PATHS as _PUBLIC_API_PATHS,
+)
 
 
 def _has_valid_session_token(request: Request) -> bool:
@@ -185,13 +161,9 @@ def _require_token(request: Request) -> None:
 # checks because the browser now considers evil.test and our dashboard
 # "same origin". Validating the Host header at the app layer rejects any
 # request whose Host isn't one we bound for. See GHSA-ppp5-vxwm-4cf7.
-_LOOPBACK_HOST_VALUES: frozenset = frozenset(
-    {
-        "localhost",
-        "127.0.0.1",
-        "::1",
-    }
-)
+_LOOPBACK_HOST_VALUES: frozenset = frozenset({
+    "localhost", "127.0.0.1", "::1",
+})
 
 
 def should_require_auth(host: str, allow_public: bool) -> bool:
@@ -317,41 +289,6 @@ async def auth_middleware(request: Request, call_next):
     return await call_next(request)
 
 
-@app.middleware("http")
-async def api_error_logging_middleware(request: Request, call_next):
-    """Emit compact diagnostics for API failures and crashes."""
-    path = request.url.path
-    is_api = path.startswith("/api/")
-    start = time.monotonic()
-    try:
-        response = await call_next(request)
-    except Exception:
-        if is_api:
-            _log.exception("api-crash method=%s path=%s", request.method, path)
-        raise
-
-    if is_api:
-        elapsed_ms = (time.monotonic() - start) * 1000
-        status = int(response.status_code)
-        if status >= 500:
-            _log.error(
-                "api-failure method=%s path=%s status=%d elapsed_ms=%.1f",
-                request.method,
-                path,
-                status,
-                elapsed_ms,
-            )
-        elif status >= 400 and status not in {401, 403}:
-            _log.warning(
-                "api-failure method=%s path=%s status=%d elapsed_ms=%.1f",
-                request.method,
-                path,
-                status,
-                elapsed_ms,
-            )
-    return response
-
-
 # ---------------------------------------------------------------------------
 # Config schema — auto-generated from DEFAULT_CONFIG
 # ---------------------------------------------------------------------------
@@ -474,21 +411,9 @@ _CATEGORY_MERGE: Dict[str, str] = {
 
 # Display order for tabs — unlisted categories sort alphabetically after these.
 _CATEGORY_ORDER = [
-    "general",
-    "agent",
-    "terminal",
-    "display",
-    "delegation",
-    "memory",
-    "compression",
-    "security",
-    "browser",
-    "voice",
-    "tts",
-    "stt",
-    "logging",
-    "discord",
-    "auxiliary",
+    "general", "agent", "terminal", "display", "delegation",
+    "memory", "compression", "security", "browser", "voice",
+    "tts", "stt", "logging", "discord", "auxiliary",
 ]
 
 
@@ -517,9 +442,7 @@ def _build_schema_from_config(
         full_key = f"{prefix}.{key}" if prefix else key
 
         # Skip internal / version keys
-        if full_key in {
-            "_config_version",
-        }:
+        if full_key in {"_config_version",}:
             continue
 
         # Category is the first path component for nested keys, or "general"
@@ -544,9 +467,7 @@ def _build_schema_from_config(
             if full_key in _SCHEMA_OVERRIDES:
                 entry.update(_SCHEMA_OVERRIDES[full_key])
             # Merge small categories
-            entry["category"] = _CATEGORY_MERGE.get(
-                entry["category"], entry["category"]
-            )
+            entry["category"] = _CATEGORY_MERGE.get(entry["category"], entry["category"])
             schema[full_key] = entry
     return schema
 
@@ -631,6 +552,20 @@ def _audio_extension_for_mime(mime_type: str) -> str:
     return _AUDIO_MIME_EXTENSIONS.get(normalized, ".webm")
 
 
+class ModelAssignment(BaseModel):
+    """Payload for POST /api/model/set — assign a provider/model to a slot.
+
+    scope="main"        → writes model.provider + model.default
+    scope="auxiliary"   → writes auxiliary.<task>.provider + auxiliary.<task>.model
+    scope="auxiliary" with task=""  → applied to every auxiliary.* slot
+    scope="auxiliary" with task="__reset__"  → resets every slot to provider="auto"
+    """
+    scope: str
+    provider: str
+    model: str
+    task: str = ""
+
+
 _GATEWAY_HEALTH_URL = os.getenv("GATEWAY_HEALTH_URL")
 try:
     _GATEWAY_HEALTH_TIMEOUT = float(os.getenv("GATEWAY_HEALTH_TIMEOUT", "3"))
@@ -732,11 +667,7 @@ async def get_status():
     # Prefer the detailed health endpoint response (has full state) when the
     # local runtime status file is absent or stale (cross-container).
     runtime = read_runtime_status()
-    if (
-        runtime is None
-        and remote_health_body
-        and remote_health_body.get("gateway_state")
-    ):
+    if runtime is None and remote_health_body and remote_health_body.get("gateway_state"):
         runtime = remote_health_body
 
     if runtime:
@@ -751,11 +682,7 @@ async def get_status():
         gateway_exit_reason = runtime.get("exit_reason")
         gateway_updated_at = runtime.get("updated_at")
         if not gateway_running:
-            gateway_state = (
-                gateway_state
-                if gateway_state in {"stopped", "startup_failed"}
-                else "stopped"
-            )
+            gateway_state = gateway_state if gateway_state in {"stopped", "startup_failed"} else "stopped"
             gateway_platforms = {}
         elif gateway_running and remote_health_body is not None:
             # The health probe confirmed the gateway is alive, but the local
@@ -772,14 +699,12 @@ async def get_status():
     active_sessions = 0
     try:
         from hermes_state import SessionDB
-
         db = SessionDB()
         try:
             sessions = db.list_sessions_rich(limit=50)
             now = time.time()
             active_sessions = sum(
-                1
-                for s in sessions
+                1 for s in sessions
                 if s.get("ended_at") is None
                 and (now - s.get("last_active", s.get("started_at", 0))) < 300
             )
@@ -822,6 +747,109 @@ async def get_status():
     }
 
 
+# ---------------------------------------------------------------------------
+# Gateway + update actions (invoked from the Status page).
+#
+# Both commands are spawned as detached subprocesses so the HTTP request
+# returns immediately.  stdin is closed (``DEVNULL``) so any stray ``input()``
+# calls fail fast with EOF rather than hanging forever.  stdout/stderr are
+# streamed to a per-action log file under ``~/.hermes/logs/<action>.log`` so
+# the dashboard can tail them back to the user.
+# ---------------------------------------------------------------------------
+
+_ACTION_LOG_DIR: Path = get_hermes_home() / "logs"
+
+# Short ``name`` (from the URL) → absolute log file path.
+_ACTION_LOG_FILES: Dict[str, str] = {
+    "gateway-restart": "gateway-restart.log",
+    "hermes-update": "hermes-update.log",
+}
+
+# ``name`` → most recently spawned Popen handle.  Used so ``status`` can
+# report liveness and exit code without shelling out to ``ps``.
+_ACTION_PROCS: Dict[str, subprocess.Popen] = {}
+
+
+def _spawn_hermes_action(subcommand: List[str], name: str) -> subprocess.Popen:
+    """Spawn ``hermes <subcommand>`` detached and record the Popen handle.
+
+    Uses the running interpreter's ``hermes_cli.main`` module so the action
+    inherits the same venv/PYTHONPATH the web server is using.
+    """
+    log_file_name = _ACTION_LOG_FILES[name]
+    _ACTION_LOG_DIR.mkdir(parents=True, exist_ok=True)
+    log_path = _ACTION_LOG_DIR / log_file_name
+    log_file = open(log_path, "ab", buffering=0)
+    log_file.write(
+        f"\n=== {name} started {time.strftime('%Y-%m-%d %H:%M:%S')} ===\n".encode()
+    )
+
+    cmd = [sys.executable, "-m", "hermes_cli.main", *subcommand]
+
+    popen_kwargs: Dict[str, Any] = {
+        "cwd": str(PROJECT_ROOT),
+        "stdin": subprocess.DEVNULL,
+        "stdout": log_file,
+        "stderr": subprocess.STDOUT,
+        "env": {**os.environ, "HERMES_NONINTERACTIVE": "1"},
+    }
+    if sys.platform == "win32":
+        popen_kwargs["creationflags"] = (
+            subprocess.CREATE_NEW_PROCESS_GROUP  # type: ignore[attr-defined]
+            | getattr(subprocess, "DETACHED_PROCESS", 0)
+        )
+    else:
+        popen_kwargs["start_new_session"] = True
+
+    proc = subprocess.Popen(cmd, **popen_kwargs)
+    _ACTION_PROCS[name] = proc
+    return proc
+
+
+def _tail_lines(path: Path, n: int) -> List[str]:
+    """Return the last ``n`` lines of ``path``.  Reads the whole file — fine
+    for our small per-action logs.  Binary-decoded with ``errors='replace'``
+    so log corruption doesn't 500 the endpoint."""
+    if not path.exists():
+        return []
+    try:
+        text = path.read_text(encoding="utf-8", errors="replace")
+    except OSError:
+        return []
+    lines = text.splitlines()
+    return lines[-n:] if n > 0 else lines
+
+
+@app.post("/api/gateway/restart")
+async def restart_gateway():
+    """Kick off a ``hermes gateway restart`` in the background."""
+    try:
+        proc = _spawn_hermes_action(["gateway", "restart"], "gateway-restart")
+    except Exception as exc:
+        _log.exception("Failed to spawn gateway restart")
+        raise HTTPException(status_code=500, detail=f"Failed to restart gateway: {exc}")
+    return {
+        "ok": True,
+        "pid": proc.pid,
+        "name": "gateway-restart",
+    }
+
+
+@app.post("/api/hermes/update")
+async def update_hermes():
+    """Kick off ``hermes update`` in the background."""
+    try:
+        proc = _spawn_hermes_action(["update"], "hermes-update")
+    except Exception as exc:
+        _log.exception("Failed to spawn hermes update")
+        raise HTTPException(status_code=500, detail=f"Failed to start update: {exc}")
+    return {
+        "ok": True,
+        "pid": proc.pid,
+        "name": "hermes-update",
+    }
+
+
 @app.post("/api/audio/transcribe")
 async def transcribe_audio_upload(payload: AudioTranscriptionRequest):
     data_url = (payload.data_url or "").strip()
@@ -896,307 +924,6 @@ async def transcribe_audio_upload(payload: AudioTranscriptionRequest):
     }
 
 
-class TTSSpeakRequest(BaseModel):
-    text: str
-
-
-def _elevenlabs_voice_label(voice: Dict[str, Any]) -> str:
-    name = str(voice.get("name") or voice.get("voice_id") or "Voice").strip()
-    category = str(voice.get("category") or "").strip()
-
-    return f"{name} ({category})" if category else name
-
-
-@app.get("/api/audio/elevenlabs/voices")
-async def get_elevenlabs_voices():
-    """Return ElevenLabs voices when an API key is configured.
-
-    The desktop UI uses this for the ``tts.elevenlabs.voice_id`` dropdown.
-    Only non-secret voice metadata is returned; the API key stays server-side.
-    """
-    api_key = (
-        load_env().get("ELEVENLABS_API_KEY")
-        or os.environ.get("ELEVENLABS_API_KEY")
-        or ""
-    ).strip()
-    if not api_key:
-        return {"available": False, "voices": []}
-
-    request = urllib.request.Request(
-        "https://api.elevenlabs.io/v1/voices",
-        headers={
-            "Accept": "application/json",
-            "xi-api-key": api_key,
-        },
-    )
-
-    try:
-        loop = asyncio.get_running_loop()
-
-        def _fetch() -> Dict[str, Any]:
-            with urllib.request.urlopen(request, timeout=10) as response:
-                return json.loads(response.read().decode("utf-8"))
-
-        payload = await loop.run_in_executor(None, _fetch)
-    except Exception as exc:
-        _log.warning("ElevenLabs voice list failed: %s", exc)
-        raise HTTPException(status_code=502, detail="Could not load ElevenLabs voices")
-
-    voices = []
-    for voice in payload.get("voices") or []:
-        if not isinstance(voice, dict):
-            continue
-
-        voice_id = str(voice.get("voice_id") or "").strip()
-        if not voice_id:
-            continue
-
-        voices.append(
-            {
-                "voice_id": voice_id,
-                "name": str(voice.get("name") or voice_id),
-                "label": _elevenlabs_voice_label(voice),
-            }
-        )
-
-    voices.sort(key=lambda item: str(item.get("label") or "").lower())
-    return {"available": True, "voices": voices}
-
-
-@app.post("/api/audio/speak")
-async def speak_text(payload: TTSSpeakRequest):
-    """Synthesize speech and return audio as base64 data URL.
-
-    Used by the desktop voice-conversation mode to play back assistant
-    responses without exposing the on-disk file path. Reuses the
-    existing TTS provider chain (Edge / OpenAI / ElevenLabs / etc.)
-    configured in ``~/.hermes/config.yaml`` under ``tts.``.
-    """
-    text = (payload.text or "").strip()
-    if not text:
-        raise HTTPException(status_code=400, detail="Text is required")
-
-    try:
-        from tools.tts_tool import text_to_speech_tool
-
-        loop = asyncio.get_running_loop()
-        result_json = await loop.run_in_executor(None, text_to_speech_tool, text)
-    except Exception as exc:
-        _log.exception("Desktop voice TTS failed")
-        raise HTTPException(status_code=500, detail=f"Speech synthesis failed: {exc}")
-
-    try:
-        result = (
-            json.loads(result_json) if isinstance(result_json, str) else result_json
-        )
-    except Exception:
-        raise HTTPException(status_code=500, detail="Invalid TTS response")
-
-    if not result.get("success"):
-        raise HTTPException(
-            status_code=400,
-            detail=result.get("error") or "Speech synthesis failed",
-        )
-
-    file_path = result.get("file_path")
-    if not file_path or not os.path.isfile(file_path):
-        raise HTTPException(status_code=500, detail="Audio file missing")
-
-    ext = os.path.splitext(file_path)[1].lower()
-    mime_type = {
-        ".mp3": "audio/mpeg",
-        ".ogg": "audio/ogg",
-        ".opus": "audio/ogg",
-        ".wav": "audio/wav",
-        ".flac": "audio/flac",
-    }.get(ext, "audio/mpeg")
-
-    try:
-        with open(file_path, "rb") as fh:
-            audio_bytes = fh.read()
-    except OSError as exc:
-        raise HTTPException(status_code=500, detail=f"Could not read audio: {exc}")
-    finally:
-        try:
-            os.unlink(file_path)
-        except OSError:
-            pass
-
-    encoded = base64.b64encode(audio_bytes).decode("ascii")
-    return {
-        "ok": True,
-        "data_url": f"data:{mime_type};base64,{encoded}",
-        "mime_type": mime_type,
-        "provider": result.get("provider"),
-    }
-
-
-# ---------------------------------------------------------------------------
-# Gateway + update actions (invoked from the Status page).
-#
-# Both commands are spawned as detached subprocesses so the HTTP request
-# returns immediately.  stdin is closed (``DEVNULL``) so any stray ``input()``
-# calls fail fast with EOF rather than hanging forever.  stdout/stderr are
-# streamed to a per-action log file under ``~/.hermes/logs/<action>.log`` so
-# the dashboard can tail them back to the user.
-# ---------------------------------------------------------------------------
-
-_ACTION_LOG_DIR: Path = get_hermes_home() / "logs"
-
-# Short ``name`` (from the URL) → absolute log file path.
-_ACTION_LOG_FILES: Dict[str, str] = {
-    "gateway-restart": "gateway-restart.log",
-    "hermes-update": "hermes-update.log",
-}
-
-# ``name`` → most recently spawned Popen handle.  Used so ``status`` can
-# report liveness and exit code without shelling out to ``ps``.
-_ACTION_PROCS: Dict[str, subprocess.Popen] = {}
-# Serialize the check+spawn+store sequence so two concurrent restart/update
-# requests can't both observe "no live process" and double-spawn (the FastAPI
-# default threadpool runs sync handlers concurrently).
-_ACTION_SPAWN_LOCK = threading.Lock()
-
-
-def _current_git_branch() -> Optional[str]:
-    """Return the branch PROJECT_ROOT is on, or None if detached/unknown.
-
-    Used to pin ``hermes update`` to the tracked branch instead of letting it
-    default to main. Best-effort: returns None on detached HEAD, non-git
-    checkouts, or any git failure, in which case callers fall back to the
-    bare ``hermes update`` default.
-    """
-    try:
-        result = subprocess.run(
-            ["git", "rev-parse", "--abbrev-ref", "HEAD"],
-            cwd=str(PROJECT_ROOT),
-            capture_output=True,
-            text=True,
-            timeout=10,
-        )
-    except Exception:
-        return None
-    if result.returncode != 0:
-        return None
-    branch = result.stdout.strip()
-    if not branch or branch == "HEAD":  # empty or detached
-        return None
-    return branch
-
-
-def _spawn_hermes_action(subcommand: List[str], name: str) -> subprocess.Popen:
-    """Spawn ``hermes <subcommand>`` detached and record the Popen handle.
-
-    Uses the running interpreter's ``hermes_cli.main`` module so the action
-    inherits the same venv/PYTHONPATH the web server is using.
-    """
-    with _ACTION_SPAWN_LOCK:
-        existing = _ACTION_PROCS.get(name)
-        if existing is not None and existing.poll() is None:
-            # Desktop can fire duplicate restart/update requests from retries.
-            # Reuse the active process instead of spawning overlapping actions.
-            return existing
-
-        log_file_name = _ACTION_LOG_FILES[name]
-        _ACTION_LOG_DIR.mkdir(parents=True, exist_ok=True)
-        log_path = _ACTION_LOG_DIR / log_file_name
-        log_file = open(log_path, "ab", buffering=0)
-        log_file.write(
-            f"\n=== {name} started {time.strftime('%Y-%m-%d %H:%M:%S')} ===\n".encode()
-        )
-
-        cmd = [sys.executable, "-m", "hermes_cli.main", *subcommand]
-
-        popen_kwargs: Dict[str, Any] = {
-            "cwd": str(PROJECT_ROOT),
-            "stdin": subprocess.DEVNULL,
-            "stdout": log_file,
-            "stderr": subprocess.STDOUT,
-            "env": {**os.environ, "HERMES_NONINTERACTIVE": "1"},
-        }
-        if sys.platform == "win32":
-            popen_kwargs["creationflags"] = (
-                subprocess.CREATE_NEW_PROCESS_GROUP  # type: ignore[attr-defined]
-                | getattr(subprocess, "DETACHED_PROCESS", 0)
-            )
-        else:
-            popen_kwargs["start_new_session"] = True
-
-        try:
-            proc = subprocess.Popen(cmd, **popen_kwargs)
-        except (OSError, ValueError) as exc:
-            # Record the failure in the action log so the dashboard's status
-            # endpoint surfaces something useful, then close the file handle
-            # before re-raising so we don't leak it.
-            try:
-                log_file.write(
-                    f"=== {name} spawn failed: {exc} ===\n".encode()
-                )
-            except Exception:  # pragma: no cover - defensive logging
-                pass
-            log_file.close()
-            raise
-
-        _ACTION_PROCS[name] = proc
-        return proc
-
-
-def _tail_lines(path: Path, n: int) -> List[str]:
-    """Return the last ``n`` lines of ``path``.  Reads the whole file — fine
-    for our small per-action logs.  Binary-decoded with ``errors='replace'``
-    so log corruption doesn't 500 the endpoint."""
-    if not path.exists():
-        return []
-    try:
-        text = path.read_text(encoding="utf-8", errors="replace")
-    except OSError:
-        return []
-    lines = text.splitlines()
-    return lines[-n:] if n > 0 else lines
-
-
-@app.post("/api/gateway/restart")
-async def restart_gateway():
-    """Kick off a ``hermes gateway restart`` in the background."""
-    try:
-        proc = _spawn_hermes_action(["gateway", "restart"], "gateway-restart")
-    except Exception as exc:
-        _log.exception("Failed to spawn gateway restart")
-        raise HTTPException(status_code=500, detail=f"Failed to restart gateway: {exc}")
-    return {
-        "ok": True,
-        "pid": proc.pid,
-        "name": "gateway-restart",
-    }
-
-
-@app.post("/api/hermes/update")
-async def update_hermes():
-    """Kick off ``hermes update`` in the background.
-
-    Pin the update to the branch the checkout is CURRENTLY on. Without this,
-    ``hermes update`` falls back to its built-in default (main) and switches
-    the working tree off the tracked branch — e.g. a bb/gui install silently
-    jumps to main and loses the desktop CLI. Pass --branch <current> so the
-    update stays on-branch. Detached HEAD / detection failure falls back to
-    bare ``hermes update`` (the prior behavior).
-    """
-    args: List[str] = ["update"]
-    branch = _current_git_branch()
-    if branch:
-        args += ["--branch", branch]
-    try:
-        proc = _spawn_hermes_action(args, "hermes-update")
-    except Exception as exc:
-        _log.exception("Failed to spawn hermes update")
-        raise HTTPException(status_code=500, detail=f"Failed to start update: {exc}")
-    return {
-        "ok": True,
-        "pid": proc.pid,
-        "name": "hermes-update",
-    }
-
-
 @app.get("/api/actions/{name}/status")
 async def get_action_status(name: str, lines: int = 200):
     """Tail an action log and report whether the process is still running."""
@@ -1227,33 +954,20 @@ async def get_action_status(name: str, lines: int = 200):
 
 
 @app.get("/api/sessions")
-async def get_sessions(limit: int = 20, offset: int = 0, min_messages: int = 0):
+async def get_sessions(limit: int = 20, offset: int = 0):
     try:
         from hermes_state import SessionDB
-
         db = SessionDB()
         try:
-            min_message_count = max(0, min_messages)
-            sessions = db.list_sessions_rich(
-                limit=limit, offset=offset, min_message_count=min_message_count
-            )
-            total = db.session_count(min_message_count=min_message_count)
+            sessions = db.list_sessions_rich(limit=limit, offset=offset)
+            total = db.session_count()
             now = time.time()
             for s in sessions:
-                # Return only persisted per-session cwd from SessionDB.
-                # Falling back to process-level terminal.cwd causes historical
-                # sessions to "teleport" between workspaces as config changes.
-                s["cwd"] = s.get("cwd") or None
                 s["is_active"] = (
                     s.get("ended_at") is None
                     and (now - s.get("last_active", s.get("started_at", 0))) < 300
                 )
-            return {
-                "sessions": sessions,
-                "total": total,
-                "limit": limit,
-                "offset": offset,
-            }
+            return {"sessions": sessions, "total": total, "limit": limit, "offset": offset}
         finally:
             db.close()
     except Exception:
@@ -1268,14 +982,12 @@ async def search_sessions(q: str = "", limit: int = 20):
         return {"results": []}
     try:
         from hermes_state import SessionDB
-
         db = SessionDB()
         try:
             # Auto-add prefix wildcards so partial words match
             # e.g. "nimb" → "nimb*" matches "nimby"
             # Preserve quoted phrases and existing wildcards as-is
             import re
-
             terms = []
             for token in re.findall(r'"[^"]*"|\S+', q.strip()):
                 if token.startswith('"') or token.endswith("*"):
@@ -1386,7 +1098,6 @@ def get_model_info():
         # purely auto-detected value, then separately report the override)
         try:
             from agent.model_metadata import get_model_context_length
-
             auto_ctx = get_model_context_length(
                 model=model_name,
                 base_url=base_url,
@@ -1407,7 +1118,6 @@ def get_model_info():
         caps = {}
         try:
             from agent.models_dev import get_model_capabilities
-
             mc = get_model_capabilities(provider=provider, model=model_name)
             if mc is not None:
                 caps = {
@@ -1496,17 +1206,13 @@ def get_auxiliary_models():
 
         tasks = []
         for slot in _AUX_TASK_SLOTS:
-            slot_cfg = (
-                aux_cfg.get(slot, {}) if isinstance(aux_cfg.get(slot), dict) else {}
-            )
-            tasks.append(
-                {
-                    "task": slot,
-                    "provider": str(slot_cfg.get("provider", "auto") or "auto"),
-                    "model": str(slot_cfg.get("model", "") or ""),
-                    "base_url": str(slot_cfg.get("base_url", "") or ""),
-                }
-            )
+            slot_cfg = aux_cfg.get(slot, {}) if isinstance(aux_cfg.get(slot), dict) else {}
+            tasks.append({
+                "task": slot,
+                "provider": str(slot_cfg.get("provider", "auto") or "auto"),
+                "model": str(slot_cfg.get("model", "") or ""),
+                "base_url": str(slot_cfg.get("base_url", "") or ""),
+            })
 
         model_cfg = cfg.get("model", {})
         if isinstance(model_cfg, dict):
@@ -1537,18 +1243,14 @@ async def set_model_assignment(body: ModelAssignment):
     task = (body.task or "").strip().lower()
 
     if scope not in {"main", "auxiliary"}:
-        raise HTTPException(
-            status_code=400, detail="scope must be 'main' or 'auxiliary'"
-        )
+        raise HTTPException(status_code=400, detail="scope must be 'main' or 'auxiliary'")
 
     try:
         cfg = load_config()
 
         if scope == "main":
             if not provider or not model:
-                raise HTTPException(
-                    status_code=400, detail="provider and model required for main"
-                )
+                raise HTTPException(status_code=400, detail="provider and model required for main")
             model_cfg = cfg.get("model", {})
             if not isinstance(model_cfg, dict):
                 model_cfg = {}
@@ -1584,16 +1286,12 @@ async def set_model_assignment(body: ModelAssignment):
             return {"ok": True, "scope": "auxiliary", "reset": True}
 
         if not provider:
-            raise HTTPException(
-                status_code=400, detail="provider required for auxiliary"
-            )
+            raise HTTPException(status_code=400, detail="provider required for auxiliary")
 
         targets = [task] if task else list(_AUX_TASK_SLOTS)
         for slot in targets:
             if slot not in _AUX_TASK_SLOTS:
-                raise HTTPException(
-                    status_code=400, detail=f"unknown auxiliary task: {slot}"
-                )
+                raise HTTPException(status_code=400, detail=f"unknown auxiliary task: {slot}")
             slot_cfg = aux.get(slot)
             if not isinstance(slot_cfg, dict):
                 slot_cfg = {}
@@ -1617,6 +1315,8 @@ async def set_model_assignment(body: ModelAssignment):
         raise HTTPException(status_code=500, detail="Failed to save model assignment")
 
 
+
+
 def _denormalize_config_from_web(config: Dict[str, Any]) -> Dict[str, Any]:
     """Reverse _normalize_config_for_web before saving.
 
@@ -1745,9 +1445,7 @@ async def reveal_env_var(body: EnvVarReveal, request: Request):
     cutoff = now - _REVEAL_WINDOW_SECONDS
     _reveal_timestamps[:] = [t for t in _reveal_timestamps if t > cutoff]
     if len(_reveal_timestamps) >= _REVEAL_MAX_PER_WINDOW:
-        raise HTTPException(
-            status_code=429, detail="Too many reveal requests. Try again shortly."
-        )
+        raise HTTPException(status_code=429, detail="Too many reveal requests. Try again shortly.")
     _reveal_timestamps.append(now)
 
     # --- Reveal ---
@@ -1760,8 +1458,6 @@ async def reveal_env_var(body: EnvVarReveal, request: Request):
     return {"key": body.key, "value": value}
 
 
-# Curated UI metadata for messaging platforms. Keyed by the gateway's
-# platform id (Platform.value for built-ins, PlatformEntry.name for plugins).
 # Entries omit fields they don't need to override; the catalog builder fills
 # in env_vars from OPTIONAL_ENV_VARS via prefix matching when not specified,
 # and pulls required_env from a plugin's PlatformEntry when available.
@@ -2534,7 +2230,6 @@ def _claude_code_only_status() -> Dict[str, Any]:
     """
     try:
         from agent.anthropic_adapter import read_claude_code_credentials
-
         creds = read_claude_code_credentials()
     except Exception:
         creds = None
@@ -2623,7 +2318,6 @@ def _resolve_provider_status(provider_id: str, status_fn) -> Dict[str, Any]:
             return {"logged_in": False, "error": str(e)}
     try:
         from hermes_cli import auth as hauth
-
         if provider_id == "nous":
             raw = hauth.get_nous_auth_status()
             return {
@@ -2691,16 +2385,14 @@ async def list_oauth_providers():
     providers = []
     for p in _OAUTH_PROVIDER_CATALOG:
         status = _resolve_provider_status(p["id"], p.get("status_fn"))
-        providers.append(
-            {
-                "id": p["id"],
-                "name": p["name"],
-                "flow": p["flow"],
-                "cli_command": p["cli_command"],
-                "docs_url": p["docs_url"],
-                "status": status,
-            }
-        )
+        providers.append({
+            "id": p["id"],
+            "name": p["name"],
+            "flow": p["flow"],
+            "cli_command": p["cli_command"],
+            "docs_url": p["docs_url"],
+            "status": status,
+        })
     return {"providers": providers}
 
 
@@ -2714,7 +2406,7 @@ async def disconnect_oauth_provider(provider_id: str, request: Request):
         raise HTTPException(
             status_code=400,
             detail=f"Unknown provider: {provider_id}. "
-            f"Available: {', '.join(sorted(valid_ids))}",
+                   f"Available: {', '.join(sorted(valid_ids))}",
         )
 
     # Anthropic and claude-code clear the same Hermes-managed PKCE file
@@ -2724,7 +2416,6 @@ async def disconnect_oauth_provider(provider_id: str, request: Request):
     if provider_id in {"anthropic", "claude-code"}:
         try:
             from agent.anthropic_adapter import _HERMES_OAUTH_FILE
-
             if _HERMES_OAUTH_FILE.exists():
                 _HERMES_OAUTH_FILE.unlink()
         except Exception:
@@ -2732,7 +2423,6 @@ async def disconnect_oauth_provider(provider_id: str, request: Request):
         # Also clear the credential pool entry if present.
         try:
             from hermes_cli.auth import clear_provider_auth
-
             clear_provider_auth("anthropic")
         except Exception:
             pass
@@ -2741,7 +2431,6 @@ async def disconnect_oauth_provider(provider_id: str, request: Request):
 
     try:
         from hermes_cli.auth import clear_provider_auth
-
         cleared = clear_provider_auth(provider_id)
         _log.info("oauth/disconnect: %s (cleared=%s)", provider_id, cleared)
         return {"ok": bool(cleared), "provider": provider_id}
@@ -2801,7 +2490,6 @@ try:
         _OAUTH_SCOPES as _ANTHROPIC_OAUTH_SCOPES,
         _generate_pkce as _generate_pkce_pair,
     )
-
     _ANTHROPIC_OAUTH_AVAILABLE = True
 except ImportError:
     _ANTHROPIC_OAUTH_AVAILABLE = False
@@ -2812,9 +2500,7 @@ def _gc_oauth_sessions() -> None:
     """Drop expired sessions. Called opportunistically on /start."""
     cutoff = time.time() - _OAUTH_SESSION_TTL_SECONDS
     with _oauth_sessions_lock:
-        stale = [
-            sid for sid, sess in _oauth_sessions.items() if sess["created_at"] < cutoff
-        ]
+        stale = [sid for sid, sess in _oauth_sessions.items() if sess["created_at"] < cutoff]
         for sid in stale:
             _oauth_sessions.pop(sid, None)
 
@@ -2835,16 +2521,13 @@ def _new_oauth_session(provider_id: str, flow: str) -> tuple[str, Dict[str, Any]
     return sid, sess
 
 
-def _save_anthropic_oauth_creds(
-    access_token: str, refresh_token: str, expires_at_ms: int
-) -> None:
+def _save_anthropic_oauth_creds(access_token: str, refresh_token: str, expires_at_ms: int) -> None:
     """Persist Anthropic PKCE creds to both Hermes file AND credential pool.
 
     Mirrors what auth_commands.add_command does so the dashboard flow leaves
     the system in the same state as ``hermes auth add anthropic``.
     """
     from agent.anthropic_adapter import _HERMES_OAUTH_FILE
-
     payload = {
         "accessToken": access_token,
         "refreshToken": refresh_token,
@@ -2881,14 +2564,9 @@ def _save_anthropic_oauth_creds(
             SOURCE_MANUAL,
         )
         import uuid
-
         pool = load_pool("anthropic")
         # Avoid duplicate entries: delete any prior dashboard-issued OAuth entry
-        existing = [
-            e
-            for e in pool.entries()
-            if getattr(e, "source", "").startswith(f"{SOURCE_MANUAL}:dashboard_pkce")
-        ]
+        existing = [e for e in pool.entries() if getattr(e, "source", "").startswith(f"{SOURCE_MANUAL}:dashboard_pkce")]
         for e in existing:
             try:
                 pool.remove_entry(getattr(e, "id", ""))
@@ -2913,9 +2591,7 @@ def _save_anthropic_oauth_creds(
 def _start_anthropic_pkce() -> Dict[str, Any]:
     """Begin PKCE flow. Returns the auth URL the UI should open."""
     if not _ANTHROPIC_OAUTH_AVAILABLE:
-        raise HTTPException(
-            status_code=501, detail="Anthropic OAuth not available (missing adapter)"
-        )
+        raise HTTPException(status_code=501, detail="Anthropic OAuth not available (missing adapter)")
     verifier, challenge = _generate_pkce_pair()
     sid, sess = _new_oauth_session("anthropic", "pkce")
     sess["verifier"] = verifier
@@ -2946,11 +2622,7 @@ def _submit_anthropic_pkce(session_id: str, code_input: str) -> Dict[str, Any]:
     if not sess or sess["provider"] != "anthropic" or sess["flow"] != "pkce":
         raise HTTPException(status_code=404, detail="Unknown or expired session")
     if sess["status"] != "pending":
-        return {
-            "ok": False,
-            "status": sess["status"],
-            "message": sess.get("error_message"),
-        }
+        return {"ok": False, "status": sess["status"], "message": sess.get("error_message")}
 
     # Anthropic's redirect callback page formats the code as `<code>#<state>`.
     # Strip the state suffix if present (we already have the verifier server-side).
@@ -2960,16 +2632,14 @@ def _submit_anthropic_pkce(session_id: str, code_input: str) -> Dict[str, Any]:
         return {"ok": False, "status": "error", "message": "No code provided"}
     state_from_callback = parts[1] if len(parts) > 1 else ""
 
-    exchange_data = json.dumps(
-        {
-            "grant_type": "authorization_code",
-            "client_id": _ANTHROPIC_OAUTH_CLIENT_ID,
-            "code": code,
-            "state": state_from_callback or sess["state"],
-            "redirect_uri": _ANTHROPIC_OAUTH_REDIRECT_URI,
-            "code_verifier": sess["verifier"],
-        }
-    ).encode()
+    exchange_data = json.dumps({
+        "grant_type": "authorization_code",
+        "client_id": _ANTHROPIC_OAUTH_CLIENT_ID,
+        "code": code,
+        "state": state_from_callback or sess["state"],
+        "redirect_uri": _ANTHROPIC_OAUTH_REDIRECT_URI,
+        "code_verifier": sess["verifier"],
+    }).encode()
     req = urllib.request.Request(
         _ANTHROPIC_OAUTH_TOKEN_URL,
         data=exchange_data,
@@ -3020,12 +2690,10 @@ async def _start_device_code_flow(provider_id: str) -> Dict[str, Any]:
     """
     if provider_id == "nous":
         from hermes_cli.auth import (
-            _nous_device_scope_with_env_override,
-            _request_nous_device_code_with_scope_fallback,
+            _request_device_code,
             PROVIDER_REGISTRY,
         )
         import httpx
-
         pconfig = PROVIDER_REGISTRY["nous"]
         portal_base_url = (
             os.getenv("HERMES_PORTAL_BASE_URL")
@@ -3033,22 +2701,21 @@ async def _start_device_code_flow(provider_id: str) -> Dict[str, Any]:
             or pconfig.portal_base_url
         ).rstrip("/")
         client_id = pconfig.client_id
-        scope, explicit_scope = _nous_device_scope_with_env_override(
-            None,
-            default_scope=pconfig.scope,
-        )
+        scope = pconfig.scope
 
         def _do_nous_device_request():
             with httpx.Client(
                 timeout=httpx.Timeout(15.0),
                 headers={"Accept": "application/json"},
             ) as client:
-                return _request_nous_device_code_with_scope_fallback(
-                    client=client,
-                    portal_base_url=portal_base_url,
-                    client_id=client_id,
-                    scope=scope,
-                    allow_legacy_fallback=not explicit_scope,
+                return (
+                    _request_device_code(
+                        client=client,
+                        portal_base_url=portal_base_url,
+                        client_id=client_id,
+                        scope=scope,
+                    ),
+                    scope,
                 )
 
         device_data, effective_scope = await asyncio.get_running_loop().run_in_executor(
@@ -3082,9 +2749,7 @@ async def _start_device_code_flow(provider_id: str) -> Dict[str, Any]:
         # verification_url back via the session dict. The helper prints
         # to stdout — we capture nothing here, just status.
         threading.Thread(
-            target=_codex_full_login_worker,
-            args=(sid,),
-            daemon=True,
+            target=_codex_full_login_worker, args=(sid,), daemon=True,
             name=f"oauth-codex-{sid[:6]}",
         ).start()
         # Block briefly until the worker has populated the user_code, OR error.
@@ -3098,14 +2763,9 @@ async def _start_device_code_flow(provider_id: str) -> Dict[str, Any]:
         with _oauth_sessions_lock:
             s = _oauth_sessions.get(sid, {})
         if s.get("status") == "error":
-            raise HTTPException(
-                status_code=500, detail=s.get("error_message") or "device-auth failed"
-            )
+            raise HTTPException(status_code=500, detail=s.get("error_message") or "device-auth failed")
         if not s.get("user_code"):
-            raise HTTPException(
-                status_code=504,
-                detail="device-auth timed out before returning a user code",
-            )
+            raise HTTPException(status_code=504, detail="device-auth timed out before returning a user code")
         return {
             "session_id": sid,
             "flow": "device_code",
@@ -3129,12 +2789,10 @@ async def _start_device_code_flow(provider_id: str) -> Dict[str, Any]:
             MINIMAX_OAUTH_GLOBAL_BASE,
         )
         import httpx
-
         verifier, challenge, state = _minimax_pkce_pair()
         portal_base_url = (
             os.getenv("MINIMAX_PORTAL_BASE_URL") or MINIMAX_OAUTH_GLOBAL_BASE
         ).rstrip("/")
-
         def _do_minimax_request():
             with httpx.Client(
                 timeout=httpx.Timeout(15.0),
@@ -3148,7 +2806,6 @@ async def _start_device_code_flow(provider_id: str) -> Dict[str, Any]:
                     code_challenge=challenge,
                     state=state,
                 )
-
         device_data = await asyncio.get_event_loop().run_in_executor(
             None, _do_minimax_request
         )
@@ -3157,7 +2814,9 @@ async def _start_device_code_flow(provider_id: str) -> Dict[str, Any]:
         # `interval` field is in milliseconds (defensive default 2000ms
         # in _minimax_poll_token).
         interval_raw = device_data.get("interval")
-        sess["interval_ms"] = int(interval_raw) if interval_raw is not None else None
+        sess["interval_ms"] = (
+            int(interval_raw) if interval_raw is not None else None
+        )
         sess["user_code"] = str(device_data["user_code"])
         sess["code_verifier"] = verifier
         sess["state"] = state
@@ -3192,22 +2851,17 @@ async def _start_device_code_flow(provider_id: str) -> Dict[str, Any]:
             "poll_interval": max(2, (sess["interval_ms"] or 2000) // 1000),
         }
 
-    raise HTTPException(
-        status_code=400,
-        detail=f"Provider {provider_id} does not support device-code flow",
-    )
+    raise HTTPException(status_code=400, detail=f"Provider {provider_id} does not support device-code flow")
 
 
 def _nous_poller(session_id: str) -> None:
     """Background poller that drives a Nous device-code flow to completion."""
     from hermes_cli.auth import (
-        NOUS_INFERENCE_AUTH_MODE_FRESH,
         _poll_for_token,
         refresh_nous_oauth_from_state,
     )
     from datetime import datetime, timezone
     import httpx
-
     with _oauth_sessions_lock:
         sess = _oauth_sessions.get(session_id)
     if not sess:
@@ -3219,9 +2873,7 @@ def _nous_poller(session_id: str) -> None:
     scope = sess.get("scope")
     expires_in = max(60, int(sess["expires_at"] - time.time()))
     try:
-        with httpx.Client(
-            timeout=httpx.Timeout(15.0), headers={"Accept": "application/json"}
-        ) as client:
+        with httpx.Client(timeout=httpx.Timeout(15.0), headers={"Accept": "application/json"}) as client:
             token_data = _poll_for_token(
                 client=client,
                 portal_base_url=portal_base_url,
@@ -3230,7 +2882,7 @@ def _nous_poller(session_id: str) -> None:
                 expires_in=expires_in,
                 poll_interval=interval,
             )
-        # Same post-processing as _nous_device_code_login (mint agent key)
+        # Same post-processing as _nous_device_code_login (validate/refresh JWT)
         now = datetime.now(timezone.utc)
         token_ttl = int(token_data.get("expires_in") or 0)
         auth_state = {
@@ -3243,23 +2895,17 @@ def _nous_poller(session_id: str) -> None:
             "refresh_token": token_data.get("refresh_token"),
             "obtained_at": now.isoformat(),
             "expires_at": (
-                datetime.fromtimestamp(
-                    now.timestamp() + token_ttl, tz=timezone.utc
-                ).isoformat()
-                if token_ttl
-                else None
+                datetime.fromtimestamp(now.timestamp() + token_ttl, tz=timezone.utc).isoformat()
+                if token_ttl else None
             ),
             "expires_in": token_ttl,
         }
         full_state = refresh_nous_oauth_from_state(
             auth_state,
-            min_key_ttl_seconds=300,
             timeout_seconds=15.0,
             force_refresh=False,
-            inference_auth_mode=NOUS_INFERENCE_AUTH_MODE_FRESH,
         )
         from hermes_cli.auth import persist_nous_credentials
-
         persist_nous_credentials(full_state)
         with _oauth_sessions_lock:
             sess["status"] = "approved"
@@ -3291,7 +2937,6 @@ def _minimax_poller(session_id: str) -> None:
     )
     from datetime import datetime, timezone
     import httpx
-
     with _oauth_sessions_lock:
         sess = _oauth_sessions.get(session_id)
     if not sess:
@@ -3324,8 +2969,7 @@ def _minimax_poller(session_id: str) -> None:
         # flow which supports `--region cn`.
         now = datetime.now(timezone.utc)
         expires_at_ts = _minimax_resolve_token_expiry_unix(
-            int(token_data["expired_in"]),
-            now=now,
+            int(token_data["expired_in"]), now=now,
         )
         expires_in_s = max(0, int(expires_at_ts - now.timestamp()))
         auth_state = {
@@ -3378,7 +3022,6 @@ def _codex_full_login_worker(session_id: str) -> None:
             CODEX_OAUTH_TOKEN_URL,
             DEFAULT_CODEX_BASE_URL,
         )
-
         issuer = "https://auth.openai.com"
 
         # Step 1: request device code
@@ -3395,9 +3038,7 @@ def _codex_full_login_worker(session_id: str) -> None:
         device_auth_id = device_data.get("device_auth_id", "")
         poll_interval = max(3, int(device_data.get("interval", "5")))
         if not user_code or not device_auth_id:
-            raise RuntimeError(
-                "device-code response missing user_code or device_auth_id"
-            )
+            raise RuntimeError("device-code response missing user_code or device_auth_id")
         verification_url = f"{issuer}/codex/device"
         with _oauth_sessions_lock:
             sess = _oauth_sessions.get(session_id)
@@ -3438,9 +3079,7 @@ def _codex_full_login_worker(session_id: str) -> None:
         authorization_code = code_resp.get("authorization_code", "")
         code_verifier = code_resp.get("code_verifier", "")
         if not authorization_code or not code_verifier:
-            raise RuntimeError(
-                "device-auth response missing authorization_code/code_verifier"
-            )
+            raise RuntimeError("device-auth response missing authorization_code/code_verifier")
         with httpx.Client(timeout=httpx.Timeout(15.0)) as client:
             token_resp = client.post(
                 CODEX_OAUTH_TOKEN_URL,
@@ -3469,7 +3108,6 @@ def _codex_full_login_worker(session_id: str) -> None:
             SOURCE_MANUAL,
         )
         import uuid as _uuid
-
         pool = load_pool("openai-codex")
         base_url = (
             os.getenv("HERMES_CODEX_BASE_URL", "").strip().rstrip("/")
@@ -3543,14 +3181,9 @@ async def submit_oauth_code(provider_id: str, body: OAuthSubmitBody, request: Re
     _require_token(request)
     if provider_id == "anthropic":
         return await asyncio.get_running_loop().run_in_executor(
-            None,
-            _submit_anthropic_pkce,
-            body.session_id,
-            body.code,
+            None, _submit_anthropic_pkce, body.session_id, body.code,
         )
-    raise HTTPException(
-        status_code=400, detail=f"submit not supported for {provider_id}"
-    )
+    raise HTTPException(status_code=400, detail=f"submit not supported for {provider_id}")
 
 
 @app.get("/api/providers/oauth/{provider_id}/poll/{session_id}")
@@ -3586,6 +3219,7 @@ async def cancel_oauth_session(session_id: str, request: Request):
 # ---------------------------------------------------------------------------
 
 
+
 def _session_latest_descendant(session_id: str):
     """Resolve a session id to the newest child leaf session.
 
@@ -3624,13 +3258,11 @@ def _session_latest_descendant(session_id: str):
                 "SELECT id, parent_session_id, started_at FROM sessions"
             ).fetchall()
             for row in raw_rows:
-                rows.append(
-                    {
-                        "id": row_get(row, "id", 0),
-                        "parent_session_id": row_get(row, "parent_session_id", 1),
-                        "started_at": row_get(row, "started_at", 2),
-                    }
-                )
+                rows.append({
+                    "id": row_get(row, "id", 0),
+                    "parent_session_id": row_get(row, "parent_session_id", 1),
+                    "started_at": row_get(row, "started_at", 2),
+                })
         else:
             rows = db.list_sessions_rich(limit=10000, offset=0)
 
@@ -3664,11 +3296,9 @@ def _session_latest_descendant(session_id: str):
     finally:
         db.close()
 
-
 @app.get("/api/sessions/{session_id}")
 async def get_session_detail(session_id: str):
     from hermes_state import SessionDB
-
     db = SessionDB()
     try:
         sid = db.resolve_session_id(session_id)
@@ -3680,6 +3310,7 @@ async def get_session_detail(session_id: str):
         db.close()
 
 
+
 @app.get("/api/sessions/{session_id}/latest-descendant")
 async def get_session_latest_descendant(session_id: str):
     latest, path = _session_latest_descendant(session_id)
@@ -3692,11 +3323,9 @@ async def get_session_latest_descendant(session_id: str):
         "changed": bool(path and latest != path[0]),
     }
 
-
 @app.get("/api/sessions/{session_id}/messages")
 async def get_session_messages(session_id: str):
     from hermes_state import SessionDB
-
     db = SessionDB()
     try:
         sid = db.resolve_session_id(session_id)
@@ -3711,7 +3340,6 @@ async def get_session_messages(session_id: str):
 @app.delete("/api/sessions/{session_id}")
 async def delete_session_endpoint(session_id: str):
     from hermes_state import SessionDB
-
     db = SessionDB()
     try:
         if not db.delete_session(session_id):
@@ -3721,29 +3349,6 @@ async def delete_session_endpoint(session_id: str):
         db.close()
 
 
-class SessionRename(BaseModel):
-    title: str
-
-
-@app.patch("/api/sessions/{session_id}")
-async def rename_session_endpoint(session_id: str, body: SessionRename):
-    from hermes_state import SessionDB
-
-    db = SessionDB()
-    try:
-        sid = db.resolve_session_id(session_id) or session_id
-        try:
-            ok = db.set_session_title(sid, body.title)
-        except ValueError as exc:
-            # Title collision or validation failure (e.g. duplicate title).
-            raise HTTPException(status_code=409, detail=str(exc))
-        if not ok:
-            raise HTTPException(status_code=404, detail="Session not found")
-        return {"ok": True, "title": db.get_session_title(sid) or ""}
-    finally:
-        db.close()
-
-
 # ---------------------------------------------------------------------------
 # Log viewer endpoint
 # ---------------------------------------------------------------------------
@@ -3781,15 +3386,14 @@ async def get_logs(
             raise HTTPException(
                 status_code=400,
                 detail=f"Unknown component: {component}. "
-                f"Available: {', '.join(sorted(COMPONENT_PREFIXES))}",
+                       f"Available: {', '.join(sorted(COMPONENT_PREFIXES))}",
             )
     else:
         comp_prefixes = None
 
     has_filters = bool(min_level or comp_prefixes or search)
     result = _read_tail(
-        log_path,
-        min(lines, 500) if not search else 2000,
+        log_path, min(lines, 500) if not search else 2000,
         has_filters=has_filters,
         min_level=min_level,
         component_prefixes=comp_prefixes,
@@ -3799,7 +3403,7 @@ async def get_logs(
     # trim to the requested line count afterward.
     if search:
         needle = search.lower()
-        result = [l for l in result if needle in l.lower()][-min(lines, 500) :]
+        result = [l for l in result if needle in l.lower()][-min(lines, 500):]
     return {"file": file, "lines": result}
 
 
@@ -4053,44 +3657,32 @@ def _fallback_profile_dicts(profiles_mod) -> List[Dict[str, Any]]:
     profiles: List[Dict[str, Any]] = []
     default_home = profiles_mod._get_default_hermes_home()
     if default_home.is_dir():
-        model, provider = _safe(
-            lambda: profiles_mod._read_config_model(default_home), (None, None)
-        )
-        profiles.append(
-            {
-                "name": "default",
-                "path": str(default_home),
-                "is_default": True,
-                "model": model,
-                "provider": provider,
-                "has_env": (default_home / ".env").exists(),
-                "skill_count": _safe(
-                    lambda: profiles_mod._count_skills(default_home), 0
-                ),
-            }
-        )
+        model, provider = _safe(lambda: profiles_mod._read_config_model(default_home), (None, None))
+        profiles.append({
+            "name": "default",
+            "path": str(default_home),
+            "is_default": True,
+            "model": model,
+            "provider": provider,
+            "has_env": (default_home / ".env").exists(),
+            "skill_count": _safe(lambda: profiles_mod._count_skills(default_home), 0),
+        })
 
     profiles_root = profiles_mod._get_profiles_root()
     if profiles_root.is_dir():
         for entry in sorted(profiles_root.iterdir()):
             if not entry.is_dir() or not profiles_mod._PROFILE_ID_RE.match(entry.name):
                 continue
-            model, provider = _safe(
-                lambda entry=entry: profiles_mod._read_config_model(entry), (None, None)
-            )
-            profiles.append(
-                {
-                    "name": entry.name,
-                    "path": str(entry),
-                    "is_default": False,
-                    "model": model,
-                    "provider": provider,
-                    "has_env": (entry / ".env").exists(),
-                    "skill_count": _safe(
-                        lambda entry=entry: profiles_mod._count_skills(entry), 0
-                    ),
-                }
-            )
+            model, provider = _safe(lambda entry=entry: profiles_mod._read_config_model(entry), (None, None))
+            profiles.append({
+                "name": entry.name,
+                "path": str(entry),
+                "is_default": False,
+                "model": model,
+                "provider": provider,
+                "has_env": (entry / ".env").exists(),
+                "skill_count": _safe(lambda entry=entry: profiles_mod._count_skills(entry), 0),
+            })
 
     return profiles
 
@@ -4098,7 +3690,6 @@ def _fallback_profile_dicts(profiles_mod) -> List[Dict[str, Any]]:
 def _resolve_profile_dir(name: str) -> Path:
     """Validate ``name`` and resolve to its directory or raise an HTTPException."""
     from hermes_cli import profiles as profiles_mod
-
     try:
         profiles_mod.validate_profile_name(name)
     except ValueError as e:
@@ -4117,20 +3708,16 @@ def _profile_setup_command(name: str) -> str:
 @app.get("/api/profiles")
 async def list_profiles_endpoint():
     from hermes_cli import profiles as profiles_mod
-
     try:
         return {"profiles": [_profile_to_dict(p) for p in profiles_mod.list_profiles()]}
     except Exception:
-        _log.exception(
-            "GET /api/profiles failed; falling back to profile directory scan"
-        )
+        _log.exception("GET /api/profiles failed; falling back to profile directory scan")
         return {"profiles": _fallback_profile_dicts(profiles_mod)}
 
 
 @app.post("/api/profiles")
 async def create_profile_endpoint(body: ProfileCreate):
     from hermes_cli import profiles as profiles_mod
-
     try:
         path = profiles_mod.create_profile(
             name=body.name,
@@ -4182,10 +3769,7 @@ async def open_profile_terminal_endpoint(name: str):
             subprocess.Popen(["osascript", "-e", applescript])
         else:
             terminal_commands = [
-                (
-                    "x-terminal-emulator",
-                    ["x-terminal-emulator", "-e", "sh", "-lc", command],
-                ),
+                ("x-terminal-emulator", ["x-terminal-emulator", "-e", "sh", "-lc", command]),
                 ("gnome-terminal", ["gnome-terminal", "--", "sh", "-lc", command]),
                 ("konsole", ["konsole", "-e", "sh", "-lc", command]),
                 ("xfce4-terminal", ["xfce4-terminal", "-e", f"sh -lc '{command}'"]),
@@ -4197,14 +3781,11 @@ async def open_profile_terminal_endpoint(name: str):
                 ("xterm", ["xterm", "-e", "sh", "-lc", command]),
             ]
             for executable, popen_args in terminal_commands:
-                if (
-                    subprocess.call(
-                        ["which", executable],
-                        stdout=subprocess.DEVNULL,
-                        stderr=subprocess.DEVNULL,
-                    )
-                    == 0
-                ):
+                if subprocess.call(
+                    ["which", executable],
+                    stdout=subprocess.DEVNULL,
+                    stderr=subprocess.DEVNULL,
+                ) == 0:
                     subprocess.Popen(popen_args)
                     break
             else:
@@ -4227,7 +3808,6 @@ async def open_profile_terminal_endpoint(name: str):
 @app.patch("/api/profiles/{name}")
 async def rename_profile_endpoint(name: str, body: ProfileRename):
     from hermes_cli import profiles as profiles_mod
-
     try:
         path = profiles_mod.rename_profile(name, body.new_name)
     except FileNotFoundError as e:
@@ -4246,7 +3826,6 @@ async def delete_profile_endpoint(name: str):
     its own dialog before this request, so we always pass ``yes=True`` to
     skip the CLI's interactive prompt."""
     from hermes_cli import profiles as profiles_mod
-
     try:
         path = profiles_mod.delete_profile(name, yes=True)
     except FileNotFoundError as e:
@@ -4295,7 +3874,6 @@ class SkillToggle(BaseModel):
 async def get_skills():
     from tools.skills_tool import _find_all_skills
     from hermes_cli.skills_config import get_disabled_skills
-
     config = load_config()
     disabled = get_disabled_skills(config)
     skills = _find_all_skills(skip_disabled=True)
@@ -4307,7 +3885,6 @@ async def get_skills():
 @app.put("/api/skills/toggle")
 async def toggle_skill(body: SkillToggle):
     from hermes_cli.skills_config import get_disabled_skills, save_disabled_skills
-
     config = load_config()
     disabled = get_disabled_skills(config)
     if body.enabled:
@@ -4340,17 +3917,13 @@ async def get_toolsets():
         except Exception:
             tools = []
         is_enabled = name in enabled_toolsets
-        result.append(
-            {
-                "name": name,
-                "label": label,
-                "description": desc,
-                "enabled": is_enabled,
-                "available": is_enabled,
-                "configured": _toolset_has_keys(name, config),
-                "tools": tools,
-            }
-        )
+        result.append({
+            "name": name, "label": label, "description": desc,
+            "enabled": is_enabled,
+            "available": is_enabled,
+            "configured": _toolset_has_keys(name, config),
+            "tools": tools,
+        })
     return result
 
 
@@ -4396,8 +3969,7 @@ async def get_usage_analytics(days: int = 30):
     db = SessionDB()
     try:
         cutoff = time.time() - (days * 86400)
-        cur = db._conn.execute(
-            """
+        cur = db._conn.execute("""
             SELECT date(started_at, 'unixepoch') as day,
                    SUM(input_tokens) as input_tokens,
                    SUM(output_tokens) as output_tokens,
@@ -4409,13 +3981,10 @@ async def get_usage_analytics(days: int = 30):
                    SUM(COALESCE(api_call_count, 0)) as api_calls
             FROM sessions WHERE started_at > ?
             GROUP BY day ORDER BY day
-        """,
-            (cutoff,),
-        )
+        """, (cutoff,))
         daily = [dict(r) for r in cur.fetchall()]
 
-        cur2 = db._conn.execute(
-            """
+        cur2 = db._conn.execute("""
             SELECT model,
                    SUM(input_tokens) as input_tokens,
                    SUM(output_tokens) as output_tokens,
@@ -4424,13 +3993,10 @@ async def get_usage_analytics(days: int = 30):
                    SUM(COALESCE(api_call_count, 0)) as api_calls
             FROM sessions WHERE started_at > ? AND model IS NOT NULL
             GROUP BY model ORDER BY SUM(input_tokens) + SUM(output_tokens) DESC
-        """,
-            (cutoff,),
-        )
+        """, (cutoff,))
         by_model = [dict(r) for r in cur2.fetchall()]
 
-        cur3 = db._conn.execute(
-            """
+        cur3 = db._conn.execute("""
             SELECT SUM(input_tokens) as total_input,
                    SUM(output_tokens) as total_output,
                    SUM(cache_read_tokens) as total_cache_read,
@@ -4440,23 +4006,18 @@ async def get_usage_analytics(days: int = 30):
                    COUNT(*) as total_sessions,
                    SUM(COALESCE(api_call_count, 0)) as total_api_calls
             FROM sessions WHERE started_at > ?
-        """,
-            (cutoff,),
-        )
+        """, (cutoff,))
         totals = dict(cur3.fetchone())
         insights_report = InsightsEngine(db).generate(days=days)
-        skills = insights_report.get(
-            "skills",
-            {
-                "summary": {
-                    "total_skill_loads": 0,
-                    "total_skill_edits": 0,
-                    "total_skill_actions": 0,
-                    "distinct_skills_used": 0,
-                },
-                "top_skills": [],
+        skills = insights_report.get("skills", {
+            "summary": {
+                "total_skill_loads": 0,
+                "total_skill_edits": 0,
+                "total_skill_actions": 0,
+                "distinct_skills_used": 0,
             },
-        )
+            "top_skills": [],
+        })
 
         return {
             "daily": daily,
@@ -4482,8 +4043,7 @@ async def get_models_analytics(days: int = 30):
     try:
         cutoff = time.time() - (days * 86400)
 
-        cur = db._conn.execute(
-            """
+        cur = db._conn.execute("""
             SELECT model,
                    billing_provider,
                    SUM(input_tokens) as input_tokens,
@@ -4500,9 +4060,7 @@ async def get_models_analytics(days: int = 30):
             FROM sessions WHERE started_at > ? AND model IS NOT NULL AND model != ''
             GROUP BY model, billing_provider
             ORDER BY SUM(input_tokens) + SUM(output_tokens) DESC
-        """,
-            (cutoff,),
-        )
+        """, (cutoff,))
         rows = [dict(r) for r in cur.fetchall()]
 
         models = []
@@ -4512,7 +4070,6 @@ async def get_models_analytics(days: int = 30):
             caps = {}
             try:
                 from agent.models_dev import get_model_capabilities
-
                 mc = get_model_capabilities(provider=provider, model=model_name)
                 if mc is not None:
                     caps = {
@@ -4526,27 +4083,24 @@ async def get_models_analytics(days: int = 30):
             except Exception:
                 pass
 
-            models.append(
-                {
-                    "model": model_name,
-                    "provider": provider,
-                    "input_tokens": row["input_tokens"],
-                    "output_tokens": row["output_tokens"],
-                    "cache_read_tokens": row["cache_read_tokens"],
-                    "reasoning_tokens": row["reasoning_tokens"],
-                    "estimated_cost": row["estimated_cost"],
-                    "actual_cost": row["actual_cost"],
-                    "sessions": row["sessions"],
-                    "api_calls": row["api_calls"],
-                    "tool_calls": row["tool_calls"],
-                    "last_used_at": row["last_used_at"],
-                    "avg_tokens_per_session": row["avg_tokens_per_session"],
-                    "capabilities": caps,
-                }
-            )
+            models.append({
+                "model": model_name,
+                "provider": provider,
+                "input_tokens": row["input_tokens"],
+                "output_tokens": row["output_tokens"],
+                "cache_read_tokens": row["cache_read_tokens"],
+                "reasoning_tokens": row["reasoning_tokens"],
+                "estimated_cost": row["estimated_cost"],
+                "actual_cost": row["actual_cost"],
+                "sessions": row["sessions"],
+                "api_calls": row["api_calls"],
+                "tool_calls": row["tool_calls"],
+                "last_used_at": row["last_used_at"],
+                "avg_tokens_per_session": row["avg_tokens_per_session"],
+                "capabilities": caps,
+            })
 
-        totals_cur = db._conn.execute(
-            """
+        totals_cur = db._conn.execute("""
             SELECT COUNT(DISTINCT model) as distinct_models,
                    SUM(input_tokens) as total_input,
                    SUM(output_tokens) as total_output,
@@ -4557,9 +4111,7 @@ async def get_models_analytics(days: int = 30):
                    COUNT(*) as total_sessions,
                    SUM(COALESCE(api_call_count, 0)) as total_api_calls
             FROM sessions WHERE started_at > ? AND model IS NOT NULL AND model != ''
-        """,
-            (cutoff,),
-        )
+        """, (cutoff,))
         totals = dict(totals_cur.fetchone())
 
         return {
@@ -4577,7 +4129,7 @@ async def get_models_analytics(days: int = 30):
 # The endpoint spawns the same ``hermes --tui`` binary the CLI uses, behind
 # a POSIX pseudo-terminal, and forwards bytes + resize escapes across a
 # WebSocket.  The browser renders the ANSI through xterm.js (see
-# apps/dashboard/src/pages/ChatPage.tsx).
+# web/src/pages/ChatPage.tsx).
 #
 # Auth: ``?token=<session_token>`` query param (browsers can't set
 # Authorization on the WS upgrade).  Same ephemeral ``_SESSION_TOKEN`` as
@@ -4586,7 +4138,6 @@ async def get_models_analytics(days: int = 30):
 # ---------------------------------------------------------------------------
 
 import re
-import asyncio
 
 # PTY bridge is POSIX-only (depends on fcntl/termios/ptyprocess).  On native
 # Windows the import raises; catch and leave PtyBridge=None so the rest of
@@ -4594,7 +4145,6 @@ import asyncio
 # /api/pty endpoint cleanly refuses with a WSL-suggested message.
 try:
     from hermes_cli.pty_bridge import PtyBridge, PtyUnavailableError
-
     _PTY_BRIDGE_AVAILABLE = True
 except ImportError as _pty_import_err:  # pragma: no cover - Windows-only path
     PtyBridge = None  # type: ignore[assignment]
@@ -4602,10 +4152,8 @@ except ImportError as _pty_import_err:  # pragma: no cover - Windows-only path
 
     class PtyUnavailableError(RuntimeError):  # type: ignore[no-redef]
         """Stub on platforms where pty_bridge can't be imported."""
-
         pass
 
-
 _RESIZE_RE = re.compile(rb"\x1b\[RESIZE:(\d+);(\d+)\]")
 _PTY_READ_CHUNK_TIMEOUT = 0.2
 _VALID_CHANNEL_RE = re.compile(r"^[A-Za-z0-9._-]{1,128}$")
@@ -4637,15 +4185,6 @@ def _ws_client_is_allowed(ws: "WebSocket") -> bool:
     return client_host in _LOOPBACK_HOSTS
 
 
-def _ws_client_label(ws: "WebSocket") -> str:
-    """Best-effort peer label for websocket diagnostics."""
-    if ws.client is None:
-        return "unknown"
-    host = ws.client.host or "unknown"
-    port = ws.client.port
-    return f"{host}:{port}" if port is not None else host
-
-
 def _ws_host_origin_is_allowed(ws: "WebSocket") -> bool:
     """Apply the dashboard Host/Origin guard to WebSocket upgrades.
 
@@ -4654,14 +4193,6 @@ def _ws_host_origin_is_allowed(ws: "WebSocket") -> bool:
     repeated here before accepting the upgrade.  Browsers also send an Origin
     header on WebSocket handshakes; when present, require it to target the
     same bound dashboard host.
-
-    Special case: when bound to a loopback interface (127.0.0.1 / ::1 /
-    localhost), accept ``Origin: file://`` from desktop wrappers (Electron,
-    similar) that load the renderer from disk. DNS-rebinding requires a
-    DNS-resolved hostname; ``file://`` carries no host and therefore cannot
-    be the rebinding vector this guard exists to block. Non-loopback binds
-    keep rejecting ``file://`` — the operator chose to expose the dashboard
-    to the network, so the looser policy doesn't apply.
     """
     bound_host = getattr(app.state, "bound_host", None)
     if not bound_host:
@@ -4669,10 +4200,6 @@ def _ws_host_origin_is_allowed(ws: "WebSocket") -> bool:
 
     host_header = ws.headers.get("host", "")
     if not _is_accepted_host(host_header, bound_host):
-        _log.warning(
-            "ws-guard reject reason=bad_host host_header=%r bound_host=%r",
-            host_header, bound_host,
-        )
         return False
 
     origin = ws.headers.get("origin", "")
@@ -4680,28 +4207,10 @@ def _ws_host_origin_is_allowed(ws: "WebSocket") -> bool:
         return True
 
     parsed = urllib.parse.urlparse(origin)
-
-    # Loopback-bind + file:// carve-out: see docstring.
-    if (
-        parsed.scheme == "file"
-        and bound_host.lower() in _LOOPBACK_HOST_VALUES
-    ):
-        return True
-
     if parsed.scheme not in {"http", "https"} or not parsed.netloc:
-        _log.warning(
-            "ws-guard reject reason=bad_origin_scheme origin=%r bound_host=%r",
-            origin, bound_host,
-        )
         return False
 
-    if not _is_accepted_host(parsed.netloc, bound_host):
-        _log.warning(
-            "ws-guard reject reason=origin_host_mismatch origin=%r netloc=%r bound_host=%r",
-            origin, parsed.netloc, bound_host,
-        )
-        return False
-    return True
+    return _is_accepted_host(parsed.netloc, bound_host)
 
 
 def _ws_request_is_allowed(ws: "WebSocket") -> bool:
@@ -4752,7 +4261,6 @@ def _ws_auth_ok(ws: "WebSocket") -> bool:
     token = ws.query_params.get("token", "")
     return hmac.compare_digest(token.encode(), _SESSION_TOKEN.encode())
 
-
 # Per-channel subscriber registry used by /api/pub (PTY-side gateway → dashboard)
 # and /api/events (dashboard → browser sidebar).  Keyed by an opaque channel id
 # the chat tab generates on mount; entries auto-evict when the last subscriber
@@ -4875,10 +4383,10 @@ async def _broadcast_event(channel: str, payload: str) -> None:
     for sub in subs:
         try:
             await sub.send_text(payload)
-        except Exception as exc:
+        except Exception:
             # Subscriber went away mid-send; the /api/events finally clause
             # will remove it from the registry on its next iteration.
-            _log.debug("event broadcast drop channel=%s error=%s", channel, exc)
+            _log.warning("broadcast send failed for subscriber on %s", channel, exc_info=True)
 
 
 def _channel_or_close_code(ws: WebSocket) -> Optional[str]:
@@ -4890,9 +4398,7 @@ def _channel_or_close_code(ws: WebSocket) -> Optional[str]:
 
 @app.websocket("/api/pty")
 async def pty_ws(ws: WebSocket) -> None:
-    peer = _ws_client_label(ws)
     if not _DASHBOARD_EMBEDDED_CHAT_ENABLED:
-        _log.debug("pty-ws reject peer=%s reason=embedded_chat_disabled", peer)
         await ws.close(code=4403)
         return
 
@@ -4902,7 +4408,6 @@ async def pty_ws(ws: WebSocket) -> None:
         return
 
     if not _ws_request_is_allowed(ws):
-        _log.warning("pty-ws reject peer=%s reason=non_loopback_or_bad_origin", peer)
         await ws.close(code=4403)
         return
 
@@ -4911,7 +4416,6 @@ async def pty_ws(ws: WebSocket) -> None:
     # On native Windows, the POSIX PTY bridge can't be imported.  Tell the
     # client and close cleanly rather than pretending the feature works.
     if not _PTY_BRIDGE_AVAILABLE:
-        _log.warning("pty-ws unavailable peer=%s reason=pty_bridge_missing", peer)
         await ws.send_text(
             "\r\n\x1b[31mChat unavailable: the embedded terminal requires a "
             "POSIX PTY, which native Windows Python doesn't provide.\x1b[0m\r\n"
@@ -4925,68 +4429,43 @@ async def pty_ws(ws: WebSocket) -> None:
     resume = ws.query_params.get("resume") or None
     channel = _channel_or_close_code(ws)
     sidecar_url = _build_sidecar_url(channel) if channel else None
-    _log.info(
-        "pty-ws connect peer=%s resume=%s channel=%s",
-        peer,
-        bool(resume),
-        channel or "-",
-    )
 
     try:
         argv, cwd, env = _resolve_chat_argv(resume=resume, sidecar_url=sidecar_url)
     except SystemExit as exc:
         # _make_tui_argv calls sys.exit(1) when node/npm is missing.
-        _log.warning("pty-ws argv resolution failed peer=%s error=%s", peer, exc)
         await ws.send_text(f"\r\n\x1b[31mChat unavailable: {exc}\x1b[0m\r\n")
         await ws.close(code=1011)
         return
 
-    _log.debug(
-        "pty-ws spawn peer=%s argv0=%s cwd=%s sidecar=%s",
-        peer,
-        argv[0] if argv else "",
-        cwd,
-        bool(sidecar_url),
-    )
 
     try:
         bridge = PtyBridge.spawn(argv, cwd=cwd, env=env)
     except PtyUnavailableError as exc:
-        _log.warning("pty-ws spawn unavailable peer=%s error=%s", peer, exc)
         await ws.send_text(f"\r\n\x1b[31mChat unavailable: {exc}\x1b[0m\r\n")
         await ws.close(code=1011)
         return
     except (FileNotFoundError, OSError) as exc:
-        _log.exception("pty-ws spawn failed peer=%s", peer)
         await ws.send_text(f"\r\n\x1b[31mChat failed to start: {exc}\x1b[0m\r\n")
         await ws.close(code=1011)
         return
 
-    _log.info("pty-ws spawned peer=%s pid=%s", peer, bridge.pid)
     loop = asyncio.get_running_loop()
-    bytes_to_client = 0
-    bytes_to_pty = 0
-    resize_events = 0
-    disconnect_reason = "closed"
 
     # --- reader task: PTY master → WebSocket ----------------------------
     async def pump_pty_to_ws() -> None:
-        nonlocal bytes_to_client, disconnect_reason
         while True:
             chunk = await loop.run_in_executor(
                 None, bridge.read, _PTY_READ_CHUNK_TIMEOUT
             )
             if chunk is None:  # EOF
-                disconnect_reason = "pty_eof"
                 return
             if not chunk:  # no data this tick; yield control and retry
                 await asyncio.sleep(0)
                 continue
-            bytes_to_client += len(chunk)
             try:
                 await ws.send_bytes(chunk)
             except Exception:
-                disconnect_reason = "ws_send_failed"
                 return
 
     reader_task = asyncio.create_task(pump_pty_to_ws())
@@ -4997,7 +4476,6 @@ async def pty_ws(ws: WebSocket) -> None:
             msg = await ws.receive()
             msg_type = msg.get("type")
             if msg_type == "websocket.disconnect":
-                disconnect_reason = "ws_disconnect"
                 break
             raw = msg.get("bytes")
             if raw is None:
@@ -5012,13 +4490,11 @@ async def pty_ws(ws: WebSocket) -> None:
                 cols = int(match.group(1))
                 rows = int(match.group(2))
                 bridge.resize(cols=cols, rows=rows)
-                resize_events += 1
                 continue
 
             bridge.write(raw)
-            bytes_to_pty += len(raw)
     except WebSocketDisconnect:
-        disconnect_reason = "ws_disconnect"
+        pass
     finally:
         reader_task.cancel()
         try:
@@ -5026,15 +4502,6 @@ async def pty_ws(ws: WebSocket) -> None:
         except (asyncio.CancelledError, Exception):
             pass
         bridge.close()
-        _log.info(
-            "pty-ws closed peer=%s pid=%s reason=%s bytes_out=%d bytes_in=%d resizes=%d",
-            peer,
-            bridge.pid,
-            disconnect_reason,
-            bytes_to_client,
-            bytes_to_pty,
-            resize_events,
-        )
 
 
 # ---------------------------------------------------------------------------
@@ -5050,12 +4517,7 @@ async def pty_ws(ws: WebSocket) -> None:
 
 @app.websocket("/api/ws")
 async def gateway_ws(ws: WebSocket) -> None:
-    peer = _ws_client_label(ws)
     if not _DASHBOARD_EMBEDDED_CHAT_ENABLED:
-        _log.warning(
-            "gateway-ws reject peer=%s reason=embedded_chat_disabled close_code=4403",
-            peer,
-        )
         await ws.close(code=4403)
         return
 
@@ -5064,32 +4526,12 @@ async def gateway_ws(ws: WebSocket) -> None:
         return
 
     if not _ws_request_is_allowed(ws):
-        _log.warning(
-            "gateway-ws reject peer=%s reason=non_loopback_or_bad_origin "
-            "bound_host=%s close_code=4403",
-            peer,
-            getattr(app.state, "bound_host", ""),
-        )
         await ws.close(code=4403)
         return
 
     from tui_gateway.ws import handle_ws
 
-    _log.info("gateway-ws connect peer=%s", peer)
-    try:
-        await handle_ws(ws)
-    except WebSocketDisconnect as exc:
-        _log.info(
-            "gateway-ws disconnect peer=%s code=%s reason=%s",
-            peer,
-            getattr(exc, "code", None),
-            getattr(exc, "reason", None),
-        )
-    except Exception:
-        _log.exception("gateway-ws error peer=%s", peer)
-        raise
-    else:
-        _log.info("gateway-ws closed peer=%s", peer)
+    await handle_ws(ws)
 
 
 # ---------------------------------------------------------------------------
@@ -5106,9 +4548,7 @@ async def gateway_ws(ws: WebSocket) -> None:
 
 @app.websocket("/api/pub")
 async def pub_ws(ws: WebSocket) -> None:
-    peer = _ws_client_label(ws)
     if not _DASHBOARD_EMBEDDED_CHAT_ENABLED:
-        _log.debug("pub-ws reject peer=%s reason=embedded_chat_disabled", peer)
         await ws.close(code=4403)
         return
 
@@ -5117,36 +4557,26 @@ async def pub_ws(ws: WebSocket) -> None:
         return
 
     if not _ws_request_is_allowed(ws):
-        _log.warning("pub-ws reject peer=%s reason=non_loopback_or_bad_origin", peer)
         await ws.close(code=4403)
         return
 
     channel = _channel_or_close_code(ws)
     if not channel:
-        _log.warning("pub-ws reject peer=%s reason=invalid_channel", peer)
         await ws.close(code=4400)
         return
 
     await ws.accept()
-    _log.info("pub-ws connect peer=%s channel=%s", peer, channel)
-    messages = 0
 
     try:
         while True:
-            payload = await ws.receive_text()
-            messages += 1
-            await _broadcast_event(channel, payload)
+            await _broadcast_event(channel, await ws.receive_text())
     except WebSocketDisconnect:
-        _log.info(
-            "pub-ws disconnect peer=%s channel=%s messages=%d", peer, channel, messages
-        )
+        pass
 
 
 @app.websocket("/api/events")
 async def events_ws(ws: WebSocket) -> None:
-    peer = _ws_client_label(ws)
     if not _DASHBOARD_EMBEDDED_CHAT_ENABLED:
-        _log.debug("events-ws reject peer=%s reason=embedded_chat_disabled", peer)
         await ws.close(code=4403)
         return
 
@@ -5155,18 +4585,15 @@ async def events_ws(ws: WebSocket) -> None:
         return
 
     if not _ws_request_is_allowed(ws):
-        _log.warning("events-ws reject peer=%s reason=non_loopback_or_bad_origin", peer)
         await ws.close(code=4403)
         return
 
     channel = _channel_or_close_code(ws)
     if not channel:
-        _log.warning("events-ws reject peer=%s reason=invalid_channel", peer)
         await ws.close(code=4400)
         return
 
     await ws.accept()
-    _log.info("events-ws connect peer=%s channel=%s", peer, channel)
 
     async with _event_lock:
         _event_channels.setdefault(channel, set()).add(ws)
@@ -5178,7 +4605,7 @@ async def events_ws(ws: WebSocket) -> None:
             # browser holds it.
             await ws.receive_text()
     except WebSocketDisconnect:
-        _log.info("events-ws disconnect peer=%s channel=%s", peer, channel)
+        pass
     finally:
         async with _event_lock:
             subs = _event_channels.get(channel)
@@ -5217,16 +4644,12 @@ def mount_spa(application: FastAPI):
     without rebuilding the bundle.
     """
     if not WEB_DIST.exists():
-
         @application.get("/{full_path:path}")
         async def no_frontend(full_path: str):
             return JSONResponse(
-                {
-                    "error": "Frontend not built. Run: cd apps/dashboard && npm run build"
-                },
+                {"error": "Frontend not built. Run: cd web && npm run build"},
                 status_code=404,
             )
-
         return
 
     _index_path = WEB_DIST / "index.html"
@@ -5297,13 +4720,11 @@ def mount_spa(application: FastAPI):
         if prefix:
             for asset_dir in ("/fonts/", "/fonts-terminal/", "/ds-assets/", "/assets/"):
                 css = css.replace(f"url({asset_dir}", f"url({prefix}{asset_dir}")
-                css = css.replace(f'url("{asset_dir}', f'url("{prefix}{asset_dir}')
+                css = css.replace(f"url(\"{asset_dir}", f"url(\"{prefix}{asset_dir}")
                 css = css.replace(f"url('{asset_dir}", f"url('{prefix}{asset_dir}")
         return Response(content=css, media_type="text/css")
 
-    application.mount(
-        "/assets", StaticFiles(directory=WEB_DIST / "assets"), name="assets"
-    )
+    application.mount("/assets", StaticFiles(directory=WEB_DIST / "assets"), name="assets")
 
     @application.get("/{full_path:path}")
     async def serve_spa(full_path: str, request: Request):
@@ -5325,49 +4746,19 @@ def mount_spa(application: FastAPI):
 # ---------------------------------------------------------------------------
 
 # Built-in dashboard themes — label + description only.  The actual color
-# definitions live in the frontend (apps/dashboard/src/themes/presets.ts).
+# definitions live in the frontend (web/src/themes/presets.ts).
 _BUILTIN_DASHBOARD_THEMES = [
-    {
-        "name": "default",
-        "label": "Hermes Teal",
-        "description": "Classic dark teal — the canonical Hermes look",
-    },
-    {
-        "name": "default-large",
-        "label": "Hermes Teal (Large)",
-        "description": "Hermes Teal with bigger fonts and roomier spacing",
-    },
-    {
-        "name": "midnight",
-        "label": "Midnight",
-        "description": "Deep blue-violet with cool accents",
-    },
-    {
-        "name": "ember",
-        "label": "Ember",
-        "description": "Warm crimson and bronze — forge vibes",
-    },
-    {
-        "name": "mono",
-        "label": "Mono",
-        "description": "Clean grayscale — minimal and focused",
-    },
-    {
-        "name": "cyberpunk",
-        "label": "Cyberpunk",
-        "description": "Neon green on black — matrix terminal",
-    },
-    {
-        "name": "rose",
-        "label": "Rosé",
-        "description": "Soft pink and warm ivory — easy on the eyes",
-    },
+    {"name": "default",       "label": "Hermes Teal",         "description": "Classic dark teal — the canonical Hermes look"},
+    {"name": "default-large", "label": "Hermes Teal (Large)", "description": "Hermes Teal with bigger fonts and roomier spacing"},
+    {"name": "midnight",      "label": "Midnight",            "description": "Deep blue-violet with cool accents"},
+    {"name": "ember",     "label": "Ember",          "description": "Warm crimson and bronze — forge vibes"},
+    {"name": "mono",      "label": "Mono",           "description": "Clean grayscale — minimal and focused"},
+    {"name": "cyberpunk", "label": "Cyberpunk",      "description": "Neon green on black — matrix terminal"},
+    {"name": "rose",      "label": "Rosé",           "description": "Soft pink and warm ivory — easy on the eyes"},
 ]
 
 
-def _parse_theme_layer(
-    value: Any, default_hex: str, default_alpha: float = 1.0
-) -> Optional[Dict[str, Any]]:
+def _parse_theme_layer(value: Any, default_hex: str, default_alpha: float = 1.0) -> Optional[Dict[str, Any]]:
     """Normalise a theme layer spec from YAML into `{hex, alpha}` form.
 
     Accepts shorthand (a bare hex string) or full dict form.  Returns
@@ -5405,25 +4796,11 @@ _THEME_DEFAULT_LAYOUT: Dict[str, str] = {
 }
 
 _THEME_OVERRIDE_KEYS = {
-    "card",
-    "cardForeground",
-    "popover",
-    "popoverForeground",
-    "primary",
-    "primaryForeground",
-    "secondary",
-    "secondaryForeground",
-    "muted",
-    "mutedForeground",
-    "accent",
-    "accentForeground",
-    "destructive",
-    "destructiveForeground",
-    "success",
-    "warning",
-    "border",
-    "input",
-    "ring",
+    "card", "cardForeground", "popover", "popoverForeground",
+    "primary", "primaryForeground", "secondary", "secondaryForeground",
+    "muted", "mutedForeground", "accent", "accentForeground",
+    "destructive", "destructiveForeground", "success", "warning",
+    "border", "input", "ring",
 }
 
 # Well-known named asset slots themes can populate.  Any other keys under
@@ -5438,15 +4815,8 @@ _THEME_NAMED_ASSET_KEYS = {"bg", "hero", "logo", "crest", "sidebar", "header"}
 # can restyle chrome (clip-path, border-image, segmented progress, etc.)
 # without shipping their own CSS.
 _THEME_COMPONENT_BUCKETS = {
-    "card",
-    "header",
-    "footer",
-    "sidebar",
-    "tab",
-    "progress",
-    "badge",
-    "backdrop",
-    "page",
+    "card", "header", "footer", "sidebar", "tab",
+    "progress", "badge", "backdrop", "page",
 }
 
 _THEME_LAYOUT_VARIANTS = {"standard", "cockpit", "tiled"}
@@ -5471,30 +4841,20 @@ def _normalise_theme_definition(data: Dict[str, Any]) -> Optional[Dict[str, Any]
         return None
 
     # Palette
-    palette_src = (
-        data.get("palette", {}) if isinstance(data.get("palette"), dict) else {}
-    )
+    palette_src = data.get("palette", {}) if isinstance(data.get("palette"), dict) else {}
     # Allow top-level `colors.background` as a shorthand too.
     colors_src = data.get("colors", {}) if isinstance(data.get("colors"), dict) else {}
 
-    def _layer(
-        key: str, default_hex: str, default_alpha: float = 1.0
-    ) -> Dict[str, Any]:
+    def _layer(key: str, default_hex: str, default_alpha: float = 1.0) -> Dict[str, Any]:
         spec = palette_src.get(key, colors_src.get(key))
         parsed = _parse_theme_layer(spec, default_hex, default_alpha)
-        return (
-            parsed
-            if parsed is not None
-            else {"hex": default_hex, "alpha": default_alpha}
-        )
+        return parsed if parsed is not None else {"hex": default_hex, "alpha": default_alpha}
 
     palette = {
         "background": _layer("background", "#041c1c", 1.0),
         "midground": _layer("midground", "#ffe6cb", 1.0),
         "foreground": _layer("foreground", "#ffffff", 0.0),
-        "warmGlow": palette_src.get("warmGlow")
-        or data.get("warmGlow")
-        or "rgba(255, 189, 56, 0.35)",
+        "warmGlow": palette_src.get("warmGlow") or data.get("warmGlow") or "rgba(255, 189, 56, 0.35)",
         "noiseOpacity": 1.0,
     }
     raw_noise = palette_src.get("noiseOpacity", data.get("noiseOpacity"))
@@ -5504,19 +4864,9 @@ def _normalise_theme_definition(data: Dict[str, Any]) -> Optional[Dict[str, Any]
         palette["noiseOpacity"] = 1.0
 
     # Typography
-    typo_src = (
-        data.get("typography", {}) if isinstance(data.get("typography"), dict) else {}
-    )
+    typo_src = data.get("typography", {}) if isinstance(data.get("typography"), dict) else {}
     typography = dict(_THEME_DEFAULT_TYPOGRAPHY)
-    for key in (
-        "fontSans",
-        "fontMono",
-        "fontDisplay",
-        "fontUrl",
-        "baseSize",
-        "lineHeight",
-        "letterSpacing",
-    ):
+    for key in ("fontSans", "fontMono", "fontDisplay", "fontUrl", "baseSize", "lineHeight", "letterSpacing"):
         val = typo_src.get(key)
         if isinstance(val, str) and val.strip():
             typography[key] = val
@@ -5598,8 +4948,7 @@ def _normalise_theme_definition(data: Dict[str, Any]) -> Optional[Dict[str, Any]
     layout_variant_src = data.get("layoutVariant")
     layout_variant = (
         layout_variant_src
-        if isinstance(layout_variant_src, str)
-        and layout_variant_src in _THEME_LAYOUT_VARIANTS
+        if isinstance(layout_variant_src, str) and layout_variant_src in _THEME_LAYOUT_VARIANTS
         else "standard"
     )
 
@@ -5650,7 +4999,7 @@ async def get_dashboard_themes():
     """Return available themes and the currently active one.
 
     Built-in entries ship name/label/description only (the frontend owns
-    their full definitions in `apps/dashboard/src/themes/presets.ts`).  User themes
+    their full definitions in `web/src/themes/presets.ts`).  User themes
     from `~/.hermes/dashboard-themes/*.yaml` ship with their full
     normalised definition under `definition`, so the client can apply
     them without a stub.
@@ -5666,14 +5015,12 @@ async def get_dashboard_themes():
     for t in user_themes:
         if t["name"] in seen:
             continue
-        themes.append(
-            {
-                "name": t["name"],
-                "label": t["label"],
-                "description": t["description"],
-                "definition": t,
-            }
-        )
+        themes.append({
+            "name": t["name"],
+            "label": t["label"],
+            "description": t["description"],
+            "definition": t,
+        })
         seen.add(t["name"])
     return {"themes": themes, "active": active}
 
@@ -5734,7 +5081,6 @@ def _safe_plugin_api_relpath(api_field: Any, *, dashboard_dir: Path) -> Optional
     return api_field
 
 
-
 def _discover_dashboard_plugins() -> list:
     """Scan plugins/*/dashboard/manifest.json for dashboard extensions.
 
@@ -5747,7 +5093,6 @@ def _discover_dashboard_plugins() -> list:
     seen_names: set = set()
 
     from hermes_cli.plugins import get_bundled_plugins_dir
-
     bundled_root = get_bundled_plugins_dir()
     search_dirs = [
         (get_hermes_home() / "plugins", "user"),
@@ -5785,9 +5130,7 @@ def _discover_dashboard_plugins() -> list:
                 # ``override`` to replace a built-in route, and ``hidden`` to
                 # register the plugin component/slots without adding a tab
                 # (useful for slot-only plugins like a header-crest injector).
-                raw_tab = (
-                    data.get("tab", {}) if isinstance(data.get("tab"), dict) else {}
-                )
+                raw_tab = data.get("tab", {}) if isinstance(data.get("tab"), dict) else {}
                 tab_info = {
                     "path": raw_tab.get("path", f"/{name}"),
                     "position": raw_tab.get("position", "end"),
@@ -5909,9 +5252,7 @@ def _merged_plugins_hub() -> Dict[str, Any]:
 
     # Read user-hidden plugins from config for the user_hidden field.
     config = load_config()
-    hidden_plugins: list = (
-        cfg_get(config, "dashboard", "hidden_plugins", default=[]) or []
-    )
+    hidden_plugins: list = cfg_get(config, "dashboard", "hidden_plugins", default=[]) or []
 
     plugins_root_resolved = (get_hermes_home() / "plugins").resolve()
     rows: List[Dict[str, Any]] = []
@@ -5926,9 +5267,7 @@ def _merged_plugins_hub() -> Dict[str, Any]:
 
         dir_path = Path(dir_str)
         dm = dash_by_name.get(name)
-        has_dash_manifest = (
-            dm is not None or (dir_path / "dashboard" / "manifest.json").exists()
-        )
+        has_dash_manifest = dm is not None or (dir_path / "dashboard" / "manifest.json").exists()
 
         under_user_tree = False
         try:
@@ -5949,7 +5288,6 @@ def _merged_plugins_hub() -> Dict[str, Any]:
         if provides_tools:
             try:
                 from tools.registry import registry
-
                 for tname in provides_tools:
                     entry = registry.get_entry(tname)
                     if entry and entry.check_fn and not entry.check_fn():
@@ -5959,24 +5297,21 @@ def _merged_plugins_hub() -> Dict[str, Any]:
             except Exception:
                 pass
 
-        rows.append(
-            {
-                "name": name,
-                "version": version or "",
-                "description": description or "",
-                "source": source,
-                "runtime_status": runtime_status,
-                "has_dashboard_manifest": has_dash_manifest,
-                "dashboard_manifest": _strip_dashboard_manifest(dm) if dm else None,
-                "path": dir_str,
-                "can_remove": can_remove_update,
-                "can_update_git": can_remove_update
-                and (Path(dir_str) / ".git").exists(),
-                "auth_required": auth_required,
-                "auth_command": auth_command,
-                "user_hidden": name in hidden_plugins,
-            }
-        )
+        rows.append({
+            "name": name,
+            "version": version or "",
+            "description": description or "",
+            "source": source,
+            "runtime_status": runtime_status,
+            "has_dashboard_manifest": has_dash_manifest,
+            "dashboard_manifest": _strip_dashboard_manifest(dm) if dm else None,
+            "path": dir_str,
+            "can_remove": can_remove_update,
+            "can_update_git": can_remove_update and (Path(dir_str) / ".git").exists(),
+            "auth_required": auth_required,
+            "auth_command": auth_command,
+            "user_hidden": name in hidden_plugins,
+        })
 
     agent_names = {r["name"] for r in rows}
     orphan_dashboard = [
@@ -6019,9 +5354,7 @@ async def get_plugins_hub(request: Request):
         return _merged_plugins_hub()
     except Exception as exc:
         _log.warning("plugins/hub failed: %s", exc)
-        raise HTTPException(
-            status_code=500, detail="Failed to build plugins hub."
-        ) from exc
+        raise HTTPException(status_code=500, detail="Failed to build plugins hub.") from exc
 
 
 @app.post("/api/dashboard/agent-plugins/install")
@@ -6061,9 +5394,7 @@ async def post_agent_plugin_enable(request: Request, name: str):
 
     result = dashboard_set_agent_plugin_enabled(name, enabled=True)
     if not result.get("ok"):
-        raise HTTPException(
-            status_code=400, detail=result.get("error") or "Enable failed."
-        )
+        raise HTTPException(status_code=400, detail=result.get("error") or "Enable failed.")
     return result
 
 
@@ -6075,9 +5406,7 @@ async def post_agent_plugin_disable(request: Request, name: str):
 
     result = dashboard_set_agent_plugin_enabled(name, enabled=False)
     if not result.get("ok"):
-        raise HTTPException(
-            status_code=400, detail=result.get("error") or "Disable failed."
-        )
+        raise HTTPException(status_code=400, detail=result.get("error") or "Disable failed.")
     return result
 
 
@@ -6089,9 +5418,7 @@ async def post_agent_plugin_update(request: Request, name: str):
 
     result = dashboard_update_user_plugin(name)
     if not result.get("ok"):
-        raise HTTPException(
-            status_code=400, detail=result.get("error") or "Update failed."
-        )
+        raise HTTPException(status_code=400, detail=result.get("error") or "Update failed.")
     _get_dashboard_plugins(force_rescan=True)
     return result
 
@@ -6104,9 +5431,7 @@ async def delete_agent_plugin(request: Request, name: str):
 
     result = dashboard_remove_user_plugin(name)
     if not result.get("ok"):
-        raise HTTPException(
-            status_code=400, detail=result.get("error") or "Remove failed."
-        )
+        raise HTTPException(status_code=400, detail=result.get("error") or "Remove failed.")
     _get_dashboard_plugins(force_rescan=True)
     return result
 
@@ -6271,11 +5596,7 @@ def _mount_plugin_api_routes():
             )
             continue
         if not api_path.exists():
-            _log.warning(
-                "Plugin %s declares api=%s but file not found",
-                plugin["name"],
-                api_file_name,
-            )
+            _log.warning("Plugin %s declares api=%s but file not found", plugin["name"], api_file_name)
             continue
         try:
             module_name = f"hermes_dashboard_plugin_{plugin['name']}"
@@ -6297,9 +5618,7 @@ def _mount_plugin_api_routes():
                 raise
             router = getattr(mod, "router", None)
             if router is None:
-                _log.warning(
-                    "Plugin %s api file has no 'router' attribute", plugin["name"]
-                )
+                _log.warning("Plugin %s api file has no 'router' attribute", plugin["name"])
                 continue
             app.include_router(router, prefix=f"/api/plugins/{plugin['name']}")
             _log.info("Mounted plugin API routes: /api/plugins/%s/", plugin["name"])
@@ -6329,14 +5648,6 @@ def start_server(
     embedded_chat: bool = False,
 ):
     """Start the web UI server."""
-    try:
-        from hermes_logging import setup_logging as _setup_logging
-
-        log_dir = _setup_logging(mode="gui")
-        _log.info("GUI logging enabled: %s", log_dir / "gui.log")
-    except Exception:
-        pass
-
     import uvicorn
 
     global _DASHBOARD_EMBEDDED_CHAT_ENABLED
@@ -6403,8 +5714,7 @@ def start_server(
         # --insecure path — no auth, loud warning.
         _log.warning(
             "Binding to %s with --insecure — the dashboard has no robust "
-            "authentication. Only use on trusted networks.",
-            host,
+            "authentication. Only use on trusted networks.", host,
         )
 
     # Record the bound host so host_header_middleware can validate incoming
@@ -6431,7 +5741,6 @@ def start_server(
         )
 
         if _has_display:
-
             def _open():
                 try:
                     time.sleep(1.0)
@@ -6446,13 +5755,6 @@ def start_server(
                 "(headless Linux). Pass --no-open to suppress this detection."
             )
 
-    _log.info(
-        "dashboard starting host=%s port=%s embedded_chat=%s open_browser=%s",
-        host,
-        port,
-        embedded_chat,
-        open_browser,
-    )
     print(f"  Hermes Web UI → http://{host}:{port}")
     # proxy_headers defaults to False so _ws_client_is_allowed sees the real
     # connection peer rather than X-Forwarded-For's rewritten value (which
diff --git a/hermes_logging.py b/hermes_logging.py
index 2d5c6562218..a3656c8c139 100644
--- a/hermes_logging.py
+++ b/hermes_logging.py
@@ -320,19 +320,39 @@ def setup_verbose_logging() -> None:
 # ---------------------------------------------------------------------------
 
 class _ManagedRotatingFileHandler(RotatingFileHandler):
-    """RotatingFileHandler that ensures group-writable perms in managed mode.
+    """RotatingFileHandler that ensures group-writable perms in managed mode
+    AND survives external rotation.
 
-    In managed mode (NixOS), the stateDir uses setgid (2770) so new files
-    inherit the hermes group. However, both _open() (initial creation) and
-    doRollover() create files via open(), which uses the process umask —
-    typically 0022, producing 0644. This subclass applies chmod 0660 after
-    both operations so the gateway and interactive users can share log files.
+    Two responsibilities:
+
+    1.  In managed mode (NixOS), the stateDir uses setgid (2770) so new files
+        inherit the hermes group. However, both ``_open()`` (initial creation)
+        and ``doRollover()`` create files via ``open()``, which uses the
+        process umask — typically 0022, producing 0644. This subclass applies
+        ``chmod 0660`` after both operations so the gateway and interactive
+        users can share log files.
+
+    2.  ``RotatingFileHandler`` keeps an open file descriptor.  If anything
+        rotates the file *externally* (``logrotate``, manual ``mv``,
+        another process rotating under us, a transient unlink), our fd
+        keeps pointing at the renamed/unlinked inode and every subsequent
+        write goes to ``gateway.log.1`` instead of ``gateway.log`` — silent
+        log loss for the file every operator expects to read.  Before each
+        emit we ``stat`` ``baseFilename`` and compare it against the open
+        stream's inode; on mismatch we reopen.  This is the same pattern
+        as stdlib ``WatchedFileHandler.reopenIfNeeded()``, adapted for
+        rotating handlers.
     """
 
     def __init__(self, *args, **kwargs):
         from hermes_cli.config import is_managed
         self._managed = is_managed()
         super().__init__(*args, **kwargs)
+        # Snapshot the inode of the currently open stream so emit() can
+        # detect external rotation without an extra fstat per write.
+        self._stat_dev: Optional[int] = None
+        self._stat_ino: Optional[int] = None
+        self._record_stream_stat()
 
     def _chmod_if_managed(self):
         if self._managed:
@@ -341,6 +361,70 @@ class _ManagedRotatingFileHandler(RotatingFileHandler):
             except OSError:
                 pass
 
+    def _record_stream_stat(self) -> None:
+        """Snapshot dev/ino of ``baseFilename`` so we can detect external rotation."""
+        try:
+            st = os.stat(self.baseFilename)
+            self._stat_dev, self._stat_ino = st.st_dev, st.st_ino
+        except OSError:
+            self._stat_dev, self._stat_ino = None, None
+
+    def _reopen_if_externally_rotated(self) -> None:
+        """Reopen the stream when ``baseFilename`` no longer matches our fd.
+
+        Triggered when ``baseFilename`` was renamed (logrotate), unlinked,
+        or replaced by a different inode.  Silent + best-effort: any error
+        falls back to the existing (possibly stale) stream so logging keeps
+        working instead of dying on a stat failure.
+        """
+        try:
+            st = os.stat(self.baseFilename)
+        except FileNotFoundError:
+            # File was rotated/unlinked underneath us.  Close + reopen so a
+            # fresh inode is created at the expected path.
+            try:
+                if self.stream is not None:
+                    self.stream.close()
+            except Exception:
+                pass
+            self.stream = None  # type: ignore[assignment]
+            try:
+                self.stream = self._open()
+                self._record_stream_stat()
+            except Exception:
+                # Couldn't reopen — leave stream=None; next emit will
+                # bail rather than write to a stale inode.
+                pass
+            return
+        except OSError:
+            return  # transient — try again on the next emit
+
+        if self._stat_dev is None or self._stat_ino is None:
+            self._stat_dev, self._stat_ino = st.st_dev, st.st_ino
+            return
+
+        if (st.st_dev, st.st_ino) != (self._stat_dev, self._stat_ino):
+            # baseFilename now points at a DIFFERENT inode than the one we
+            # hold open.  Close the old stream and open the new file.
+            try:
+                if self.stream is not None:
+                    self.stream.close()
+            except Exception:
+                pass
+            self.stream = None  # type: ignore[assignment]
+            try:
+                self.stream = self._open()
+                self._stat_dev, self._stat_ino = st.st_dev, st.st_ino
+            except Exception:
+                pass
+
+    def emit(self, record: logging.LogRecord) -> None:
+        # Cheap-ish stat-per-record check; the kernel caches inode metadata
+        # so the syscall is sub-microsecond on a hot file.
+        if self.stream is not None or os.path.exists(self.baseFilename):
+            self._reopen_if_externally_rotated()
+        super().emit(record)
+
     def _open(self):
         stream = super()._open()
         self._chmod_if_managed()
@@ -349,6 +433,9 @@ class _ManagedRotatingFileHandler(RotatingFileHandler):
     def doRollover(self):
         super().doRollover()
         self._chmod_if_managed()
+        # Our own rollover writes a new baseFilename; refresh the snapshot
+        # so the next emit doesn't mistake it for external rotation.
+        self._record_stream_stat()
 
 
 def _add_rotating_handler(
diff --git a/hermes_state.py b/hermes_state.py
index 9954597032a..f132c171654 100644
--- a/hermes_state.py
+++ b/hermes_state.py
@@ -33,7 +33,7 @@ T = TypeVar("T")
 
 DEFAULT_DB_PATH = get_hermes_home() / "state.db"
 
-SCHEMA_VERSION = 13
+SCHEMA_VERSION = 14
 
 # ---------------------------------------------------------------------------
 # WAL-compatibility fallback
@@ -282,10 +282,18 @@ CREATE TABLE IF NOT EXISTS state_meta (
     value TEXT
 );
 
+CREATE TABLE IF NOT EXISTS compression_locks (
+    session_id TEXT PRIMARY KEY,
+    holder TEXT NOT NULL,
+    acquired_at REAL NOT NULL,
+    expires_at REAL NOT NULL
+);
+
 CREATE INDEX IF NOT EXISTS idx_sessions_source ON sessions(source);
 CREATE INDEX IF NOT EXISTS idx_sessions_parent ON sessions(parent_session_id);
 CREATE INDEX IF NOT EXISTS idx_sessions_started ON sessions(started_at DESC);
 CREATE INDEX IF NOT EXISTS idx_messages_session ON messages(session_id, timestamp);
+CREATE INDEX IF NOT EXISTS idx_compression_locks_expires ON compression_locks(expires_at);
 """
 
 FTS_SQL = """
@@ -803,6 +811,132 @@ class SessionDB:
             conn.execute("UPDATE sessions SET cwd = ? WHERE id = ?", (cwd, session_id))
 
         self._execute_write(_do)
+    # ──────────────────────────────────────────────────────────────────────
+    # Compression locks
+    # ──────────────────────────────────────────────────────────────────────
+    # Atomic per-session locks that prevent two compression paths from
+    # racing on the same session_id and producing orphan child sessions.
+    #
+    # The race: ``conversation_compression.py`` rotates ``agent.session_id``
+    # as a side effect of a successful compression (end old session, create
+    # new). That mutation is local to the AIAgent instance — but ``state.db``
+    # is shared across all instances. Two AIAgents that share the same
+    # ``session_id`` at the moment they both decide to compress (most
+    # commonly the parent turn's agent + a background-review fork started
+    # right after the turn ended) each end the parent and create their own
+    # NEW session, parented to the same old id. The gateway SessionEntry
+    # only catches one rotation; the other child silently accumulates
+    # writes — Damien's "parent → two orphan children" repro shape.
+    #
+    # The lock is keyed by ``session_id`` and is held for the duration of
+    # the compress() call plus the rotation. ``holder`` identifies the
+    # current owner (pid:tid:nonce) for diagnostics; the lock is recovered
+    # via ``expires_at`` if the holder process crashed without releasing.
+    def try_acquire_compression_lock(
+        self,
+        session_id: str,
+        holder: str,
+        ttl_seconds: float = 300.0,
+    ) -> bool:
+        """Try to atomically acquire the compression lock for ``session_id``.
+
+        Returns ``True`` on success (caller now owns the lock and must
+        release via :meth:`release_compression_lock`).  Returns ``False``
+        if another holder already owns a non-expired lock — the caller
+        MUST NOT proceed with compression in that case (its rotation would
+        race against the holder's, splitting the session lineage).
+
+        Expired locks (``expires_at < now``) are reclaimed transparently:
+        the stale row is deleted and the new holder acquires it. This
+        prevents a crashed compressor from permanently blocking the
+        session.
+
+        Implementation: single-transaction DELETE-expired + INSERT-or-IGNORE,
+        followed by a SELECT to confirm we got the row. SQLite serialises
+        writes, so the whole sequence is atomic against other writers.
+        """
+        if not session_id:
+            return False
+        now = time.time()
+        expires_at = now + ttl_seconds
+
+        def _do(conn):
+            # First: reclaim any expired lock for this session_id.
+            conn.execute(
+                "DELETE FROM compression_locks "
+                "WHERE session_id = ? AND expires_at < ?",
+                (session_id, now),
+            )
+            # Then: try to insert. INSERT OR IGNORE returns no rowcount
+            # difference — verify ownership via SELECT.
+            conn.execute(
+                "INSERT OR IGNORE INTO compression_locks "
+                "(session_id, holder, acquired_at, expires_at) "
+                "VALUES (?, ?, ?, ?)",
+                (session_id, holder, now, expires_at),
+            )
+            row = conn.execute(
+                "SELECT holder FROM compression_locks WHERE session_id = ?",
+                (session_id,),
+            ).fetchone()
+            return row is not None and (
+                row["holder"] if isinstance(row, sqlite3.Row) else row[0]
+            ) == holder
+
+        try:
+            return bool(self._execute_write(_do))
+        except sqlite3.Error as exc:
+            logger.warning(
+                "try_acquire_compression_lock(%s) failed: %s",
+                session_id, exc,
+            )
+            # Fail open: returning False makes the caller skip compression,
+            # which is the safe behaviour when the lock subsystem is broken.
+            return False
+
+    def release_compression_lock(self, session_id: str, holder: str) -> None:
+        """Release the compression lock for ``session_id`` iff we own it.
+
+        Idempotent: no-op when the lock has already expired and been
+        reclaimed by a different holder, or when no lock exists. The
+        ``holder`` check prevents a late-returning compressor from
+        clobbering a fresh lock held by someone else.
+        """
+        if not session_id:
+            return
+
+        def _do(conn):
+            conn.execute(
+                "DELETE FROM compression_locks "
+                "WHERE session_id = ? AND holder = ?",
+                (session_id, holder),
+            )
+
+        try:
+            self._execute_write(_do)
+        except sqlite3.Error as exc:
+            logger.warning(
+                "release_compression_lock(%s) failed: %s",
+                session_id, exc,
+            )
+
+    def get_compression_lock_holder(self, session_id: str) -> Optional[str]:
+        """Return the current (non-expired) holder for ``session_id``, or None.
+
+        Diagnostic helper — not used by the locking protocol itself.
+        """
+        if not session_id:
+            return None
+        now = time.time()
+        row = self._conn.execute(
+            "SELECT holder FROM compression_locks "
+            "WHERE session_id = ? AND expires_at >= ?",
+            (session_id, now),
+        ).fetchone()
+        if row is None:
+            return None
+        return row["holder"] if isinstance(row, sqlite3.Row) else row[0]
+
 
     def update_system_prompt(self, session_id: str, system_prompt: str) -> None:
         """Store the full assembled system prompt snapshot."""
@@ -3140,7 +3274,59 @@ class SessionDB:
 
     # ── Space reclamation ──
 
-    def vacuum(self) -> None:
+    # FTS5 virtual tables whose b-tree segments we merge on optimize. The
+    # trigram table is created lazily / may be disabled, so we probe before
+    # touching it (see optimize_fts).
+    _FTS_TABLES = ("messages_fts", "messages_fts_trigram")
+
+    def _fts_table_exists(self, name: str) -> bool:
+        """True if an FTS5 virtual table is queryable in this DB."""
+        try:
+            self._conn.execute(f"SELECT 1 FROM {name} LIMIT 0")
+            return True
+        except sqlite3.OperationalError:
+            return False
+
+    def optimize_fts(self) -> int:
+        """Merge fragmented FTS5 b-tree segments into one per index.
+
+        FTS5 indexes grow as a series of incremental segments — one per
+        ``INSERT`` batch driven by the message triggers. Over tens of
+        thousands of messages these segments accumulate, which both bloats
+        the ``*_data`` shadow tables and slows ``MATCH`` queries that must
+        scan every segment. The special ``'optimize'`` command rewrites each
+        index as a single merged segment.
+
+        This is purely a maintenance operation — it changes neither search
+        results nor ``snippet()`` output, only on-disk layout and query
+        speed. It is complementary to VACUUM: ``optimize`` compacts the FTS
+        index internally, then VACUUM returns the freed pages to the OS.
+
+        Skips any FTS table that does not exist (e.g. the trigram index when
+        disabled via ``HERMES_DISABLE_FTS_TRIGRAM`` or not yet created), so
+        it is safe to call unconditionally.
+
+        Returns the number of FTS indexes that were optimized.
+        """
+        optimized = 0
+        with self._lock:
+            for tbl in self._FTS_TABLES:
+                if not self._fts_table_exists(tbl):
+                    continue
+                try:
+                    # The column name in the INSERT must match the table name
+                    # for FTS5 special commands.
+                    self._conn.execute(
+                        f"INSERT INTO {tbl}({tbl}) VALUES('optimize')"
+                    )
+                    optimized += 1
+                except sqlite3.OperationalError as exc:
+                    logger.warning(
+                        "FTS optimize failed for %s: %s", tbl, exc
+                    )
+        return optimized
+
+    def vacuum(self) -> int:
         """Run VACUUM to reclaim disk space after large deletes.
 
         SQLite does not shrink the database file when rows are deleted —
@@ -3153,7 +3339,21 @@ class SessionDB:
         exclusive lock, so callers must ensure no other writers are
         active. Safe to call at startup before the gateway/CLI starts
         serving traffic.
+
+        FTS5 segments are merged first via :meth:`optimize_fts` so the
+        subsequent VACUUM reclaims the pages freed by the merge. This is a
+        layout-only optimization — search results are unchanged.
+
+        Returns the number of FTS indexes that were optimized (0 if the
+        merge step failed or no FTS tables exist).
         """
+        # Merge FTS5 segments before VACUUM so the freed pages are returned
+        # to the OS in the same pass. optimize_fts() manages its own lock.
+        optimized = 0
+        try:
+            optimized = self.optimize_fts()
+        except Exception as exc:
+            logger.warning("FTS optimize before VACUUM failed: %s", exc)
         # VACUUM cannot be executed inside a transaction.
         with self._lock:
             # Best-effort WAL checkpoint first, then VACUUM.
@@ -3162,6 +3362,7 @@ class SessionDB:
             except Exception:
                 pass
             self._conn.execute("VACUUM")
+        return optimized
 
     def maybe_auto_prune_and_vacuum(
         self,
diff --git a/mini_swe_runner.py b/mini_swe_runner.py
index e3d2f174e99..95a2cc7285e 100644
--- a/mini_swe_runner.py
+++ b/mini_swe_runner.py
@@ -29,12 +29,8 @@ Usage:
 import json
 import logging
 import os
-import sys
-import time
-import uuid
 from datetime import datetime
-from pathlib import Path
-from typing import List, Dict, Any, Optional, Literal
+from typing import List, Dict, Any, Optional
 
 import fire
 from dotenv import load_dotenv
diff --git a/model_tools.py b/model_tools.py
index f461afff5ba..8e85581be30 100644
--- a/model_tools.py
+++ b/model_tools.py
@@ -262,9 +262,10 @@ def _clear_tool_defs_cache() -> None:
 
 
 def get_tool_definitions(
-    enabled_toolsets: List[str] = None,
-    disabled_toolsets: List[str] = None,
+    enabled_toolsets: Optional[List[str]] = None,
+    disabled_toolsets: Optional[List[str]] = None,
     quiet_mode: bool = False,
+    skip_tool_search_assembly: bool = False,
 ) -> List[Dict[str, Any]]:
     """
     Get tool definitions for model API calls with toolset-based filtering.
@@ -275,6 +276,11 @@ def get_tool_definitions(
         enabled_toolsets: Only include tools from these toolsets.
         disabled_toolsets: Exclude tools from these toolsets (if enabled_toolsets is None).
         quiet_mode: Suppress status prints.
+        skip_tool_search_assembly: When True, return the pre-assembly tool list
+            (raw schemas for every enabled tool). Used internally by the
+            tool_search / tool_describe bridge handlers so they can read the
+            real catalog, not the already-collapsed one. Public callers should
+            leave this False.
 
     Returns:
         Filtered list of OpenAI-format tool definitions.
@@ -301,6 +307,7 @@ def get_tool_definitions(
             registry._generation,
             cfg_fp,
             bool(os.environ.get("HERMES_KANBAN_TASK")),
+            bool(skip_tool_search_assembly),
         )
         cached = _tool_defs_cache.get(cache_key)
         if cached is not None:
@@ -312,7 +319,8 @@ def get_tool_definitions(
             # schemas are treated as read-only by all known callers.
             return list(cached)
 
-    result = _compute_tool_definitions(enabled_toolsets, disabled_toolsets, quiet_mode)
+    result = _compute_tool_definitions(enabled_toolsets, disabled_toolsets, quiet_mode,
+                                       skip_tool_search_assembly=skip_tool_search_assembly)
     if quiet_mode:
         # Cache the freshly-computed list, but hand callers a shallow copy so
         # downstream mutations (e.g. run_agent appending memory/LCM tool
@@ -327,9 +335,10 @@ def get_tool_definitions(
 
 
 def _compute_tool_definitions(
-    enabled_toolsets: List[str] = None,
-    disabled_toolsets: List[str] = None,
+    enabled_toolsets: Optional[List[str]] = None,
+    disabled_toolsets: Optional[List[str]] = None,
     quiet_mode: bool = False,
+    skip_tool_search_assembly: bool = False,
 ) -> List[Dict[str, Any]]:
     """Uncached implementation of :func:`get_tool_definitions`."""
     # Determine which tool names the caller wants
@@ -481,9 +490,61 @@ def _compute_tool_definitions(
     except Exception as e:  # pragma: no cover — defensive
         logger.warning("Schema sanitization skipped: %s", e)
 
+    # ── Tool Search (progressive disclosure) ────────────────────────────
+    # Conditionally replace MCP + plugin (non-core) tools with three bridge
+    # tools (tool_search / tool_describe / tool_call) when the deferrable
+    # surface exceeds the configured threshold (default 10% of context
+    # window). Core Hermes tools (toolsets._HERMES_CORE_TOOLS) are NEVER
+    # deferred. See tools/tool_search.py for full design notes.
+    #
+    # This is deliberately the last step before returning — sanitization
+    # has already normalized schemas, and the assembly is idempotent in
+    # case some caller invokes get_tool_definitions twice.
+    try:
+        from tools.tool_search import assemble_tool_defs, load_config as _load_ts_config
+        ts_cfg = _load_ts_config()
+        if not skip_tool_search_assembly and ts_cfg.enabled != "off":
+            context_length = _resolve_active_context_length()
+            assembly = assemble_tool_defs(
+                filtered_tools,
+                context_length=context_length,
+                config=ts_cfg,
+            )
+            if assembly.activated and not quiet_mode:
+                print(
+                    f"🔎 Tool Search: {assembly.deferred_count} MCP/plugin tools deferred "
+                    f"(~{assembly.deferred_tokens} tokens) behind tool_search/describe/call. "
+                    f"Threshold ~{assembly.threshold_tokens} tokens."
+                )
+            filtered_tools = assembly.tool_defs
+    except Exception as e:  # pragma: no cover — never break tool loading
+        logger.warning("Tool search assembly skipped: %s", e)
+
     return filtered_tools
 
 
+def _resolve_active_context_length() -> int:
+    """Look up the active model's context length for the tool-search gate.
+
+    Returns 0 when the model can't be resolved — ``should_activate`` falls
+    back to a fixed token cutoff in that case.
+    """
+    try:
+        from hermes_cli.config import load_config as _load
+        cfg = _load() or {}
+        model_cfg = cfg.get("model") if isinstance(cfg.get("model"), dict) else {}
+        if not isinstance(model_cfg, dict):
+            model_cfg = {}
+        model_id = (model_cfg.get("model") or model_cfg.get("default") or "").strip()
+        if not model_id:
+            return 0
+        from agent.model_metadata import get_model_context_length
+        return int(get_model_context_length(model_id) or 0)
+    except Exception as e:
+        logger.debug("Could not resolve active context length: %s", e)
+        return 0
+
+
 # =============================================================================
 # handle_function_call  (the main dispatcher)
 # =============================================================================
@@ -747,6 +808,8 @@ def handle_function_call(
     user_task: Optional[str] = None,
     enabled_tools: Optional[List[str]] = None,
     skip_pre_tool_call_hook: bool = False,
+    enabled_toolsets: Optional[List[str]] = None,
+    disabled_toolsets: Optional[List[str]] = None,
 ) -> str:
     """
     Main function call dispatcher that routes calls to the tool registry.
@@ -760,6 +823,14 @@ def handle_function_call(
                        execute_code uses this list to determine which sandbox
                        tools to generate.  Falls back to the process-global
                        ``_last_resolved_tool_names`` for backward compat.
+        enabled_toolsets: The session's enabled toolsets.  Used to scope the
+                       Tool Search bridge catalog so ``tool_search`` /
+                       ``tool_describe`` / ``tool_call`` only see and invoke
+                       tools the session was actually granted.  ``None`` means
+                       "no restriction" (the caller scopes to every toolset),
+                       matching ``get_tool_definitions`` semantics.
+        disabled_toolsets: The session's disabled toolsets, applied as a
+                       subtraction when scoping the bridge catalog.
 
     Returns:
         Function result as a JSON string.
@@ -767,6 +838,79 @@ def handle_function_call(
     # Coerce string arguments to their schema-declared types (e.g. "42"→42)
     function_args = coerce_tool_args(function_name, function_args)
 
+    # ── Tool Search bridge dispatch ──────────────────────────────────
+    # tool_search and tool_describe are pure catalog reads — handle them
+    # inline. tool_call is unwrapped to the underlying tool so that every
+    # downstream hook (pre/post, edit approval, guardrails) sees the real
+    # tool name, not the bridge.
+    _ts_mod = None
+    try:
+        from tools import tool_search as _ts_mod  # noqa: F401
+    except Exception:
+        _ts_mod = None
+
+    if _ts_mod is not None and _ts_mod.is_bridge_tool(function_name):
+        try:
+            # Use skip_tool_search_assembly=True so we see the real catalog,
+            # not the already-collapsed bridge-only list (the bridge would
+            # otherwise be searching only itself).
+            #
+            # Scope the catalog to the session's toolsets so the bridge can
+            # only surface and invoke tools the session was actually granted.
+            # Without this, a restricted-toolset session (subagent, kanban
+            # worker, curated gateway session) would see and be able to call
+            # the entire process registry via the bridge. Passing the same
+            # enabled/disabled toolsets the session was assembled with keeps
+            # the deferred catalog identical to the deferrable subset of the
+            # session's own tool list, and avoids polluting the process-global
+            # _last_resolved_tool_names with out-of-scope tools.
+            current_defs = get_tool_definitions(
+                enabled_toolsets=enabled_toolsets,
+                disabled_toolsets=disabled_toolsets,
+                quiet_mode=True, skip_tool_search_assembly=True,
+            ) or []
+        except Exception:
+            current_defs = []
+        if function_name == _ts_mod.TOOL_SEARCH_NAME:
+            return _ts_mod.dispatch_tool_search(function_args or {},
+                                                current_tool_defs=current_defs)
+        if function_name == _ts_mod.TOOL_DESCRIBE_NAME:
+            return _ts_mod.dispatch_tool_describe(function_args or {},
+                                                  current_tool_defs=current_defs)
+        if function_name == _ts_mod.TOOL_CALL_NAME:
+            underlying_name, underlying_args, err = _ts_mod.resolve_underlying_call(function_args or {})
+            if err or not underlying_name:
+                return json.dumps({"error": err or "tool_call could not be resolved"},
+                                  ensure_ascii=False)
+            # Defense in depth: the underlying tool MUST be in the session's
+            # scoped deferrable catalog. resolve_underlying_call() only checks
+            # that the name is deferrable in the global registry; this gate
+            # additionally rejects any tool the session was not granted, so a
+            # restricted session can never invoke an out-of-scope tool through
+            # the bridge even if the catalog scoping above regressed.
+            _scoped_deferrable = _ts_mod.scoped_deferrable_names(current_defs)
+            if underlying_name not in _scoped_deferrable:
+                return json.dumps({
+                    "error": (
+                        f"'{underlying_name}' is not available in this session. "
+                        "Use tool_search to find tools you can call."
+                    ),
+                }, ensure_ascii=False)
+            # Recurse with the underlying tool. All hooks fire against the
+            # real tool name. The bridge is invisible to hooks by design.
+            return handle_function_call(
+                function_name=underlying_name,
+                function_args=underlying_args,
+                task_id=task_id,
+                tool_call_id=tool_call_id,
+                session_id=session_id,
+                user_task=user_task,
+                enabled_tools=enabled_tools,
+                skip_pre_tool_call_hook=skip_pre_tool_call_hook,
+                enabled_toolsets=enabled_toolsets,
+                disabled_toolsets=disabled_toolsets,
+            )
+
     try:
         if function_name in _AGENT_LOOP_TOOLS:
             return json.dumps({"error": f"{function_name} must be handled by the agent loop"})
diff --git a/nix/nixosModules.nix b/nix/nixosModules.nix
index f5c067a6398..19abc81a3b7 100644
--- a/nix/nixosModules.nix
+++ b/nix/nixosModules.nix
@@ -242,7 +242,7 @@
         type = types.str;
         default = "${cfg.stateDir}/workspace";
         defaultText = literalExpression ''"''${cfg.stateDir}/workspace"'';
-        description = "Working directory for the agent (MESSAGING_CWD).";
+        description = "Working directory for the agent.";
       };
 
       # ── Declarative config ───────────────────────────────────────────────
diff --git a/nix/packages.nix b/nix/packages.nix
index b6dd6cd839c..cf4ec8012b2 100644
--- a/nix/packages.nix
+++ b/nix/packages.nix
@@ -43,7 +43,6 @@
             "modal"
             "parallel-web"
             "tts-premium"
-            "vercel"
             "voice"
           ] ++ lib.optionals pkgs.stdenv.isLinux [ "matrix" ];
         };
diff --git a/nix/web.nix b/nix/web.nix
index c65859faaf6..8144eab6386 100644
--- a/nix/web.nix
+++ b/nix/web.nix
@@ -1,21 +1,20 @@
 # nix/web.nix — Hermes Web Dashboard (Vite/React) frontend build
 { pkgs, hermesNpmLib, ... }:
 let
-  src = ../apps;
+  src = ../web;
   npmDeps = pkgs.fetchNpmDeps {
     inherit src;
-    hash = "sha256-6qhGuifHVtCeep1SiQdCUxBMr7UGhYpdMTvXhrQu/zA=";
+    hash = "sha256-HV0aISBVjwbGqDj8qQynSxGFrrZDzuYAW3D3lB/x3zo=";
   };
 
-  npm = hermesNpmLib.mkNpmPassthru { folder = "apps/dashboard"; attr = "web"; pname = "hermes-web"; };
+  npm = hermesNpmLib.mkNpmPassthru { folder = "web"; attr = "web"; pname = "hermes-web"; };
 
-  packageJson = builtins.fromJSON (builtins.readFile (src + "/dashboard/package.json"));
+  packageJson = builtins.fromJSON (builtins.readFile (src + "/package.json"));
   version = packageJson.version;
 in
 pkgs.buildNpmPackage (npm // {
   pname = "hermes-web";
   inherit src npmDeps version;
-  sourceRoot = "apps/dashboard";
 
   doCheck = false;
 
diff --git a/optional-skills/autonomous-ai-agents/antigravity-cli/SKILL.md b/optional-skills/autonomous-ai-agents/antigravity-cli/SKILL.md
new file mode 100644
index 00000000000..8973a85723b
--- /dev/null
+++ b/optional-skills/autonomous-ai-agents/antigravity-cli/SKILL.md
@@ -0,0 +1,177 @@
+---
+name: antigravity-cli
+description: "Operate the Antigravity CLI (agy): plugins, auth, sandbox."
+version: 0.1.0
+author: Tony Simons (asimons81), Hermes Agent
+license: MIT
+platforms: [linux, macos, windows]
+metadata:
+  hermes:
+    tags: [Coding-Agent, Antigravity, CLI, Auth, Plugins, Sandbox]
+    related_skills: [grok, codex, claude-code, hermes-agent]
+---
+
+# Antigravity CLI (`agy`)
+
+Operator guide for the Antigravity CLI, invoked as `agy`. Run all `agy`
+commands through the Hermes `terminal` tool; inspect its config and logs with
+`read_file`. This skill is reference + procedure — it does not wrap a network
+API, so there is nothing to authenticate from Hermes itself.
+
+## When to Use
+
+- Installing, updating, or smoke-testing the `agy` binary
+- Driving non-interactive `agy --print` / `agy -p` one-shots
+- Debugging Antigravity auth, sandbox, permissions, or plugin state
+- Reading Antigravity settings, keybindings, conversations, or logs
+
+## Mental model
+
+Antigravity has two layers — keep them distinct or the guidance will be wrong:
+
+1. **Shell wrapper commands** — `agy help`, `agy install`, `agy plugin`,
+   `agy update`, `agy changelog`. Run these through the `terminal` tool.
+2. **Interactive in-session slash commands** — `/config`, `/permissions`,
+   `/skills`, `/agents`, etc. These only exist inside a running `agy` TUI
+   session, not on the shell wrapper.
+
+`agy help` shows the shell wrapper surface, NOT the in-session slash commands.
+
+## Prerequisites
+
+- The `agy` binary on PATH. Verify through the `terminal` tool:
+  `command -v agy && agy --version`.
+- No env vars or API keys required by this skill — Antigravity manages its own
+  auth via the OS keyring / browser sign-in (see Authentication below).
+
+## How to Run
+
+Invoke every `agy` command through the `terminal` tool. Examples:
+
+```
+terminal(command="agy --version")
+terminal(command="agy help")
+terminal(command="agy plugin list")
+terminal(command="agy --print 'Summarize the repo in 3 bullets'", workdir="/path/to/project")
+```
+
+For an interactive multi-turn TUI session, launch `agy` with `pty=true` (and
+tmux for capture/monitoring), the same pattern the `codex` / `claude-code`
+skills use. For one-shot smoke tests and scripted prompts, prefer
+`agy --print` (non-interactive).
+
+To inspect Antigravity's own files, use `read_file` on the paths under Core
+paths below — do not `cat` them through the terminal.
+
+## Core paths
+
+- Binary / entrypoint: `agy`
+- App data dir: `~/.gemini/antigravity-cli/`
+- Settings file: `~/.gemini/antigravity-cli/settings.json`
+- Keybindings file: `~/.gemini/antigravity-cli/keybindings.json`
+- Logs: `~/.gemini/antigravity-cli/log/cli-*.log`
+- Conversations: `~/.gemini/antigravity-cli/conversations/`
+- Brain artifacts: `~/.gemini/antigravity-cli/brain/`
+- History: `~/.gemini/antigravity-cli/history.jsonl`
+- Plugin staging: `~/.gemini/antigravity-cli/plugins/<plugin_name>/`
+
+## Quick Reference
+
+### Wrapper commands
+- `agy changelog`
+- `agy help`
+- `agy install`
+- `agy plugin` / `agy plugins`
+- `agy update`
+
+### Useful flags
+- `--add-dir`
+- `--continue` / `-c`
+- `--conversation`
+- `--dangerously-skip-permissions`
+- `--print` / `-p`
+- `--print-timeout`
+- `--prompt`
+- `--prompt-interactive` / `-i`
+- `--sandbox`
+- `--log-file`
+- `--version`
+
+### Plugin subcommands (`agy plugin --help`)
+- `list`, `import [source]`, `install <target>`, `uninstall <name>`,
+  `enable <name>`, `disable <name>`, `validate [path]`, `link <mp> <target>`,
+  `help`
+
+### Install flags (`agy install --help`)
+- `--dir`, `--skip-aliases`, `--skip-path`
+
+### In-session slash commands
+- **Conversation control:** `/resume` (`/switch`), `/rewind` (`/undo`),
+  `/rename <name>`, `/clear`, `/fork`, `/reset`, `/new`
+- **Settings & tools:** `/config`, `/settings`, `/permissions`, `/model`,
+  `/keybindings`, `/statusline`, `/tasks`, `/skills`, `/mcp`, `/open <path>`,
+  `/usage`, `/logout`, `/agents`
+- **Prompt helpers:** `@` path autocomplete, `esc esc` clears the prompt (when
+  not streaming), `!` runs a terminal command directly, `?` opens help
+
+## Settings and permissions
+
+### Common settings keys (`settings.json`)
+- `allowNonWorkspaceAccess`
+- `colorScheme`
+- `permissions.allow`
+- `trustedWorkspaces`
+
+### Permission modes
+`request-review`, `always-proceed`, `strict`, `proceed-in-sandbox`.
+
+### Sandbox behavior
+- `enableTerminalSandbox` is a boolean in `settings.json`; default `false`.
+- Launch-time overrides (`--sandbox`, `--dangerously-skip-permissions`) can
+  supersede persistent settings for the current session.
+
+## Authentication behavior
+
+- The CLI tries the OS secure keyring first.
+- With no saved session, it falls back to browser-based Google sign-in.
+- Locally it opens the default browser; over SSH it prints an authorization URL
+  and expects the auth code pasted back.
+- `/logout` removes saved credentials.
+
+## Plugins
+
+- Plugins stage under `~/.gemini/antigravity-cli/plugins/<plugin_name>/`.
+- They can bundle skills, agents, rules, MCP servers, and hooks.
+- `agy plugin list` returning no imported plugins is a valid empty state.
+
+## Pitfalls
+
+- `agy help` shows wrapper commands, not interactive slash commands.
+- `agy --version` is the safe non-interactive version check; `agy version` is
+  interactive and can fail without a real TTY.
+- First place to look for failures: `~/.gemini/antigravity-cli/log/cli-*.log`
+  (read with `read_file`).
+- Don't confuse persistent JSON settings with launch-time overrides.
+- `~/.gemini/antigravity-cli/bin/agentapi` is a thin wrapper to `agy agentapi`.
+- On WSL, token storage is file-based, so auth issues are usually local-file /
+  session-state problems, not browser-only problems.
+- Workspace identity can depend on launch directory and the `.antigravitycli`
+  project marker.
+
+## Verification
+
+Confirm the install is real and usable, all through the `terminal` tool (read
+files with `read_file`):
+
+1. `terminal(command="command -v agy")`
+2. `terminal(command="agy --version")`
+3. `terminal(command="agy help")`
+4. `terminal(command="agy plugin list")`
+5. `read_file` on `~/.gemini/antigravity-cli/settings.json`
+6. `read_file` on the latest `~/.gemini/antigravity-cli/log/cli-*.log`
+7. If needed, `read_file` on `~/.gemini/antigravity-cli/keybindings.json`
+
+## Support files
+
+- `references/cli-docs.md` — condensed notes from the getting-started, usage,
+  and features docs.
diff --git a/optional-skills/autonomous-ai-agents/antigravity-cli/references/cli-docs.md b/optional-skills/autonomous-ai-agents/antigravity-cli/references/cli-docs.md
new file mode 100644
index 00000000000..2a7158c08e2
--- /dev/null
+++ b/optional-skills/autonomous-ai-agents/antigravity-cli/references/cli-docs.md
@@ -0,0 +1,64 @@
+# Antigravity CLI docs, condensed
+
+Source pages reviewed:
+- `/docs/cli-getting-started`
+- `/docs/cli-using`
+- `/docs/cli-features`
+
+## Install
+- macOS/Linux: `curl -fsSL https://antigravity.google/cli/install.sh | bash`
+- Windows PowerShell: `irm https://antigravity.google/cli/install.ps1 | iex`
+- Windows CMD: `curl -fsSL https://antigravity.google/cli/install.cmd -o install.cmd && install.cmd && del install.cmd`
+
+## Authentication
+- Tries secure keyring first.
+- If no saved session exists, falls back to browser-based Google sign-in.
+- Local machine: opens the default browser.
+- SSH/remote: prints a secure authorization URL, then expects the auth code to be pasted back.
+- `/logout` removes saved credentials.
+
+## Config and files
+- Settings: `~/.gemini/antigravity-cli/settings.json`
+- Keybindings: `~/.gemini/antigravity-cli/keybindings.json`
+- Plugins: `~/.gemini/antigravity-cli/plugins/<plugin_name>/`
+
+## Useful slash commands
+- `/config`, `/settings`
+- `/permissions`
+- `/resume` / `/switch`
+- `/rewind` / `/undo`
+- `/rename <name>`
+- `/model`
+- `/keybindings`
+- `/statusline`
+- `/tasks`
+- `/skills`
+- `/mcp`
+- `/open <path>`
+- `/usage`
+- `/logout`
+- `/agents`
+
+## Prompt helpers
+- `@` path autocomplete
+- `esc esc` clears prompt when not streaming
+- `!` runs a terminal command
+- `?` opens help / slash command list
+
+## Permissions and sandbox
+- Permission modes: `request-review`, `always-proceed`, `strict`, `proceed-in-sandbox`
+- Launch overrides: `--sandbox`, `--dangerously-skip-permissions`
+- Sandbox setting: `enableTerminalSandbox` in `settings.json` (default `false`)
+
+## Plugins
+- Plugins can bundle skills, agents, rules, MCP servers, and hooks.
+- They are staged locally and auto-discovered once installed.
+
+## Subagents
+- `/agents` opens the panel for active/completed subagents.
+- Subagents can run in parallel and request approvals.
+
+## Keybindings
+- `~/.gemini/antigravity-cli/keybindings.json`
+- Malformed JSON falls back to defaults for broken actions.
+- Docs list default bindings for clear, submit, cancel, exit, suspend, editor, approval yes/no, navigation, clipboard, undo/redo, and newline insertion.
diff --git a/optional-skills/autonomous-ai-agents/grok/SKILL.md b/optional-skills/autonomous-ai-agents/grok/SKILL.md
new file mode 100644
index 00000000000..4ba4131051c
--- /dev/null
+++ b/optional-skills/autonomous-ai-agents/grok/SKILL.md
@@ -0,0 +1,301 @@
+---
+name: grok
+description: "Delegate coding to xAI Grok Build CLI (features, PRs)."
+version: 0.1.0
+author: Matt Maximo (MattMaximo), Hermes Agent
+license: MIT
+platforms: [linux, macos, windows]
+metadata:
+  hermes:
+    tags: [Coding-Agent, Grok, xAI, Code-Review, Refactoring, Automation]
+    related_skills: [codex, claude-code, hermes-agent]
+---
+
+# Grok Build CLI — Hermes Orchestration Guide
+
+Delegate coding tasks to [Grok Build](https://docs.x.ai/build/overview) (xAI's
+autonomous coding agent CLI, the `grok` command) via the Hermes terminal. Grok
+can read files, write code, run shell commands, spawn subagents, and manage git
+workflows. It runs three ways: an interactive TUI, **headless** (`-p`), and as
+an **ACP agent** over JSON-RPC.
+
+This is the third sibling to `codex` and `claude-code`. The orchestration
+pattern is nearly identical — **prefer headless `-p` for one-shots**, use a PTY
+for interactive sessions.
+
+## When to use
+
+- Building features
+- Refactoring
+- PR reviews
+- Batch issue fixing
+- Any task where you'd otherwise reach for Codex / Claude Code but want Grok
+
+## Prerequisites
+
+- **Install (preferred):** `npm install -g @xai-official/grok`
+  - The official installer `curl -fsSL https://x.ai/cli/install.sh | bash` also
+    works, but the `x.ai` host is Cloudflare-walled in some environments. The
+    npm path avoids that dependency entirely.
+- **Auth — SuperGrok / X Premium+ subscription (primary path):**
+  - Run `grok login` once → opens a browser for OAuth → token cached in
+    `~/.grok/auth.json`. This uses your **SuperGrok or X Premium+** subscription
+    (no per-token API billing).
+  - Check sign-in state by looking for `~/.grok/auth.json`, or run a cheap
+    headless smoke test: `grok --no-auto-update -p "Say ok."`
+  - In the TUI, `/logout` signs out and `/login` (or relaunching) signs back in.
+- **No git repo required** — unlike Codex, Grok runs fine outside a git
+  directory (good for scratch/throwaway tasks).
+- **Claude Code / AGENTS.md compatible with zero config** — Grok auto-reads
+  `CLAUDE.md`, `.claude/` (skills, agents, MCPs, hooks, rules), and the
+  `AGENTS.md` family. Existing project context just works.
+
+> **API-key fallback (not the default for this user):** Grok also supports
+> setting the `XAI_API_KEY` environment variable for pay-as-you-go billing
+> via `api.x.ai`. Only use
+> this if `grok login` / SuperGrok auth is unavailable. The subscription path
+> (`grok login`) is the intended setup here.
+
+## Two Orchestration Modes
+
+### Mode 1: Headless (`-p`) — Non-Interactive (PREFERRED)
+
+Runs a one-shot task, prints the result, and exits. No PTY, no interactive
+dialogs to navigate. This is the cleanest integration path — the analog of
+`claude -p` and `codex exec`.
+
+```
+terminal(command="grok --no-auto-update -p 'Add a dark mode toggle to settings'", workdir="/path/to/project", timeout=180)
+```
+
+Always pass `--no-auto-update` in automation to skip background update checks.
+
+**When to use headless:**
+- One-shot coding tasks (fix a bug, add a feature, refactor)
+- CI/CD automation and scripting
+- Structured output parsing with `--output-format json`
+- Any task that doesn't need multi-turn conversation
+
+### Mode 2: Interactive PTY — Multi-Turn TUI Sessions
+
+The TUI is a fullscreen, mouse-interactive app. Drive it with `pty=true`. For
+robust monitoring/input use tmux (same pattern as the `claude-code` skill).
+
+```
+# Launch in a tmux session for capture-pane monitoring
+terminal(command="tmux new-session -d -s grok-work -x 140 -y 40")
+terminal(command="tmux send-keys -t grok-work 'cd /path/to/project && grok' Enter")
+
+# Wait for startup, then send a task
+terminal(command="sleep 5 && tmux send-keys -t grok-work 'Refactor the auth module to use JWT' Enter")
+
+# Monitor progress
+terminal(command="sleep 15 && tmux capture-pane -t grok-work -p -S -50")
+
+# Exit when done
+terminal(command="tmux send-keys -t grok-work '/quit' Enter && sleep 1 && tmux kill-session -t grok-work")
+```
+
+**Tip for headless-but-inline output:** if you want TUI-style output without the
+fullscreen alt-screen takeover (e.g. for cleaner logs), add `--no-alt-screen`.
+For pure automation, headless `-p` is still cleaner than the TUI.
+
+## Headless Deep Dive
+
+### Common Flags
+
+| Flag | Effect |
+|------|--------|
+| `-p, --single <PROMPT>` | Send one prompt, run headless, exit |
+| `-m, --model <MODEL>` | Choose a model |
+| `-s, --session-id <ID>` | Create or resume a named headless session |
+| `-r, --resume <ID>` | Resume an existing session |
+| `-c, --continue` | Continue the most recent session in the current directory |
+| `--cwd <PATH>` | Set the working directory |
+| `--output-format <FMT>` | `plain` (default), `json`, or `streaming-json` |
+| `--always-approve` | Auto-approve all tool executions (the `--full-auto` / `--yolo` equivalent) |
+| `--no-alt-screen` | Run inline, no fullscreen TUI takeover |
+| `--no-auto-update` | Skip background update checks (use in all automation) |
+
+### Output Formats
+
+- `plain` — human-readable text (default)
+- `json` — one JSON object at the end of the run (parse the result cleanly)
+- `streaming-json` — newline-delimited JSON events as they arrive
+
+```
+# Structured result for parsing
+terminal(command="grok --no-auto-update -p 'List all TODO comments in src/' --output-format json", workdir="/project", timeout=120)
+
+# Auto-approve for autonomous building
+terminal(command="grok --no-auto-update --always-approve -p 'Refactor the database layer and run the tests'", workdir="/project", timeout=300)
+```
+
+### Background Mode (Long Tasks)
+
+```
+# Start headless in background
+terminal(command="grok --no-auto-update --always-approve -p 'Refactor the auth module'", workdir="/project", background=true, notify_on_complete=true)
+# Returns session_id
+
+# Monitor
+process(action="poll", session_id="<id>")
+process(action="log", session_id="<id>")
+
+# Kill if needed
+process(action="kill", session_id="<id>")
+```
+
+For an interactive (TUI) background session, use `pty=true` + tmux and monitor
+with `tmux capture-pane`, exactly like the `claude-code` / `codex` skills.
+
+### Session Continuation
+
+```
+# Start a named session
+terminal(command="grok --no-auto-update -s refactor-db -p 'Start refactoring the database layer' --always-approve", workdir="/project", timeout=240)
+
+# Resume it later
+terminal(command="grok --no-auto-update -r refactor-db -p 'Now add connection pooling' --always-approve", workdir="/project", timeout=180)
+
+# Or continue the most recent session in this directory
+terminal(command="grok --no-auto-update -c -p 'What did you change last time?'", workdir="/project", timeout=60)
+```
+
+## Read-Only Audit → Markdown Note Pattern
+
+To have Grok review local artifacts and return a clean markdown note (for
+Obsidian or a repo) without mutating anything:
+
+1. Prepare stable input files first with Hermes tools (`read_file`,
+   `write_file`). Snapshot only the relevant context into a temp file rather
+   than dumping raw paths.
+2. Run Grok headless **without** `--always-approve` so it cannot auto-write, and
+   demand `markdown only, no preamble`.
+3. Save Grok's stdout straight into the destination note with `write_file()`.
+
+```
+grok --no-auto-update -p "Read /tmp/current.md and /tmp/inventory.md. Produce markdown only, no preamble. Output a clean note titled 'Cleanup Review'." --output-format plain
+```
+
+**Pitfall (same as Claude Code):** for document rewrites, a loose "rewrite this"
+prompt may return a change summary instead of the full file. Instead: pipe the
+file in, and demand `Return ONLY the full revised markdown document. No intro,
+no explanation, no code fences. Start immediately with '# Title'.` Verify the
+first lines with `read_file()` before overwriting the destination.
+
+## PR Review Patterns
+
+### Quick Review (Headless)
+
+```
+terminal(command="cd /path/to/repo && git diff main...feature-branch | grok --no-auto-update -p 'Review this diff for bugs, security issues, and style problems. Be thorough.'", timeout=120)
+```
+
+### Clone-to-temp Review (safe, no repo mutation)
+
+```
+terminal(command="REVIEW=$(mktemp -d) && git clone https://github.com/user/repo.git $REVIEW && cd $REVIEW && gh pr checkout 42 && grok --no-auto-update -p 'Review the changes vs origin/main. Check bugs, security, race conditions, missing tests.'", pty=true, timeout=300)
+```
+
+### Post the review
+
+```
+terminal(command="gh pr comment 42 --body '<review text>'", workdir="/path/to/repo")
+```
+
+## Parallel Issue Fixing with Worktrees
+
+```
+# Create worktrees
+terminal(command="git worktree add -b fix/issue-78 /tmp/issue-78 main", workdir="~/project")
+terminal(command="git worktree add -b fix/issue-99 /tmp/issue-99 main", workdir="~/project")
+
+# Launch Grok headless in each (background)
+terminal(command="grok --no-auto-update --always-approve -p 'Fix issue #78: <description>. Commit when done.'", workdir="/tmp/issue-78", background=true, notify_on_complete=true)
+terminal(command="grok --no-auto-update --always-approve -p 'Fix issue #99: <description>. Commit when done.'", workdir="/tmp/issue-99", background=true, notify_on_complete=true)
+
+# Monitor
+process(action="list")
+
+# After completion: push and open PRs
+terminal(command="cd /tmp/issue-78 && git push -u origin fix/issue-78")
+terminal(command="gh pr create --repo user/repo --head fix/issue-78 --title 'fix: ...' --body '...'")
+
+# Cleanup
+terminal(command="git worktree remove /tmp/issue-78", workdir="~/project")
+```
+
+## Useful Subcommands & TUI Commands
+
+| Command | Purpose |
+|---------|---------|
+| `grok` | Start the interactive TUI |
+| `grok -p "query"` | Headless one-shot |
+| `grok login` / `grok logout` | Sign in / out (SuperGrok / X Premium+ OAuth) |
+| `grok inspect` | Show what Grok discovered in cwd: config sources, instructions, skills, plugins, hooks, MCP servers |
+| `grok agent stdio` | Run as an ACP agent over JSON-RPC (for IDE/tool integration) |
+| `grok update` | Update the CLI (needs the `x.ai` host; skip in automation) |
+
+TUI slash commands (interactive only): `/model <name>`, `/always-approve`,
+`/plan`, `/context`, `/compact`, `/resume`, `/sessions`, `/fork`, `/usage`,
+`/quit`. `Shift+Tab` cycles session modes (including Plan mode, which blocks
+write tools except the session plan file).
+
+## Config (`~/.grok/config.toml`)
+
+```toml
+[cli]
+auto_update = false          # skip background update checks persistently
+
+[ui]
+permission_mode = "ask"      # or "always-approve" to skip tool prompts by default
+
+[models]
+default = "grok-build-0.1"
+```
+
+Put global preferences in `~/.grok/config.toml` (not project-scoped
+`.grok/config.toml`). `permission_mode` supersedes the legacy `approval_mode` /
+`yolo = true` keys.
+
+## Pitfalls & Gotchas
+
+1. **Auth is subscription-gated.** `grok login` requires a SuperGrok or X
+   Premium+ subscription. If login fails or there's no `~/.grok/auth.json`,
+   confirm the subscription is active before falling back to `XAI_API_KEY`.
+2. **Don't conflate Hermes' xAI auth with the `grok` CLI's auth.** Hermes'
+   `x_search` runs on its own xAI OAuth; the standalone `grok` CLI has a
+   separate token in `~/.grok/auth.json`. A working `x_search` does NOT mean
+   `grok` is logged in.
+3. **Always pass `--no-auto-update` in automation** — otherwise Grok phones home
+   for update checks (and `x.ai`/`storage.googleapis.com` may be unreachable).
+4. **Prefer npm install over the curl installer** — `npm install -g
+   @xai-official/grok` avoids the Cloudflare-walled `x.ai` host.
+5. **`--always-approve` is the autonomous-build switch.** Without it, headless
+   runs may stall waiting on tool-approval prompts. Omit it deliberately for
+   read-only review/audit work so Grok can't mutate files.
+6. **Headless `-p` skips TUI dialogs**; the TUI needs `pty=true` (+ tmux for
+   monitoring), just like Claude Code.
+7. **Use `--no-alt-screen`** if you run the TUI inline and the fullscreen
+   alt-screen takeover garbles captured output.
+8. **No git repo needed**, but for PR/commit workflows you still want one — use
+   `mktemp -d && git init` for scratch commit tasks.
+9. **Clean up tmux sessions** with `tmux kill-session -t <name>` when done.
+
+## Rules for Hermes Agents
+
+1. **Prefer headless `-p`** for single tasks — cleanest integration, structured
+   output via `--output-format json`.
+2. **Always set `workdir`** (or `--cwd`) so Grok targets the right project.
+3. **Pass `--no-auto-update`** in every automated invocation.
+4. **Use `--always-approve` only when Grok should write autonomously**; omit it
+   for read-only reviews and audits.
+5. **Background long tasks** with `background=true, notify_on_complete=true` and
+   monitor via the `process` tool.
+6. **Use tmux for multi-turn interactive work** and monitor with
+   `tmux capture-pane -t <session> -p -S -50`.
+7. **Verify auth before relying on it** — check `~/.grok/auth.json` or run a
+   cheap `grok -p "Say ok."` smoke test; don't assume Hermes' xAI auth carries
+   over.
+8. **Report results to the user** — summarize what Grok changed and what's left.
diff --git a/optional-skills/creative/meme-generation/scripts/generate_meme.py b/optional-skills/creative/meme-generation/scripts/generate_meme.py
index 807fee71165..1a93c13f6b8 100644
--- a/optional-skills/creative/meme-generation/scripts/generate_meme.py
+++ b/optional-skills/creative/meme-generation/scripts/generate_meme.py
@@ -18,7 +18,6 @@ unknown templates get smart default text positioning based on their box_count.
 import json
 import os
 import sys
-import textwrap
 from io import BytesIO
 from pathlib import Path
 
diff --git a/optional-skills/finance/dcf-model/scripts/validate_dcf.py b/optional-skills/finance/dcf-model/scripts/validate_dcf.py
index 6c8172cf8cf..876edde9f1f 100755
--- a/optional-skills/finance/dcf-model/scripts/validate_dcf.py
+++ b/optional-skills/finance/dcf-model/scripts/validate_dcf.py
@@ -7,7 +7,6 @@ Validates Excel DCF models for formula errors and common DCF mistakes
 import sys
 import json
 from pathlib import Path
-from typing import Optional
 
 
 class DCFModelValidator:
diff --git a/optional-skills/health/fitness-nutrition/scripts/nutrition_search.py b/optional-skills/health/fitness-nutrition/scripts/nutrition_search.py
index 7494f6c3881..e741fca9abc 100644
--- a/optional-skills/health/fitness-nutrition/scripts/nutrition_search.py
+++ b/optional-skills/health/fitness-nutrition/scripts/nutrition_search.py
@@ -16,7 +16,6 @@ import json
 import time
 import urllib.request
 import urllib.parse
-import urllib.error
 
 API_KEY = os.environ.get("USDA_API_KEY", "DEMO_KEY")
 BASE = "https://api.nal.usda.gov/fdc/v1"
diff --git a/optional-skills/migration/openclaw-migration/SKILL.md b/optional-skills/migration/openclaw-migration/SKILL.md
index 4d8734f52bc..3bceba8727d 100644
--- a/optional-skills/migration/openclaw-migration/SKILL.md
+++ b/optional-skills/migration/openclaw-migration/SKILL.md
@@ -38,7 +38,7 @@ It uses `scripts/openclaw_to_hermes.py` to:
 - import `SOUL.md` into the Hermes home directory as `SOUL.md`
 - transform OpenClaw `MEMORY.md` and `USER.md` into Hermes memory entries
 - merge OpenClaw command approval patterns into Hermes `command_allowlist`
-- migrate Hermes-compatible messaging settings such as `TELEGRAM_ALLOWED_USERS` and `MESSAGING_CWD`
+- migrate Hermes-compatible messaging settings such as `TELEGRAM_ALLOWED_USERS`, and map OpenClaw workspace settings to Hermes working-directory configuration
 - copy OpenClaw skills into `~/.hermes/skills/openclaw-imports/`
 - optionally copy the OpenClaw workspace instructions file into a chosen Hermes workspace
 - mirror compatible workspace assets such as `workspace/tts/` into `~/.hermes/tts/`
diff --git a/optional-skills/research/drug-discovery/scripts/chembl_target.py b/optional-skills/research/drug-discovery/scripts/chembl_target.py
index 1346b999ab3..0c7937469ba 100644
--- a/optional-skills/research/drug-discovery/scripts/chembl_target.py
+++ b/optional-skills/research/drug-discovery/scripts/chembl_target.py
@@ -5,7 +5,7 @@ Usage: python3 chembl_target.py "EGFR" --min-pchembl 7 --limit 20
 No external dependencies.
 """
 import sys, json, time, argparse
-import urllib.request, urllib.parse, urllib.error
+import urllib.request, urllib.parse
 
 BASE = "https://www.ebi.ac.uk/chembl/api/data"
 
diff --git a/optional-skills/research/drug-discovery/scripts/ro5_screen.py b/optional-skills/research/drug-discovery/scripts/ro5_screen.py
index 84e438fa14b..dd304d0d4d9 100644
--- a/optional-skills/research/drug-discovery/scripts/ro5_screen.py
+++ b/optional-skills/research/drug-discovery/scripts/ro5_screen.py
@@ -4,8 +4,8 @@ ro5_screen.py — Batch Lipinski Ro5 + Veber screening via PubChem API.
 Usage: python3 ro5_screen.py aspirin ibuprofen paracetamol
 No external dependencies beyond stdlib.
 """
-import sys, json, time, argparse
-import urllib.request, urllib.parse, urllib.error
+import sys, json, time
+import urllib.request, urllib.parse
 
 BASE = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name"
 PROPS = "MolecularWeight,XLogP,HBondDonorCount,HBondAcceptorCount,RotatableBondCount,TPSA"
diff --git a/optional-skills/research/osint-investigation/scripts/fetch_gdelt.py b/optional-skills/research/osint-investigation/scripts/fetch_gdelt.py
index fa98dabc9bb..008b050ea0d 100644
--- a/optional-skills/research/osint-investigation/scripts/fetch_gdelt.py
+++ b/optional-skills/research/osint-investigation/scripts/fetch_gdelt.py
@@ -11,7 +11,6 @@ from __future__ import annotations
 
 import argparse
 import csv
-import json
 import sys
 import time
 import urllib.parse
diff --git a/optional-skills/research/osint-investigation/scripts/fetch_opencorporates.py b/optional-skills/research/osint-investigation/scripts/fetch_opencorporates.py
index 6924a8056a6..a30ba13a91e 100644
--- a/optional-skills/research/osint-investigation/scripts/fetch_opencorporates.py
+++ b/optional-skills/research/osint-investigation/scripts/fetch_opencorporates.py
@@ -12,7 +12,6 @@ from __future__ import annotations
 
 import argparse
 import csv
-import json
 import os
 import re
 import sys
diff --git a/optional-skills/research/osint-investigation/scripts/fetch_wikipedia.py b/optional-skills/research/osint-investigation/scripts/fetch_wikipedia.py
index 4ce5c93813c..3e65b87b882 100644
--- a/optional-skills/research/osint-investigation/scripts/fetch_wikipedia.py
+++ b/optional-skills/research/osint-investigation/scripts/fetch_wikipedia.py
@@ -12,7 +12,6 @@ from __future__ import annotations
 
 import argparse
 import csv
-import json
 import re
 import sys
 import urllib.parse
diff --git a/optional-skills/research/osint-investigation/scripts/timing_analysis.py b/optional-skills/research/osint-investigation/scripts/timing_analysis.py
index 4e0ece227b4..9407264158d 100644
--- a/optional-skills/research/osint-investigation/scripts/timing_analysis.py
+++ b/optional-skills/research/osint-investigation/scripts/timing_analysis.py
@@ -19,7 +19,6 @@ import argparse
 import csv
 import datetime as dt
 import json
-import math
 import random
 import statistics
 from collections import defaultdict
diff --git a/package-lock.json b/package-lock.json
index fe19020fb2c..093d6353a23 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -13,7 +13,6 @@
         "apps/*"
       ],
       "dependencies": {
-        "@askjo/camofox-browser": "^1.5.2",
         "@streamdown/math": "^1.0.2",
         "agent-browser": "^0.26.0"
       },
@@ -55,48 +54,6 @@
         "vite": "^7.3.1"
       }
     },
-    "apps/dashboard": {
-      "version": "0.0.0",
-      "dependencies": {
-        "@hermes/shared": "file:../shared",
-        "@nous-research/ui": "0.16.0",
-        "@observablehq/plot": "^0.6.17",
-        "@react-three/fiber": "^9.6.0",
-        "@tailwindcss/vite": "^4.2.1",
-        "@xterm/addon-fit": "^0.11.0",
-        "@xterm/addon-unicode11": "^0.9.0",
-        "@xterm/addon-web-links": "^0.12.0",
-        "@xterm/addon-webgl": "^0.19.0",
-        "@xterm/xterm": "^6.0.0",
-        "class-variance-authority": "^0.7.1",
-        "clsx": "^2.1.1",
-        "gsap": "^3.15.0",
-        "leva": "^0.10.1",
-        "lucide-react": "^0.577.0",
-        "motion": "^12.38.0",
-        "react": "^19.2.4",
-        "react-dom": "^19.2.4",
-        "react-router-dom": "^7.14.1",
-        "tailwind-merge": "^3.5.0",
-        "tailwindcss": "^4.2.1",
-        "unicode-animations": "^1.0.3"
-      },
-      "devDependencies": {
-        "@eslint/js": "^9.39.4",
-        "@types/node": "^24.12.0",
-        "@types/react": "^19.2.14",
-        "@types/react-dom": "^19.2.3",
-        "@vitejs/plugin-react": "^5.2.0",
-        "eslint": "^9.39.4",
-        "eslint-plugin-react-hooks": "^7.0.1",
-        "eslint-plugin-react-refresh": "^0.5.2",
-        "globals": "^17.4.0",
-        "three": "^0.180.0",
-        "typescript": "~5.9.3",
-        "typescript-eslint": "^8.56.1",
-        "vite": "^7.3.1"
-      }
-    },
     "apps/desktop": {
       "name": "hermes",
       "version": "0.0.2",
@@ -456,25 +413,6 @@
       "dev": true,
       "license": "MIT"
     },
-    "node_modules/@askjo/camofox-browser": {
-      "version": "1.5.2",
-      "resolved": "https://registry.npmjs.org/@askjo/camofox-browser/-/camofox-browser-1.5.2.tgz",
-      "integrity": "sha512-SvRCzhWnJaplxHkRVF9l1OWako6pp2eUw2mZKHOERUfLWDO2Xe/IKI+5bB+UT1TNvO45P6XdhgfAtihcTEARCg==",
-      "hasInstallScript": true,
-      "license": "MIT",
-      "dependencies": {
-        "camoufox-js": "^0.8.5",
-        "express": "^4.18.2",
-        "playwright": "^1.50.0",
-        "playwright-core": "^1.58.0",
-        "playwright-extra": "^4.3.6",
-        "prom-client": "^15.1.3",
-        "puppeteer-extra-plugin-stealth": "^2.11.2"
-      },
-      "engines": {
-        "node": ">=18"
-      }
-    },
     "node_modules/@assistant-ui/core": {
       "version": "0.1.17",
       "resolved": "https://registry.npmjs.org/@assistant-ui/core/-/core-0.1.17.tgz",
@@ -10376,34 +10314,6 @@
         "node": ">=6"
       }
     },
-    "node_modules/camoufox-js": {
-      "version": "0.8.5",
-      "resolved": "https://registry.npmjs.org/camoufox-js/-/camoufox-js-0.8.5.tgz",
-      "integrity": "sha512-20ihPbspAcOVSUTX9Drxxp0C116DON1n8OVA1eUDglWZiHwiHwFVFOMrIEBwAHMZpU11mIEH/kawJtstRIrDPA==",
-      "license": "MPL-2.0",
-      "dependencies": {
-        "adm-zip": "^0.5.16",
-        "better-sqlite3": "^12.2.0",
-        "commander": "^14.0.0",
-        "fingerprint-generator": "^2.1.66",
-        "glob": "^13.0.0",
-        "impit": "^0.7.0",
-        "language-tags": "^2.0.1",
-        "maxmind": "^5.0.0",
-        "progress": "^2.0.3",
-        "ua-parser-js": "^2.0.2",
-        "xml2js": "^0.6.2"
-      },
-      "bin": {
-        "camoufox-js": "dist/__main__.js"
-      },
-      "engines": {
-        "node": ">= 20"
-      },
-      "peerDependencies": {
-        "playwright-core": "*"
-      }
-    },
     "node_modules/caniuse-lite": {
       "version": "1.0.30001787",
       "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001787.tgz",
@@ -11488,10 +11398,6 @@
         "lodash-es": "^4.17.21"
       }
     },
-    "node_modules/dashboard": {
-      "resolved": "apps/dashboard",
-      "link": true
-    },
     "node_modules/data-urls": {
       "version": "7.0.0",
       "resolved": "https://registry.npmjs.org/data-urls/-/data-urls-7.0.0.tgz",
diff --git a/package.json b/package.json
index f2145f14233..6d519553f69 100644
--- a/package.json
+++ b/package.json
@@ -19,7 +19,6 @@
   },
   "homepage": "https://github.com/NousResearch/Hermes-Agent#readme",
   "dependencies": {
-    "@askjo/camofox-browser": "^1.5.2",
     "@streamdown/math": "^1.0.2",
     "agent-browser": "^0.26.0"
   },
diff --git a/plugins/browser/browserbase/provider.py b/plugins/browser/browserbase/provider.py
index 2b05d01d03b..41ceb9e83d1 100644
--- a/plugins/browser/browserbase/provider.py
+++ b/plugins/browser/browserbase/provider.py
@@ -26,7 +26,7 @@ Optional feature knobs::
     BROWSERBASE_PROXIES=true      # default true
     BROWSERBASE_ADVANCED_STEALTH=false
     BROWSERBASE_KEEP_ALIVE=true   # default true
-    BROWSERBASE_SESSION_TIMEOUT=... (ms, integer)
+    BROWSERBASE_SESSION_TIMEOUT=... (seconds, integer, max 21600 = 6h)
 """
 
 from __future__ import annotations
diff --git a/plugins/context_engine/__init__.py b/plugins/context_engine/__init__.py
index da9206dc349..906ade4a34c 100644
--- a/plugins/context_engine/__init__.py
+++ b/plugins/context_engine/__init__.py
@@ -174,7 +174,7 @@ def _load_engine_from_dir(engine_dir: Path) -> Optional["ContextEngine"]:
 
     # Try register(ctx) pattern first (how plugins are written)
     if hasattr(mod, "register"):
-        collector = _EngineCollector()
+        collector = _EngineCollector(engine_name=name)
         try:
             mod.register(collector)
             if collector.engine:
@@ -197,14 +197,80 @@ def _load_engine_from_dir(engine_dir: Path) -> Optional["ContextEngine"]:
 
 
 class _EngineCollector:
-    """Fake plugin context that captures register_context_engine calls."""
+    """Fake plugin context that captures register_context_engine calls.
 
-    def __init__(self):
+    Plugin context engines using the standard ``register(ctx)`` pattern may
+    also call ``ctx.register_command(...)`` to expose slash commands (e.g.
+    ``/lcm``). Forward those to the global plugin command registry so they
+    behave identically to commands registered by normal plugins.
+    """
+
+    def __init__(self, engine_name: str = ""):
         self.engine = None
+        self._engine_name = engine_name or "context_engine"
+        self._registered_commands: list[str] = []
 
     def register_context_engine(self, engine):
         self.engine = engine
 
+    def register_command(
+        self,
+        name: str,
+        handler,
+        description: str = "",
+        args_hint: str = "",
+    ) -> None:
+        """Forward to the global plugin command registry."""
+        clean = (name or "").lower().strip().lstrip("/").replace(" ", "-")
+        if not clean:
+            logger.warning(
+                "Context engine '%s' tried to register a command with an empty name.",
+                self._engine_name,
+            )
+            return
+
+        # Reject conflicts with built-in commands.
+        try:
+            from hermes_cli.commands import resolve_command
+            if resolve_command(clean) is not None:
+                logger.warning(
+                    "Context engine '%s' tried to register command '/%s' which conflicts "
+                    "with a built-in command. Skipping.",
+                    self._engine_name, clean,
+                )
+                return
+        except Exception:
+            pass
+
+        try:
+            from hermes_cli.plugins import get_plugin_manager
+            manager = get_plugin_manager()
+            if clean in manager._plugin_commands:
+                # Don't clobber a regular plugin's command — same conflict
+                # policy the plugin system uses for plugin-vs-plugin collisions.
+                logger.warning(
+                    "Context engine '%s' tried to register command '/%s' which "
+                    "is already registered by a plugin. Skipping.",
+                    self._engine_name, clean,
+                )
+                return
+            manager._plugin_commands[clean] = {
+                "handler": handler,
+                "description": description or "Context engine command",
+                "plugin": f"context-engine:{self._engine_name}",
+                "args_hint": (args_hint or "").strip(),
+            }
+            self._registered_commands.append(clean)
+            logger.debug(
+                "Context engine '%s' registered command: /%s",
+                self._engine_name, clean,
+            )
+        except Exception as exc:
+            logger.debug(
+                "Context engine '%s' could not register /%s: %s",
+                self._engine_name, clean, exc,
+            )
+
     # No-op for other registration methods
     def register_tool(self, *args, **kwargs):
         pass
diff --git a/plugins/disk-cleanup/disk_cleanup.py b/plugins/disk-cleanup/disk_cleanup.py
index b7f748e7f21..8d984273e79 100755
--- a/plugins/disk-cleanup/disk_cleanup.py
+++ b/plugins/disk-cleanup/disk_cleanup.py
@@ -481,7 +481,14 @@ def guess_category(path: Path) -> Optional[str]:
         }:
             return None
         if top == "cron" or top == "cronjobs":
-            return "cron-output"
+            # Only files under the disposable ``output/`` subtree are
+            # cleanup candidates. Top-level cron control-plane state
+            # (e.g. ``jobs.json``, ``.tick.lock``) must never be
+            # auto-tracked — deleting it wipes the live scheduler
+            # registry. See issue #32164.
+            if len(rel.parts) >= 2 and rel.parts[1] == "output":
+                return "cron-output"
+            return None
         if top == "cache":
             return "temp"
     except ValueError:
diff --git a/plugins/google_meet/cli.py b/plugins/google_meet/cli.py
index 0e9b08881b3..e721c037c81 100644
--- a/plugins/google_meet/cli.py
+++ b/plugins/google_meet/cli.py
@@ -13,7 +13,6 @@ from __future__ import annotations
 
 import argparse
 import json
-import os
 import sys
 from pathlib import Path
 from typing import Optional
diff --git a/plugins/kanban/dashboard/dist/style.css b/plugins/kanban/dashboard/dist/style.css
index 052fa4622c5..9aa780e6213 100644
--- a/plugins/kanban/dashboard/dist/style.css
+++ b/plugins/kanban/dashboard/dist/style.css
@@ -67,10 +67,6 @@
   gap: 0.75rem;
   align-items: start;
   overflow-x: auto;
-  scrollbar-width: none;
-}
-.hermes-kanban-columns::-webkit-scrollbar {
-  display: none;
 }
 
 .hermes-kanban-column {
@@ -143,6 +139,8 @@
   gap: 0.45rem;
   overflow-y: auto;
   padding-right: 0.1rem;
+  flex: 1;
+  min-height: 0;
 }
 
 .hermes-kanban-empty {
diff --git a/plugins/kanban/dashboard/plugin_api.py b/plugins/kanban/dashboard/plugin_api.py
index 104f666c300..cae4d872302 100644
--- a/plugins/kanban/dashboard/plugin_api.py
+++ b/plugins/kanban/dashboard/plugin_api.py
@@ -1310,6 +1310,58 @@ def inspect_run_endpoint(
         return {"run_id": run_id, "alive": True, "pid": pid, "error": "access denied"}
 
 
+class TerminateRunBody(BaseModel):
+    reason: Optional[str] = None
+
+
+@router.post("/runs/{run_id}/terminate")
+def terminate_run_endpoint(
+    run_id: int,
+    payload: TerminateRunBody,
+    board: Optional[str] = Query(None, description="Kanban board slug (omit for current)"),
+):
+    """Terminate the worker process backing an in-flight run.
+
+    Resolves ``run_id`` to its parent ``task_id`` and routes through
+    :func:`kanban_db.reclaim_task` so the SIGTERM->SIGKILL flow,
+    run-outcome bookkeeping, and event-log append all match what the
+    existing ``POST /tasks/{task_id}/reclaim`` endpoint does.
+
+    Responses:
+      * 200 ``{"ok": true, "run_id": ..., "task_id": ...}`` on success.
+      * 404 when ``run_id`` is unknown.
+      * 409 when the run has already ended, or the task is no longer in
+        a claimable state.
+
+    Closes the gap left by PR #28432, which shipped the read-only
+    sibling endpoints (``/workers/active``, ``/runs/{run_id}``,
+    ``/runs/{run_id}/inspect``) but no termination control surface.
+    """
+    board = _resolve_board(board)
+    conn = _conn(board=board)
+    try:
+        r = kanban_db.get_run(conn, run_id)
+        if r is None:
+            raise HTTPException(status_code=404, detail=f"run {run_id} not found")
+        if r.ended_at is not None:
+            raise HTTPException(
+                status_code=409,
+                detail=f"run {run_id} already ended",
+            )
+        ok = kanban_db.reclaim_task(conn, r.task_id, reason=payload.reason)
+        if not ok:
+            raise HTTPException(
+                status_code=409,
+                detail=(
+                    f"cannot terminate run {run_id}: task {r.task_id} is no "
+                    "longer in a reclaimable state"
+                ),
+            )
+        return {"ok": True, "run_id": run_id, "task_id": r.task_id}
+    finally:
+        conn.close()
+
+
 # ---------------------------------------------------------------------------
 # Recovery actions — reclaim a running claim, reassign to a new profile
 # ---------------------------------------------------------------------------
diff --git a/plugins/memory/hindsight/README.md b/plugins/memory/hindsight/README.md
index 4c7e0f6be30..d8f96a45e1e 100644
--- a/plugins/memory/hindsight/README.md
+++ b/plugins/memory/hindsight/README.md
@@ -75,8 +75,17 @@ Config file: `~/.hermes/hindsight/config.json`
 | `recall_prompt_preamble` | — | Custom preamble for recalled memories in context |
 | `recall_tags` | — | Tags to filter when searching memories |
 | `recall_tags_match` | `any` | Tag matching mode: `any` / `all` / `any_strict` / `all_strict` |
+| `recall_types` | `observation` | Fact types surfaced by recall (both auto-recall and the `hindsight_recall` tool). Comma-separated string or JSON list. **Default narrowed to `observation` only** (see "Behavior change" below). Set to `observation,world,experience` to also include raw facts. |
 | `auto_recall` | `true` | Automatically recall memories before each turn |
 
+> **Behavior change — `recall_types` defaults to `observation` only.**
+>
+> Previously recall returned all three fact types. It now returns only observations.
+>
+> Per [Hindsight's docs](https://hindsight.vectorize.io/developer/observations), observations are the **consolidated** knowledge layer Hindsight builds on top of raw facts: deduplicated beliefs grounded in evidence, refined as new facts arrive, with proof counts and freshness signals. Raw `world` / `experience` facts are the individual supporting evidence that feeds them. For per-turn context injection, observations are denser per token and avoid feeding the model multiple raw facts that one observation already summarizes.
+>
+> Restore the broad recall with `"recall_types": "observation,world,experience"` (string or JSON list) in `~/.hermes/hindsight/config.json`. This applies to **both** auto-recall and the `hindsight_recall` tool — both read the same `recall_types` setting (the tool schema has no per-call `types` argument), so narrowing the default narrows both paths.
+
 ### Retain
 
 | Key | Default | Description |
diff --git a/plugins/memory/hindsight/__init__.py b/plugins/memory/hindsight/__init__.py
index 1ca362e0089..ef8fcafb88a 100644
--- a/plugins/memory/hindsight/__init__.py
+++ b/plugins/memory/hindsight/__init__.py
@@ -579,7 +579,15 @@ class HindsightMemoryProvider(MemoryProvider):
         # Recall controls
         self._auto_recall = True
         self._recall_max_tokens = 4096
-        self._recall_types: list[str] | None = None
+        # Default to observation-only recall. Observations are Hindsight's
+        # consolidated knowledge layer — deduplicated, evidence-grounded
+        # beliefs built from many raw facts, with proof counts and
+        # freshness signals (see hindsight.vectorize.io/developer/observations).
+        # Including raw world/experience facts re-ships the supporting
+        # evidence that observations already summarize, burning the
+        # `recall_max_tokens` budget. Users can restore the broader
+        # recall via the `recall_types` config key.
+        self._recall_types: list[str] = ["observation"]
         self._recall_prompt_preamble = ""
         self._recall_max_input_chars = 800
 
@@ -856,6 +864,7 @@ class HindsightMemoryProvider(MemoryProvider):
             {"key": "retain_assistant_prefix", "description": "Label used before assistant turns in retained transcripts", "default": "Assistant"},
             {"key": "recall_tags", "description": "Tags to filter when searching memories (comma-separated)", "default": ""},
             {"key": "recall_tags_match", "description": "Tag matching mode for recall", "default": "any", "choices": ["any", "all", "any_strict", "all_strict"]},
+            {"key": "recall_types", "description": "Fact types to surface on recall — applies to both auto-recall and the hindsight_recall tool (comma-separated or list). Defaults to observation-only — observations are Hindsight's consolidated, deduplicated, evidence-grounded knowledge layer; raw world/experience facts are the supporting evidence observations already summarize. Set to e.g. 'observation,world,experience' to also include raw facts.", "default": "observation"},
             {"key": "auto_recall", "description": "Automatically recall memories before each turn", "default": True},
             {"key": "auto_retain", "description": "Automatically retain conversation turns", "default": True},
             {"key": "retain_every_n_turns", "description": "Retain every N turns (1 = every turn)", "default": 1},
@@ -1187,7 +1196,17 @@ class HindsightMemoryProvider(MemoryProvider):
         # Recall controls
         self._auto_recall = self._config.get("auto_recall", True)
         self._recall_max_tokens = int(self._config.get("recall_max_tokens", 4096))
-        self._recall_types = self._config.get("recall_types") or None
+        # Default narrows recall to observation-only; pass an explicit
+        # `recall_types` list in config.json to broaden (e.g. include
+        # "world" / "experience") or to disable the filter entirely.
+        configured_types = self._config.get("recall_types")
+        if configured_types is None:
+            self._recall_types = ["observation"]
+        elif isinstance(configured_types, str):
+            # Allow comma-separated strings for parity with recall_tags.
+            self._recall_types = [t.strip() for t in configured_types.split(",") if t.strip()]
+        else:
+            self._recall_types = list(configured_types) or ["observation"]
         self._recall_prompt_preamble = self._config.get("recall_prompt_preamble", "")
         self._recall_max_input_chars = int(self._config.get("recall_max_input_chars", 800))
         self._retain_async = self._config.get("retain_async", True)
diff --git a/plugins/model-providers/opencode-zen/__init__.py b/plugins/model-providers/opencode-zen/__init__.py
index 385741f09a1..a8c72cdc25c 100644
--- a/plugins/model-providers/opencode-zen/__init__.py
+++ b/plugins/model-providers/opencode-zen/__init__.py
@@ -34,6 +34,21 @@ def _is_deepseek_thinking_model(model: str | None) -> bool:
 class OpenCodeGoProfile(ProviderProfile):
     """OpenCode Go - model-specific reasoning controls."""
 
+    # Per-model completion-token cap. The opencode-go relay's default is
+    # too large for mimo-v2.5-pro — it sends max_tokens=262144 but Xiaomi
+    # only supports 131072 completion tokens and 400s the request.
+    # Setting an explicit cap here prevents the relay default from being
+    # applied. Keys are normalized via _flat_model_name().
+    _MODEL_MAX_TOKENS: dict[str, int] = {
+        "mimo-v2.5-pro": 131072,
+    }
+
+    def get_max_tokens(self, model: str | None) -> int | None:
+        cap = self._MODEL_MAX_TOKENS.get(_flat_model_name(model))
+        if cap is not None:
+            return cap
+        return self.default_max_tokens
+
     def build_api_kwargs_extras(
         self, *, reasoning_config: dict | None = None, model: str | None = None, **context
     ) -> tuple[dict[str, Any], dict[str, Any]]:
diff --git a/plugins/model-providers/openrouter/__init__.py b/plugins/model-providers/openrouter/__init__.py
index d1bf10de11d..1b464b42e82 100644
--- a/plugins/model-providers/openrouter/__init__.py
+++ b/plugins/model-providers/openrouter/__init__.py
@@ -43,6 +43,8 @@ class OpenRouterProfile(ProviderProfile):
         self, *, session_id: str | None = None, **context: Any
     ) -> dict[str, Any]:
         body: dict[str, Any] = {}
+        if session_id:
+            body["session_id"] = session_id
         prefs = context.get("provider_preferences")
         if prefs:
             body["provider"] = prefs
diff --git a/plugins/platforms/discord/adapter.py b/plugins/platforms/discord/adapter.py
index 0ffe1abac7a..c58afffcd74 100644
--- a/plugins/platforms/discord/adapter.py
+++ b/plugins/platforms/discord/adapter.py
@@ -4811,14 +4811,19 @@ class DiscordAdapter(BasePlatformAdapter):
         # to keep the partition rule clean.
         _channel_context = None
         _is_dm = isinstance(message.channel, discord.DMChannel)
-        if not _is_dm:
-            _needed_mention = (
-                require_mention
-                and not is_free_channel
-                and not in_bot_thread
-            )
-            _backfill_enabled = self._discord_history_backfill()
-            if _needed_mention and _backfill_enabled:
+        if not _is_dm and self._discord_history_backfill():
+            # Run backfill when there's a real gap to fill:
+            #   - mention-gated channels with no free-response override
+            #     (messages between bot turns aren't in the transcript)
+            #   - any thread (in_bot_thread bypasses the mention check, but
+            #     processing-window gaps and post-restart context still need
+            #     recovery)
+            # DMs skip entirely because every DM message triggers the bot,
+            # so the session transcript already has everything.
+            # Auto-threaded messages also skip — we just created the thread,
+            # there's nothing prior to backfill.
+            _has_mention_gap = require_mention and not is_free_channel and not in_bot_thread
+            if (_has_mention_gap or is_thread) and auto_threaded_channel is None:
                 _backfill_text = await self._fetch_channel_context(
                     message.channel, before=message,
                 )
diff --git a/plugins/platforms/irc/adapter.py b/plugins/platforms/irc/adapter.py
index 3358fa5b188..2d06cffbdeb 100644
--- a/plugins/platforms/irc/adapter.py
+++ b/plugins/platforms/irc/adapter.py
@@ -49,8 +49,7 @@ from gateway.platforms.base import (
     MessageEvent,
     MessageType,
 )
-from gateway.session import SessionSource
-from gateway.config import PlatformConfig, Platform
+from gateway.config import Platform
 
 
 # ---------------------------------------------------------------------------
diff --git a/plugins/platforms/line/adapter.py b/plugins/platforms/line/adapter.py
index ee035ea2e1d..00663702ea1 100644
--- a/plugins/platforms/line/adapter.py
+++ b/plugins/platforms/line/adapter.py
@@ -76,7 +76,7 @@ import time
 import uuid
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Any, Awaitable, Callable, Dict, List, Optional, Set, Tuple
+from typing import Any, Dict, List, Optional, Set, Tuple
 from urllib.parse import quote as _urlquote
 
 logger = logging.getLogger(__name__)
@@ -95,7 +95,6 @@ from gateway.platforms.base import (
     cache_image_from_bytes,
 )
 from gateway.config import Platform
-from gateway.session import SessionSource
 
 
 # ---------------------------------------------------------------------------
diff --git a/plugins/platforms/ntfy/adapter.py b/plugins/platforms/ntfy/adapter.py
index b9280ab9e6e..4ab46cecfb2 100644
--- a/plugins/platforms/ntfy/adapter.py
+++ b/plugins/platforms/ntfy/adapter.py
@@ -81,6 +81,7 @@ DEDUP_WINDOW_SECONDS = 300
 DEDUP_MAX_SIZE = 1000
 RECONNECT_BACKOFF = [2, 5, 10, 30, 60]
 STREAM_TIMEOUT_SECONDS = 90  # ntfy keepalive default is 55s; give margin
+_ECHO_TAG = "hermes-agent"  # tag added to outgoing messages for echo-loop prevention
 
 
 def _build_auth_header(token: str) -> Dict[str, str]:
@@ -311,6 +312,12 @@ class NtfyAdapter(BasePlatformAdapter):
             logger.debug("[%s] Duplicate message %s, skipping", self.name, msg_id)
             return
 
+        # Echo-loop prevention: skip messages tagged by this adapter.
+        tags = event.get("tags") or []
+        if _ECHO_TAG in tags:
+            logger.debug("[%s] Skipping own message (echo tag)", self.name)
+            return
+
         text = (event.get("message") or "").strip()
         if not text:
             logger.debug("[%s] Empty message body, skipping", self.name)
@@ -387,7 +394,11 @@ class NtfyAdapter(BasePlatformAdapter):
 
         url = f"{self._server}/{publish_topic}"
         markdown_enabled = (self.config.extra or {}).get("markdown", False)
-        headers = {**self._auth_headers(), "Content-Type": "text/plain; charset=utf-8"}
+        headers = {
+            **self._auth_headers(),
+            "Content-Type": "text/plain; charset=utf-8",
+            "X-Tags": _ECHO_TAG,
+        }
         if markdown_enabled:
             headers["X-Markdown"] = "true"
 
@@ -519,7 +530,7 @@ async def _standalone_send(
     markdown_env = os.getenv("NTFY_MARKDOWN", "").strip().lower()
     markdown_enabled = bool(extra.get("markdown")) or markdown_env in ("1", "true", "yes")
 
-    headers = {"Content-Type": "text/plain; charset=utf-8", **_build_auth_header(token)}
+    headers = {"Content-Type": "text/plain; charset=utf-8", "X-Tags": _ECHO_TAG, **_build_auth_header(token)}
     if markdown_enabled:
         headers["X-Markdown"] = "true"
 
diff --git a/plugins/spotify/tools.py b/plugins/spotify/tools.py
index f6022ff5aab..4bd18a02b61 100644
--- a/plugins/spotify/tools.py
+++ b/plugins/spotify/tools.py
@@ -2,7 +2,7 @@
 
 from __future__ import annotations
 
-from typing import Any, Dict, List
+from typing import Any, List
 
 from hermes_cli.auth import get_auth_status
 from plugins.spotify.client import (
diff --git a/plugins/teams_pipeline/cli.py b/plugins/teams_pipeline/cli.py
index 7afaa3888a0..4b0b1266d94 100644
--- a/plugins/teams_pipeline/cli.py
+++ b/plugins/teams_pipeline/cli.py
@@ -24,7 +24,6 @@ from plugins.teams_pipeline.store import TeamsPipelineStore, resolve_teams_pipel
 from plugins.teams_pipeline.subscriptions import (
     build_graph_client,
     maintain_graph_subscriptions,
-    sync_graph_subscription_record,
 )
 from tools.microsoft_graph_auth import MicrosoftGraphConfigError, MicrosoftGraphTokenProvider
 
diff --git a/plugins/teams_pipeline/pipeline.py b/plugins/teams_pipeline/pipeline.py
index d1d16164861..1b2c1d8b0fc 100644
--- a/plugins/teams_pipeline/pipeline.py
+++ b/plugins/teams_pipeline/pipeline.py
@@ -7,7 +7,6 @@ import json
 import logging
 import os
 import shutil
-import subprocess
 import tempfile
 import uuid
 from dataclasses import dataclass
@@ -19,7 +18,6 @@ import httpx
 from agent.auxiliary_client import async_call_llm, extract_content_or_reasoning
 from hermes_constants import get_hermes_home
 from plugins.teams_pipeline.meetings import (
-    TeamsMeetingArtifactNotFoundError,
     download_recording_artifact,
     enrich_meeting_with_call_record,
     fetch_preferred_transcript_text,
diff --git a/plugins/video_gen/fal/__init__.py b/plugins/video_gen/fal/__init__.py
index 61b36789855..e3ee7ffa100 100644
--- a/plugins/video_gen/fal/__init__.py
+++ b/plugins/video_gen/fal/__init__.py
@@ -17,7 +17,7 @@ Model families (each with t2v + i2v endpoints):
     veo3.1        fal-ai/veo3.1                                  /  fal-ai/veo3.1/image-to-video
     seedance-2.0  bytedance/seedance-2.0/text-to-video           /  bytedance/seedance-2.0/image-to-video
     kling-v3-4k   fal-ai/kling-video/v3/4k/text-to-video         /  fal-ai/kling-video/v3/4k/image-to-video
-    happy-horse   fal-ai/happy-horse/text-to-video               /  fal-ai/happy-horse/image-to-video
+    happy-horse   alibaba/happy-horse/text-to-video              /  alibaba/happy-horse/image-to-video
 
 Selection precedence for the active family:
     1. ``model=`` arg from the tool call
@@ -26,14 +26,16 @@ Selection precedence for the active family:
     4. ``video_gen.model`` in ``config.yaml`` (when it's one of our family IDs)
     5. ``DEFAULT_MODEL``
 
-Authentication via ``FAL_KEY``. Output is an HTTPS URL from FAL's CDN; the
-gateway downloads and delivers it.
+Authentication via ``FAL_KEY`` or the managed Nous gateway. Output is an
+HTTPS URL from FAL's CDN; the gateway downloads and delivers it.
 """
 
 from __future__ import annotations
 
 import logging
 import os
+import threading
+import uuid
 from typing import Any, Dict, List, Optional, Tuple
 
 from agent.video_gen_provider import (
@@ -104,8 +106,9 @@ FAL_FAMILIES: Dict[str, Dict[str, Any]] = {
         "text_endpoint": "fal-ai/veo3.1",
         "image_endpoint": "fal-ai/veo3.1/image-to-video",
         "aspect_ratios": ("16:9", "9:16"),
-        "resolutions": ("720p", "1080p"),
+        "resolutions": ("720p", "1080p", "4k"),
         "durations": (4, 6, 8),
+        "duration_suffix": "s",  # FAL veo3.1 wants "4s" not "4"
         "audio": True,
         "negative": True,
     },
@@ -148,8 +151,8 @@ FAL_FAMILIES: Dict[str, Dict[str, Any]] = {
         "price": "premium",
         "strengths": "Alibaba. New model, sparse public docs — conservative defaults.",
         "tier": "premium",
-        "text_endpoint": "fal-ai/happy-horse/text-to-video",
-        "image_endpoint": "fal-ai/happy-horse/image-to-video",
+        "text_endpoint": "alibaba/happy-horse/text-to-video",
+        "image_endpoint": "alibaba/happy-horse/image-to-video",
         # Docs don't expose duration/aspect/resolution — let the endpoint
         # apply its own defaults.
         "aspect_ratios": None,
@@ -270,7 +273,9 @@ def _build_payload(
     clamped = _clamp_duration(family, duration)
     if clamped is not None and family.get("durations"):
         # FAL exposes duration as a string in the queue API ("8" not 8).
-        payload["duration"] = str(clamped)
+        # Some families (e.g. veo3.1) require a unit suffix ("4s" not "4").
+        suffix = family.get("duration_suffix", "")
+        payload["duration"] = f"{clamped}{suffix}"
 
     if family.get("audio") and audio is not None:
         payload["generate_audio"] = bool(audio)
@@ -302,6 +307,92 @@ def _load_fal_client() -> Any:
     return _fal_client
 
 
+# ---------------------------------------------------------------------------
+# Managed FAL gateway (Nous Subscription)
+# ---------------------------------------------------------------------------
+
+_managed_fal_video_client: Any = None
+_managed_fal_video_client_config: Any = None
+_managed_fal_video_client_lock = threading.Lock()
+
+
+def _resolve_managed_fal_video_gateway():
+    """Return managed fal-queue gateway config when the user prefers the gateway
+    or direct FAL credentials are absent."""
+    from tools.tool_backend_helpers import fal_key_is_configured, prefers_gateway
+
+    if fal_key_is_configured() and not prefers_gateway("video_gen"):
+        return None
+    from tools.managed_tool_gateway import resolve_managed_tool_gateway
+
+    return resolve_managed_tool_gateway("fal-queue")
+
+
+def _get_managed_fal_video_client(managed_gateway):
+    """Reuse the managed FAL client so its internal httpx.Client is not leaked per call."""
+    global _managed_fal_video_client, _managed_fal_video_client_config
+    from tools.fal_common import _ManagedFalSyncClient
+
+    client_config = (
+        managed_gateway.gateway_origin.rstrip("/"),
+        managed_gateway.nous_user_token,
+    )
+    with _managed_fal_video_client_lock:
+        if _managed_fal_video_client is not None and _managed_fal_video_client_config == client_config:
+            return _managed_fal_video_client
+
+        _load_fal_client()
+        _managed_fal_video_client = _ManagedFalSyncClient(
+            _fal_client,
+            key=managed_gateway.nous_user_token,
+            queue_run_origin=managed_gateway.gateway_origin,
+        )
+        _managed_fal_video_client_config = client_config
+        return _managed_fal_video_client
+
+
+def _submit_fal_video_request(endpoint: str, arguments: Dict[str, Any]):
+    """Submit a FAL video request using direct credentials or the managed queue gateway.
+
+    Returns a request handle whose ``.get()`` blocks until the result is ready.
+    """
+    _load_fal_client()
+    request_headers = {"x-idempotency-key": str(uuid.uuid4())}
+    managed_gateway = _resolve_managed_fal_video_gateway()
+    if managed_gateway is None:
+        return _fal_client.submit(endpoint, arguments=arguments, headers=request_headers)
+
+    managed_client = _get_managed_fal_video_client(managed_gateway)
+    try:
+        return managed_client.submit(
+            endpoint,
+            arguments=arguments,
+            headers=request_headers,
+        )
+    except Exception as exc:
+        from tools.fal_common import _extract_http_status
+
+        status = _extract_http_status(exc)
+        if status is not None and 400 <= status < 500:
+            raise ValueError(
+                f"Nous Subscription gateway rejected endpoint '{endpoint}' "
+                f"(HTTP {status}). This model may not yet be enabled on "
+                f"the Nous Portal's FAL proxy. Either:\n"
+                f"  • Set FAL_KEY in your environment to use FAL.ai directly, or\n"
+                f"  • Pick a different model via `hermes tools` → Video Generation."
+            ) from exc
+        raise
+
+
+def _check_fal_video_available() -> bool:
+    """True if the FAL.ai video backend is reachable (direct key or managed gateway)."""
+    from tools.tool_backend_helpers import fal_key_is_configured
+
+    if fal_key_is_configured():
+        return True
+    return _resolve_managed_fal_video_gateway() is not None
+
+
 # ---------------------------------------------------------------------------
 # Provider
 # ---------------------------------------------------------------------------
@@ -323,13 +414,10 @@ class FALVideoGenProvider(VideoGenProvider):
         return "FAL"
 
     def is_available(self) -> bool:
-        if not os.environ.get("FAL_KEY", "").strip():
-            return False
         try:
-            import fal_client  # noqa: F401
-        except ImportError:
+            return _check_fal_video_available()
+        except Exception:  # noqa: BLE001 — never break the picker
             return False
-        return True
 
     def list_models(self) -> List[Dict[str, Any]]:
         out: List[Dict[str, Any]] = []
@@ -394,11 +482,12 @@ class FALVideoGenProvider(VideoGenProvider):
         seed: Optional[int] = None,
         **kwargs: Any,
     ) -> Dict[str, Any]:
-        if not os.environ.get("FAL_KEY", "").strip():
+        if not _check_fal_video_available():
             return error_response(
                 error=(
-                    "FAL_KEY not set. Run `hermes tools` → Video Generation "
-                    "→ FAL to configure."
+                    "No FAL backend available. Either set FAL_KEY "
+                    "(run `hermes tools` → Video Generation → FAL to configure) "
+                    "or sign in to Nous (`hermes setup`) for managed gateway access."
                 ),
                 error_type="auth_required",
                 provider="fal",
@@ -406,7 +495,7 @@ class FALVideoGenProvider(VideoGenProvider):
             )
 
         try:
-            fal_client = _load_fal_client()
+            _load_fal_client()
         except ImportError:
             return error_response(
                 error="fal_client Python package not installed (pip install fal-client)",
@@ -467,11 +556,8 @@ class FALVideoGenProvider(VideoGenProvider):
         )
 
         try:
-            result = fal_client.subscribe(
-                endpoint,
-                arguments=payload,
-                with_logs=False,
-            )
+            handle = _submit_fal_video_request(endpoint, payload)
+            result = handle.get()
         except Exception as exc:
             logger.warning(
                 "FAL video gen failed (family=%s, endpoint=%s): %s",
@@ -511,7 +597,7 @@ class FALVideoGenProvider(VideoGenProvider):
             prompt=prompt,
             modality=modality_used,
             aspect_ratio=aspect_ratio if "aspect_ratio" in payload else "",
-            duration=int(payload["duration"]) if "duration" in payload else 0,
+            duration=int("".join(c for c in payload["duration"] if c.isdigit()) or "0") if "duration" in payload else 0,
             provider="fal",
             extra=extra,
         )
diff --git a/plugins/web/firecrawl/provider.py b/plugins/web/firecrawl/provider.py
index bcc574ffca3..9e3f123e520 100644
--- a/plugins/web/firecrawl/provider.py
+++ b/plugins/web/firecrawl/provider.py
@@ -196,9 +196,13 @@ def _raise_web_backend_configuration_error() -> None:
     )
     if _wt.managed_nous_tools_enabled():
         message += (
-            " With your Nous subscription you can also use the Tool Gateway — "
+            " With your Nous subscription you can also use the Tool Gateway. "
             "run `hermes tools` and select Nous Subscription as the web provider."
         )
+    else:
+        message += " " + _wt.nous_tool_gateway_unavailable_message(
+            "managed Firecrawl web tools",
+        )
     raise ValueError(message)
 
 
@@ -381,9 +385,6 @@ class FirecrawlWebSearchProvider(WebSearchProvider):
     def supports_extract(self) -> bool:
         return True
 
-    def supports_crawl(self) -> bool:
-        return True
-
     def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
         """Execute a Firecrawl search.
 
@@ -575,192 +576,12 @@ class FirecrawlWebSearchProvider(WebSearchProvider):
 
         return results
 
-    async def crawl(self, url: str, **kwargs: Any) -> Dict[str, Any]:
-        """Crawl a seed URL via Firecrawl's ``/crawl`` endpoint.
-
-        Sync SDK call wrapped in ``asyncio.to_thread`` because the dispatcher
-        in :func:`tools.web_tools.web_crawl_tool` is async and runs LLM
-        post-processing on the response. The dispatcher gates the seed URL
-        against SSRF + website-access policy before calling us; this method
-        re-checks every crawled page's URL against the policy after the
-        crawl returns to catch redirected pages that map to a blocked host.
-
-        Accepted kwargs (others ignored for forward compat):
-          - ``instructions``: str — logged then dropped. Firecrawl's /crawl
-            endpoint does NOT accept natural-language instructions (that's
-            an /extract feature), so we record the value for debugging and
-            proceed without it. Tavily's crawl IS instruction-aware; this
-            divergence is documented in both plugins' docstrings.
-          - ``limit``: int — max pages to crawl (default 20).
-          - ``depth``: str — accepted for API parity with Tavily; ignored
-            by Firecrawl's crawl endpoint.
-
-        Returns ``{"results": [...]}`` matching the shape that
-        :func:`tools.web_tools.web_crawl_tool`'s shared LLM-summarization
-        path expects. Per-page failures (policy block on redirected URL,
-        bad response shape) are included as items with an ``error`` field
-        rather than raising.
-        """
-        try:
-            from tools.interrupt import is_interrupted
-
-            if is_interrupted():
-                return {"results": [{"url": url, "title": "", "content": "", "error": "Interrupted"}]}
-
-            instructions = kwargs.get("instructions")
-            limit = kwargs.get("limit", 20)
-
-            # Firecrawl's /crawl endpoint does not accept natural-language
-            # instructions (that's an /extract feature). Log + drop.
-            if instructions:
-                logger.info(
-                    "Firecrawl crawl: 'instructions' parameter ignored "
-                    "(not supported by Firecrawl /crawl)"
-                )
-
-            logger.info("Firecrawl crawl: %s (limit=%d)", url, limit)
-
-            crawl_params = {
-                "limit": limit,
-                "scrape_options": {"formats": ["markdown"]},
-            }
-
-            # The SDK call is sync; run in a thread so we don't block the
-            # gateway event loop on a multi-page crawl.
-            crawl_result = await asyncio.to_thread(
-                _get_firecrawl_client().crawl,
-                url=url,
-                **crawl_params,
-            )
-
-            # CrawlJob normalization across SDK + direct + gateway shapes.
-            data_list: List[Any] = []
-            if hasattr(crawl_result, "data"):
-                data_list = crawl_result.data if crawl_result.data else []
-                logger.info(
-                    "Firecrawl crawl status: %s, %d pages",
-                    getattr(crawl_result, "status", "unknown"),
-                    len(data_list),
-                )
-            elif isinstance(crawl_result, dict) and "data" in crawl_result:
-                data_list = crawl_result.get("data", []) or []
-            else:
-                logger.warning(
-                    "Firecrawl crawl: unexpected result type %r",
-                    type(crawl_result).__name__,
-                )
-
-            pages: List[Dict[str, Any]] = []
-            for item in data_list:
-                # Pydantic model | typed object | dict — handle all shapes.
-                content_markdown = None
-                content_html = None
-                metadata: Any = {}
-
-                if hasattr(item, "model_dump"):
-                    item_dict = item.model_dump()
-                    content_markdown = item_dict.get("markdown")
-                    content_html = item_dict.get("html")
-                    metadata = item_dict.get("metadata", {})
-                elif hasattr(item, "__dict__"):
-                    content_markdown = getattr(item, "markdown", None)
-                    content_html = getattr(item, "html", None)
-                    metadata_obj = getattr(item, "metadata", {})
-                    if hasattr(metadata_obj, "model_dump"):
-                        metadata = metadata_obj.model_dump()
-                    elif hasattr(metadata_obj, "__dict__"):
-                        metadata = metadata_obj.__dict__
-                    elif isinstance(metadata_obj, dict):
-                        metadata = metadata_obj
-                    else:
-                        metadata = {}
-                elif isinstance(item, dict):
-                    content_markdown = item.get("markdown")
-                    content_html = item.get("html")
-                    metadata = item.get("metadata", {})
-
-                # Ensure metadata is a plain dict.
-                if not isinstance(metadata, dict):
-                    if hasattr(metadata, "model_dump"):
-                        metadata = metadata.model_dump()
-                    elif hasattr(metadata, "__dict__"):
-                        metadata = metadata.__dict__
-                    else:
-                        metadata = {}
-
-                page_url = metadata.get(
-                    "sourceURL", metadata.get("url", "Unknown URL")
-                )
-                title = metadata.get("title", "")
-
-                # Per-page policy re-check (catches blocked redirects).
-                page_blocked = check_website_access(page_url)
-                if page_blocked:
-                    logger.info(
-                        "Blocked crawled page %s by rule %s",
-                        page_blocked["host"],
-                        page_blocked["rule"],
-                    )
-                    pages.append(
-                        {
-                            "url": page_url,
-                            "title": title,
-                            "content": "",
-                            "raw_content": "",
-                            "error": page_blocked["message"],
-                            "blocked_by_policy": {
-                                "host": page_blocked["host"],
-                                "rule": page_blocked["rule"],
-                                "source": page_blocked["source"],
-                            },
-                        }
-                    )
-                    continue
-
-                content = content_markdown or content_html or ""
-                pages.append(
-                    {
-                        "url": page_url,
-                        "title": title,
-                        "content": content,
-                        "raw_content": content,
-                        "metadata": metadata,
-                    }
-                )
-
-            return {"results": pages}
-        except ValueError as exc:
-            return {"results": [{"url": url, "title": "", "content": "", "error": str(exc)}]}
-        except ImportError as exc:
-            return {
-                "results": [
-                    {
-                        "url": url,
-                        "title": "",
-                        "content": "",
-                        "error": f"Firecrawl SDK not installed: {exc}",
-                    }
-                ]
-            }
-        except Exception as exc:  # noqa: BLE001
-            logger.warning("Firecrawl crawl error: %s", exc)
-            return {
-                "results": [
-                    {
-                        "url": url,
-                        "title": "",
-                        "content": "",
-                        "error": f"Firecrawl crawl failed: {exc}",
-                    }
-                ]
-            }
-
     def get_setup_schema(self) -> Dict[str, Any]:
         return {
             "name": "Firecrawl",
             "badge": "paid · optional gateway",
             "tag": (
-                "Full search + extract + crawl; supports direct API and "
+                "Full search + extract; supports direct API and "
                 "Nous tool-gateway routing."
             ),
             "env_vars": [
diff --git a/plugins/web/tavily/__init__.py b/plugins/web/tavily/__init__.py
index be0b21dbe78..1e0ced61d12 100644
--- a/plugins/web/tavily/__init__.py
+++ b/plugins/web/tavily/__init__.py
@@ -1,9 +1,4 @@
-"""Tavily web search + extract + crawl plugin — bundled, auto-loaded.
-
-First plugin in this codebase to advertise ``supports_crawl=True``. The
-crawl method maps to Tavily's ``/crawl`` endpoint, which accepts a seed
-URL plus optional instructions and extract depth.
-"""
+"""Tavily web search + extract plugin — bundled, auto-loaded."""
 
 from __future__ import annotations
 
diff --git a/plugins/web/tavily/provider.py b/plugins/web/tavily/provider.py
index 50e15973fb3..fe161a4a096 100644
--- a/plugins/web/tavily/provider.py
+++ b/plugins/web/tavily/provider.py
@@ -1,33 +1,24 @@
-"""Tavily web search + content extraction + crawl — plugin form.
+"""Tavily web search + content extraction — plugin form.
 
-Subclasses :class:`agent.web_search_provider.WebSearchProvider`. Three
+Subclasses :class:`agent.web_search_provider.WebSearchProvider`. Two
 capabilities advertised:
 
 - ``supports_search()``  -> True (Tavily ``/search``)
 - ``supports_extract()`` -> True (Tavily ``/extract``)
-- ``supports_crawl()``   -> True (Tavily ``/crawl``) — sync HTTP crawl;
-  Firecrawl also advertises ``supports_crawl=True`` (async)
 
-All three are sync — the underlying call is ``httpx.post(...)``. The
-dispatcher in :func:`tools.web_tools.web_crawl_tool` (which is itself
-async) will run sync providers in a thread when appropriate.
+Both are sync — the underlying call is ``httpx.post(...)``.
 
 Config keys this provider responds to::
 
     web:
       search_backend: "tavily"     # explicit per-capability
       extract_backend: "tavily"    # explicit per-capability
-      crawl_backend: "tavily"      # explicit per-capability
-      backend: "tavily"            # shared fallback for all three
+      backend: "tavily"            # shared fallback for both
 
 Env vars::
 
     TAVILY_API_KEY=...           # https://app.tavily.com/home (required)
     TAVILY_BASE_URL=...          # optional override of https://api.tavily.com
-
-Auth note: Tavily uses ``api_key`` in the JSON body for /search and
-/extract, but **also requires** ``Authorization: Bearer <key>`` for /crawl
-(body-only auth returns 401 on /crawl). The plugin handles both.
 """
 
 from __future__ import annotations
@@ -63,11 +54,7 @@ def _tavily_request(endpoint: str, payload: Dict[str, Any]) -> Dict[str, Any]:
     url = f"{base_url}/{endpoint.lstrip('/')}"
     logger.info("Tavily %s request to %s", endpoint, url)
 
-    # Tavily /crawl requires Bearer header auth in addition to body auth;
-    # /search and /extract are body-only.
-    headers = {"Authorization": f"Bearer {api_key}"} if endpoint.strip("/") == "crawl" else {}
-
-    response = httpx.post(url, json=payload, headers=headers, timeout=60)
+    response = httpx.post(url, json=payload, timeout=60)
     response.raise_for_status()
     return response.json()
 
@@ -90,7 +77,7 @@ def _normalize_tavily_search_results(response: Dict[str, Any]) -> Dict[str, Any]
 def _normalize_tavily_documents(
     response: Dict[str, Any], fallback_url: str = ""
 ) -> List[Dict[str, Any]]:
-    """Map Tavily ``/extract`` or ``/crawl`` response to standard documents.
+    """Map Tavily ``/extract`` response to standard documents.
 
     Documents follow the legacy LLM post-processing shape::
 
@@ -139,7 +126,7 @@ def _normalize_tavily_documents(
 
 
 class TavilyWebSearchProvider(WebSearchProvider):
-    """Tavily search + extract + crawl provider."""
+    """Tavily search + extract provider."""
 
     @property
     def name(self) -> str:
@@ -159,9 +146,6 @@ class TavilyWebSearchProvider(WebSearchProvider):
     def supports_extract(self) -> bool:
         return True
 
-    def supports_crawl(self) -> bool:
-        return True
-
     def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
         """Execute a Tavily search."""
         try:
@@ -221,60 +205,11 @@ class TavilyWebSearchProvider(WebSearchProvider):
                 for u in urls
             ]
 
-    def crawl(self, url: str, **kwargs: Any) -> Dict[str, Any]:
-        """Crawl a seed URL via Tavily's ``/crawl`` endpoint.
-
-        Accepted kwargs (others ignored for forward compat):
-          - ``instructions``: str — natural-language guidance for the crawl
-          - ``depth``: str — ``"basic"`` (default) or ``"advanced"``
-          - ``limit``: int — max pages to crawl (default 20)
-
-        Returns ``{"results": [...]}`` shaped to match what
-        :func:`tools.web_tools.web_crawl_tool` post-processes.
-        """
-        try:
-            from tools.interrupt import is_interrupted
-
-            if is_interrupted():
-                return {"results": [{"url": url, "title": "", "content": "", "error": "Interrupted"}]}
-
-            instructions = kwargs.get("instructions")
-            depth = kwargs.get("depth", "basic")
-            limit = kwargs.get("limit", 20)
-
-            logger.info("Tavily crawl: %s (depth=%s, limit=%d)", url, depth, limit)
-            payload: Dict[str, Any] = {
-                "url": url,
-                "limit": limit,
-                "extract_depth": depth,
-            }
-            if instructions:
-                payload["instructions"] = instructions
-
-            raw = _tavily_request("crawl", payload)
-            return {
-                "results": _normalize_tavily_documents(raw, fallback_url=url)
-            }
-        except ValueError as exc:
-            return {"results": [{"url": url, "title": "", "content": "", "error": str(exc)}]}
-        except Exception as exc:  # noqa: BLE001
-            logger.warning("Tavily crawl error: %s", exc)
-            return {
-                "results": [
-                    {
-                        "url": url,
-                        "title": "",
-                        "content": "",
-                        "error": f"Tavily crawl failed: {exc}",
-                    }
-                ]
-            }
-
     def get_setup_schema(self) -> Dict[str, Any]:
         return {
             "name": "Tavily",
             "badge": "paid",
-            "tag": "Search + extract + crawl in one provider.",
+            "tag": "Search + extract in one provider.",
             "env_vars": [
                 {
                     "key": "TAVILY_API_KEY",
diff --git a/plugins/web/xai/provider.py b/plugins/web/xai/provider.py
index a74b6a683e8..2b86238d11b 100644
--- a/plugins/web/xai/provider.py
+++ b/plugins/web/xai/provider.py
@@ -143,9 +143,6 @@ class XAIWebSearchProvider(WebSearchProvider):
     def supports_extract(self) -> bool:
         return False
 
-    def supports_crawl(self) -> bool:
-        return False
-
     # -- Search -----------------------------------------------------------
 
     def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
diff --git a/providers/base.py b/providers/base.py
index fa6765d103c..01023ff55c2 100644
--- a/providers/base.py
+++ b/providers/base.py
@@ -129,6 +129,20 @@ class ProviderProfile:
         """
         return {}, {}
 
+    def get_max_tokens(self, model: str | None) -> int | None:
+        """Return the default max_tokens cap for *model*.
+
+        Overrideable hook for providers that need per-model output caps —
+        e.g. a relay that fronts several upstream backends, each with a
+        different completion-token limit. The transport calls this when
+        the user hasn't set an explicit max_tokens.
+
+        Default: return self.default_max_tokens (the static profile field),
+        ignoring the model name. Override in a subclass to vary the cap
+        per-model.
+        """
+        return self.default_max_tokens
+
     def fetch_models(
         self,
         *,
diff --git a/pyproject.toml b/pyproject.toml
index 75c4129fde1..fae89baea12 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "hermes-agent"
-version = "0.14.0"
+version = "0.15.1"
 description = "The self-improving AI agent — creates skills from experience, improves them during use, and runs anywhere"
 readme = "README.md"
 requires-python = ">=3.11"
@@ -87,7 +87,7 @@ edge-tts = ["edge-tts==7.2.7"]
 modal = ["modal==1.3.4"]
 daytona = ["daytona==0.155.0"]
 hindsight = ["hindsight-client==0.6.1"]
-dev = ["debugpy==1.8.20", "pytest==9.0.2", "pytest-asyncio==1.3.0", "pytest-timeout==2.4.0", "mcp==1.26.0", "ty==0.0.21", "ruff==0.15.10"]
+dev = ["debugpy==1.8.20", "pytest==9.0.2", "pytest-asyncio==1.3.0", "pytest-timeout==2.4.0", "mcp==1.26.0", "ty==0.0.21", "ruff==0.15.10", "setuptools==82.0.1"]
 messaging = ["python-telegram-bot[webhooks]==22.6", "discord.py[voice]==2.7.1", "aiohttp==3.13.3", "brotlicffi==1.2.0.1", "slack-bolt==1.27.0", "slack-sdk==3.40.1", "qrcode==7.4.2"]
 cron = []  # croniter is now a core dependency; this extra kept for back-compat
 slack = ["slack-bolt==1.27.0", "slack-sdk==3.40.1", "aiohttp==3.13.3"]
@@ -123,22 +123,15 @@ sms = ["aiohttp==3.13.3"]
 # to it, which is already provided by the `mcp` extra.
 computer-use = ["mcp==1.26.0"]
 acp = ["agent-client-protocol==0.9.0"]
-# mistral: extra REMOVED 2026-05-12 — `mistralai` PyPI project quarantined
-# after malicious 2.4.6 release (Mini Shai-Hulud worm). Every version of
-# `mistralai` returns 404 on PyPI right now, so any pin we'd write is
-# unresolvable, which breaks `uv lock --check` in CI.
-#
-# To restore once PyPI un-quarantines:
-#   1. Verify the new release is clean (read the changelog, check Socket
-#      advisory page, confirm no malicious code review findings).
-#   2. Add back: mistral = ["mistralai==<verified-version>"]
-#   3. Re-enable Mistral in:
-#        - tools/lazy_deps.py (LAZY_DEPS["tts.mistral"], LAZY_DEPS["stt.mistral"])
-#        - hermes_cli/tools_config.py (un-hide from provider picker)
-#        - hermes_cli/web_server.py (re-add to dashboard STT options)
-#        - tools/transcription_tools.py / tools/tts_tool.py (drop disabled stubs)
-#   4. Run `uv lock` to regenerate transitives.
-#   5. Optionally re-add to [all] only after a few days of clean operation.
+# mistral: Voxtral STT + TTS. Pinned to an exact verified-clean version.
+# The `mistralai` PyPI project was quarantined 2026-05-12 after the malicious
+# 2.4.6 release (Mini Shai-Hulud worm); 2.4.6 was removed from PyPI and the
+# project is serving clean releases again (2.4.7 2026-05-25, 2.4.8 2026-05-28).
+# Like other opt-in TTS/STT backends, this is lazy-installed via
+# tools/lazy_deps.py (stt.mistral / tts.mistral) at first use — deliberately
+# NOT re-added to [all] so a future quarantined release can't break fresh
+# installs (see [all] policy comment below).
+mistral = ["mistralai==2.4.8"]
 bedrock = ["boto3==1.42.89"]
 azure-identity = ["azure-identity==1.25.3"]
 termux = [
@@ -232,10 +225,18 @@ plugins = [
   "*/dashboard/manifest.json",
   "*/dashboard/dist/*",
   "*/dashboard/dist/**/*",
+  # Plugin discovery (hermes_cli/plugins.py) reads a plugin.yaml/plugin.yml
+  # manifest from each bundled plugin directory to register it. Wheels only
+  # carry files declared here, so without this glob the wheel ships every
+  # plugin's Python code but none of its manifests — the scan finds zero
+  # plugins and all gateway platforms fail with "No adapter available for
+  # <platform>" (#34034), web-search providers go missing (#28149), etc.
+  "**/plugin.yaml",
+  "**/plugin.yml",
 ]
 
 [tool.setuptools.packages.find]
-include = ["agent", "agent.*", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "acp_adapter", "plugins", "plugins.*", "providers", "providers.*"]
+include = ["agent", "agent.*", "tools", "tools.*", "hermes_cli", "hermes_cli.*", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "acp_adapter", "plugins", "plugins.*", "providers", "providers.*"]
 
 [tool.pytest.ini_options]
 testpaths = ["tests"]
diff --git a/run_agent.py b/run_agent.py
index 5828ec20702..036103442e9 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -33,26 +33,19 @@ except ModuleNotFoundError:
 
 import asyncio
 import base64
-import concurrent.futures
-import contextvars
 import copy
 import hashlib
 import json
 import logging
 logger = logging.getLogger(__name__)
 import os
-import random
 import re
-import ssl
 import sys
 import tempfile
 import time
 import threading
-from types import SimpleNamespace
-import urllib.request
 import uuid
 from typing import List, Dict, Any, Optional
-from urllib.parse import urlparse, parse_qs, urlunparse
 # NOTE: `from openai import OpenAI` is deliberately NOT at module top — the
 # SDK pulls ~240 ms of imports. We expose `OpenAI` as a thin proxy object
 # that imports the SDK on first call/isinstance check. This preserves:
@@ -72,13 +65,13 @@ from hermes_constants import get_hermes_home
 
 # OpenAI lazy proxy + safe stdio + proxy URL helpers — see agent/process_bootstrap.py.
 # `OpenAI` is re-exported here so `patch("run_agent.OpenAI", ...)` in tests works.
+# The other `# noqa: F401` re-exports below cover names accessed via
+# `mock.patch("run_agent.<X>")`, `from run_agent import <X>` in production
+# siblings, or the `_ra().<X>` indirection in agent/system_prompt.py — none
+# of which ruff's in-module usage scan can see.
 from agent.process_bootstrap import (
-    OpenAI,
-    _OpenAIProxy,
-    _load_openai_cls,
-    _SafeWriter,
-    _install_safe_stdio,
-    _get_proxy_from_env,
+    OpenAI,  # noqa: F401  # re-exported for tests that mock.patch("run_agent.OpenAI")
+    _SafeWriter,  # noqa: F401  # re-exported for tests that `from run_agent import _SafeWriter`
     _get_proxy_for_base_url,
 )
 from agent.iteration_budget import IterationBudget
@@ -102,77 +95,38 @@ else:
 
 # Import our tool system
 from model_tools import (
-    get_tool_definitions,
+    get_tool_definitions,  # noqa: F401  # re-exported for tests that mock.patch("run_agent.get_tool_definitions")
     get_toolset_for_tool,
-    handle_function_call,
-    check_toolset_requirements,
+    handle_function_call,  # noqa: F401  # re-exported for tests that mock.patch("run_agent.handle_function_call")
+    check_toolset_requirements,  # noqa: F401  # re-exported for tests that mock.patch("run_agent.check_toolset_requirements")
 )
-from tools.terminal_tool import cleanup_vm, get_active_env, is_persistent_env
-from tools.terminal_tool import (
-    set_approval_callback as _set_approval_callback,
-    set_sudo_password_callback as _set_sudo_password_callback,
-    _get_approval_callback,
-    _get_sudo_password_callback,
-)
-from tools.tool_result_storage import maybe_persist_tool_result, enforce_turn_budget
+from tools.terminal_tool import cleanup_vm
 from tools.interrupt import set_interrupt as _set_interrupt
 from tools.browser_tool import cleanup_browser
 
 
 # Agent internals extracted to agent/ package for modularity
-from agent.memory_manager import StreamingContextScrubber, build_memory_context_block, sanitize_context
-from agent.think_scrubber import StreamingThinkScrubber
-from agent.retry_utils import jittered_backoff
-from agent.error_classifier import classify_api_error, FailoverReason
+from agent.memory_manager import sanitize_context
+from agent.error_classifier import FailoverReason
 from agent.redact import redact_sensitive_text
-from agent.prompt_builder import (
-    DEFAULT_AGENT_IDENTITY, PLATFORM_HINTS,
-    MEMORY_GUIDANCE, SESSION_SEARCH_GUIDANCE, SKILLS_GUIDANCE,
-    HERMES_AGENT_HELP_GUIDANCE,
-    KANBAN_GUIDANCE,
-    build_nous_subscription_prompt,
-)
 from agent.model_metadata import (
-    fetch_model_metadata,
-    estimate_tokens_rough, estimate_messages_tokens_rough, estimate_request_tokens_rough,
-    get_next_probe_tier, parse_context_limit_from_error,
-    parse_available_output_tokens_from_error,
-    save_context_length, is_local_endpoint,
-    query_ollama_num_ctx,
+    estimate_request_tokens_rough,  # noqa: F401  # re-exported for tests that mock.patch("run_agent.estimate_request_tokens_rough")
+    is_local_endpoint,
 )
-from agent.context_compressor import ContextCompressor
-from agent.subdirectory_hints import SubdirectoryHintTracker
-from agent.prompt_caching import apply_anthropic_cache_control
-from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, build_environment_hints, load_soul_md, TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS, GOOGLE_MODEL_OPERATIONAL_GUIDANCE, OPENAI_MODEL_EXECUTION_GUIDANCE
-from agent.usage_pricing import estimate_usage_cost, normalize_usage
-from agent.codex_responses_adapter import (
-    _derive_responses_function_call_id as _codex_derive_responses_function_call_id,
-    _deterministic_call_id as _codex_deterministic_call_id,
-    _split_responses_tool_id as _codex_split_responses_tool_id,
-    _summarize_user_message_for_log,
+from agent.usage_pricing import normalize_usage
+# Re-exported for tests that monkeypatch these symbols on run_agent.
+from agent.context_compressor import ContextCompressor  # noqa: F401
+from agent.retry_utils import jittered_backoff  # noqa: F401
+from agent.prompt_builder import (  # noqa: F401  # re-exported via _ra() / mock.patch("run_agent.<name>") / from run_agent import <name>
+    DEFAULT_AGENT_IDENTITY,
+    build_skills_system_prompt,
+    build_context_files_prompt,
+    build_environment_hints,
+    build_nous_subscription_prompt,
+    load_soul_md,
 )
-from agent.display import (
-    KawaiiSpinner, build_tool_preview as _build_tool_preview,
-    get_cute_tool_message as _get_cute_tool_message_impl,
-    _detect_tool_failure,
-    get_tool_emoji as _get_tool_emoji,
-)
-from agent.tool_guardrails import (
-    ToolCallGuardrailConfig,
-    ToolCallGuardrailController,
-    ToolGuardrailDecision,
-    append_toolguard_guidance,
-    toolguard_synthetic_result,
-)
-from agent.tool_result_classification import (
-    FILE_MUTATING_TOOL_NAMES as _FILE_MUTATING_TOOLS,
-    file_mutation_result_landed,
-)
-from agent.trajectory import (
-    convert_scratchpad_to_think,
-    save_trajectory as _save_trajectory_to_file,
-)
-from agent.message_sanitization import (
+from agent.process_bootstrap import _get_proxy_from_env  # noqa: F401
+from agent.message_sanitization import (  # noqa: F401
     _SURROGATE_RE,
     _sanitize_surrogates,
     _sanitize_structure_surrogates,
@@ -185,25 +139,38 @@ from agent.message_sanitization import (
     _strip_images_from_messages,
     _sanitize_structure_non_ascii,
 )
+from agent.codex_responses_adapter import (
+    _derive_responses_function_call_id as _codex_derive_responses_function_call_id,
+    _deterministic_call_id as _codex_deterministic_call_id,
+    _split_responses_tool_id as _codex_split_responses_tool_id,
+    _summarize_user_message_for_log,  # noqa: F401  # re-exported for tests
+)
+from agent.tool_guardrails import (
+    ToolGuardrailDecision,
+    append_toolguard_guidance,
+    toolguard_synthetic_result,
+)
+from agent.tool_result_classification import (
+    FILE_MUTATING_TOOL_NAMES as _FILE_MUTATING_TOOLS,
+    file_mutation_result_landed,
+)
+from agent.trajectory import (
+    convert_scratchpad_to_think,
+    save_trajectory as _save_trajectory_to_file,
+)
 from agent.tool_dispatch_helpers import (
-    _NEVER_PARALLEL_TOOLS,
-    _PARALLEL_SAFE_TOOLS,
-    _PATH_SCOPED_TOOLS,
-    _DESTRUCTIVE_PATTERNS,
-    _REDIRECT_OVERWRITE,
-    _is_destructive_command,
     _should_parallelize_tool_batch,
-    _extract_parallel_scope_path,
-    _paths_overlap,
+    _is_destructive_command,  # noqa: F401  # re-exported for tests that access `run_agent._is_destructive_command`
+    _extract_parallel_scope_path,  # noqa: F401  # re-exported for tests that `from run_agent import _extract_parallel_scope_path`
+    _paths_overlap,  # noqa: F401  # re-exported for tests that `from run_agent import _paths_overlap`
     _is_multimodal_tool_result,
     _multimodal_text_summary,
-    _append_subdir_hint_to_multimodal,
+    _append_subdir_hint_to_multimodal,  # noqa: F401  # re-exported for tests that `from run_agent import _append_subdir_hint_to_multimodal`
     _extract_file_mutation_targets,
     _extract_error_preview,
-    _trajectory_normalize_msg,
+    _trajectory_normalize_msg,  # noqa: F401  # re-exported for tests that `from run_agent import _trajectory_normalize_msg`
 )
-from utils import atomic_json_write, base_url_host_matches, base_url_hostname, env_var_enabled, normalize_proxy_url
-from hermes_cli.config import cfg_get
+from utils import atomic_json_write, base_url_host_matches, base_url_hostname
 
 
 
@@ -527,7 +494,81 @@ class AIAgent:
                 "Session DB creation failed (will retry next turn): %s", e
             )
 
-    def reset_session_state(self):
+    def _transition_context_engine_session(
+        self,
+        *,
+        old_session_id: Optional[str] = None,
+        new_session_id: Optional[str] = None,
+        previous_messages: Optional[list] = None,
+        carry_over_context: bool = False,
+        reset_engine: bool = True,
+        **extra_context,
+    ) -> None:
+        """Notify the active context engine about a host session transition.
+
+        Generic host-side lifecycle helper. The built-in compressor keeps its
+        existing reset behavior; plugin engines that implement richer hooks
+        (``on_session_end``, ``on_session_reset``, ``on_session_start``,
+        ``carry_over_new_session_context``) can flush old-session state,
+        reset runtime counters, bind to the new session, and optionally
+        carry retained context forward.
+        """
+        engine = getattr(self, "context_compressor", None)
+        if not engine:
+            return
+
+        if old_session_id and previous_messages is not None and hasattr(engine, "on_session_end"):
+            try:
+                engine.on_session_end(old_session_id, previous_messages)
+            except Exception as exc:
+                logger.debug("context engine on_session_end during transition: %s", exc)
+
+        if reset_engine and hasattr(engine, "on_session_reset"):
+            try:
+                engine.on_session_reset()
+            except Exception as exc:
+                logger.debug("context engine on_session_reset during transition: %s", exc)
+
+        should_start = bool(
+            old_session_id
+            or previous_messages is not None
+            or carry_over_context
+            or extra_context
+        )
+        target_session_id = new_session_id or getattr(self, "session_id", "") or ""
+        if should_start and target_session_id and hasattr(engine, "on_session_start"):
+            start_context = {
+                "old_session_id": old_session_id,
+                "carry_over_context": carry_over_context,
+                "platform": getattr(self, "platform", None) or os.environ.get("HERMES_SESSION_SOURCE", "cli"),
+                "model": getattr(self, "model", ""),
+                "context_length": getattr(engine, "context_length", None),
+                "conversation_id": getattr(self, "_gateway_session_key", None),
+            }
+            start_context.update(extra_context)
+            start_context = {k: v for k, v in start_context.items() if v not in (None, "")}
+            try:
+                engine.on_session_start(target_session_id, **start_context)
+            except Exception as exc:
+                logger.debug("context engine on_session_start during transition: %s", exc)
+
+        if (
+            carry_over_context
+            and old_session_id
+            and target_session_id
+            and hasattr(engine, "carry_over_new_session_context")
+        ):
+            try:
+                engine.carry_over_new_session_context(old_session_id, target_session_id)
+            except Exception as exc:
+                logger.debug("context engine carry_over_new_session_context during transition: %s", exc)
+
+    def reset_session_state(
+        self,
+        previous_messages: Optional[list] = None,
+        old_session_id: Optional[str] = None,
+        carry_over_context: bool = False,
+    ):
         """Reset all session-scoped token counters to 0 for a fresh session.
         
         This method encapsulates the reset logic for all session-level metrics
@@ -541,9 +582,12 @@ class AIAgent:
         
         The method safely handles optional attributes (e.g., context compressor)
         using ``hasattr`` checks.
-        
-        This keeps the counter reset logic DRY and maintainable in one place
-        rather than scattering it across multiple methods.
+
+        When ``previous_messages`` / ``old_session_id`` / ``carry_over_context``
+        are provided, the active context engine is notified through the
+        full transition lifecycle (``_transition_context_engine_session``)
+        instead of a bare reset. Default callers pass nothing and keep the
+        existing reset-only behavior.
         """
         # Token usage counters
         self.session_total_tokens = 0
@@ -562,9 +606,14 @@ class AIAgent:
         # Turn counter (added after reset_session_state was first written — #2635)
         self._user_turn_count = 0
 
-        # Context engine reset (works for both built-in compressor and plugins)
-        if hasattr(self, "context_compressor") and self.context_compressor:
-            self.context_compressor.on_session_reset()
+        # Context engine reset/transition (works for built-in compressor and plugins)
+        self._transition_context_engine_session(
+            old_session_id=old_session_id,
+            new_session_id=getattr(self, "session_id", None),
+            previous_messages=previous_messages,
+            carry_over_context=carry_over_context,
+            reset_engine=True,
+        )
 
     def _ensure_lmstudio_runtime_loaded(self, config_context_length: Optional[int] = None) -> None:
         """
@@ -719,6 +768,83 @@ class AIAgent:
             except Exception:
                 logger.debug("status_callback error in _emit_warning", exc_info=True)
 
+    # ── Buffered retry/fallback status ────────────────────────────────────
+    # Retry and fallback chains were flooding the CLI/gateway with status
+    # noise that users found confusing: a single transient 429 could produce
+    # 10+ "Provider/Endpoint/Retrying in 5s..." lines before the request
+    # eventually succeeded.  The buffered helpers below capture these
+    # status messages instead of emitting them immediately.  They are
+    # flushed (shown to the user) ONLY when every retry and fallback has
+    # been exhausted; on success they are silently dropped.  Backend logs
+    # (agent.log) are unaffected — every individual emission site still
+    # writes to ``logger.warning`` / ``logger.info`` for diagnosis.
+
+    def _buffer_status(self, message: str) -> None:
+        """Buffer a retry/fallback status message.
+
+        Stored as a (kind, text) tuple where ``kind`` is one of:
+        - ``"status"``  -> replays via ``_emit_status``
+        - ``"vprint"``  -> replays via ``_vprint(force=True)``
+        - ``"warn"``    -> replays via ``_emit_warning``
+        Used to defer noisy retry chatter until we know whether the
+        turn ultimately recovered or failed.
+        """
+        try:
+            buf = getattr(self, "_retry_status_buffer", None)
+            if buf is None:
+                buf = []
+                self._retry_status_buffer = buf
+            buf.append(("status", message))
+        except Exception:
+            # Never break the retry loop on a buffer hiccup.
+            pass
+
+    def _buffer_vprint(self, message: str) -> None:
+        """Buffer a vprint(force=True) retry/fallback line."""
+        try:
+            buf = getattr(self, "_retry_status_buffer", None)
+            if buf is None:
+                buf = []
+                self._retry_status_buffer = buf
+            buf.append(("vprint", message))
+        except Exception:
+            pass
+
+    def _clear_status_buffer(self) -> None:
+        """Drop buffered retry messages — call on successful recovery."""
+        try:
+            buf = getattr(self, "_retry_status_buffer", None)
+            if buf:
+                buf.clear()
+        except Exception:
+            pass
+
+    def _flush_status_buffer(self) -> None:
+        """Emit buffered retry messages — call on terminal failure.
+
+        Surfaces the full retry/fallback trace so the user can see what
+        was tried before the turn gave up.
+        """
+        try:
+            buf = getattr(self, "_retry_status_buffer", None)
+            if not buf:
+                return
+            # Drain first so a callback exception doesn't double-emit.
+            messages = list(buf)
+            buf.clear()
+            for kind, msg in messages:
+                try:
+                    if kind == "status":
+                        self._emit_status(msg)
+                    elif kind == "warn":
+                        self._emit_warning(msg)
+                    else:
+                        self._vprint(f"{self.log_prefix}{msg}", force=True)
+                except Exception:
+                    pass
+        except Exception:
+            pass
+
     def _disable_codex_reasoning_replay(
         self,
         messages: Optional[List[Dict[str, Any]]] = None,
@@ -2075,9 +2201,26 @@ class AIAgent:
         return apply_pending_steer_to_tool_results(self, messages, num_tool_msgs)
 
     def _touch_activity(self, desc: str) -> None:
-        """Update the last-activity timestamp and description (thread-safe)."""
+        """Update the last-activity timestamp and description (thread-safe).
+
+        Also bridges to the kanban board's heartbeat fields when this
+        process is a dispatcher-spawned worker (HERMES_KANBAN_TASK set),
+        so the dispatcher watchdog doesn't reclaim an actively-running
+        worker as stale (#31752). Bridge is rate-limited (60s) and
+        best-effort — it never raises into the agent loop.
+        """
         self._last_activity_ts = time.time()
         self._last_activity_desc = desc
+        if os.environ.get("HERMES_KANBAN_TASK"):
+            try:
+                from tools.kanban_tools import heartbeat_current_worker_from_env
+                heartbeat_current_worker_from_env()
+            except Exception:
+                # Never let the bridge break the agent loop.  The function
+                # already swallows exceptions internally; this outer guard
+                # covers import-time failures (kanban_tools unavailable,
+                # etc.) on niche deployment surfaces.
+                pass
 
     def _capture_rate_limits(self, http_response: Any) -> None:
         """Parse x-ratelimit-* headers from an HTTP response and cache the state.
@@ -2200,6 +2343,7 @@ class AIAgent:
         original_user_message: Any,
         final_response: Any,
         interrupted: bool,
+        messages: list | None = None,
     ) -> None:
         """Mirror a completed turn into external memory providers.
 
@@ -2232,9 +2376,13 @@ class AIAgent:
         if not (self._memory_manager and final_response and original_user_message):
             return
         try:
+            sync_kwargs = {"session_id": self.session_id or ""}
+            if messages is not None:
+                sync_kwargs["messages"] = messages
             self._memory_manager.sync_all(
-                original_user_message, final_response,
-                session_id=self.session_id or "",
+                original_user_message,
+                final_response,
+                **sync_kwargs,
             )
             self._memory_manager.queue_prefetch_all(
                 original_user_message,
@@ -2904,25 +3052,20 @@ class AIAgent:
 
         return True
 
-    def _try_refresh_nous_client_credentials(self, *, force: bool = True) -> bool:
+    def _try_refresh_nous_client_credentials(
+        self,
+        *,
+        force: bool = True,
+    ) -> bool:
         if self.api_mode != "chat_completions" or self.provider != "nous":
             return False
 
         try:
-            from hermes_cli.auth import (
-                NOUS_INFERENCE_AUTH_MODE_AUTO,
-                NOUS_INFERENCE_AUTH_MODE_LEGACY,
-                resolve_nous_runtime_credentials,
-            )
+            from hermes_cli.auth import resolve_nous_runtime_credentials
 
             creds = resolve_nous_runtime_credentials(
-                min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
                 timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
-                inference_auth_mode=(
-                    NOUS_INFERENCE_AUTH_MODE_LEGACY
-                    if force
-                    else NOUS_INFERENCE_AUTH_MODE_AUTO
-                ),
+                force_refresh=force,
             )
         except Exception as exc:
             logger.debug("Nous credential refresh failed: %s", exc)
@@ -4045,6 +4188,11 @@ class AIAgent:
         from agent.agent_runtime_helpers import copy_reasoning_content_for_api
         return copy_reasoning_content_for_api(self, source_msg, api_msg)
 
+    def _reapply_reasoning_echo_for_provider(self, api_messages: list) -> int:
+        """Forwarder — see ``agent.agent_runtime_helpers.reapply_reasoning_echo_for_provider``."""
+        from agent.agent_runtime_helpers import reapply_reasoning_echo_for_provider
+        return reapply_reasoning_echo_for_provider(self, api_messages)
+
     @staticmethod
     def _sanitize_tool_calls_for_strict_api(api_msg: dict) -> dict:
         """Strip Codex Responses API fields from tool_calls for strict providers.
diff --git a/scripts/LIVETEST_README.md b/scripts/LIVETEST_README.md
new file mode 100644
index 00000000000..332d5509b90
--- /dev/null
+++ b/scripts/LIVETEST_README.md
@@ -0,0 +1,45 @@
+# Tool Search live test harness
+
+Runs five scenarios against a real model (Claude Haiku 4.5 via OpenRouter) to
+verify that the bridge tools work end-to-end. Records transcripts in
+`scripts/out/`.
+
+## Running
+
+```bash
+cd <repo root>
+python3 scripts/tool_search_livetest.py        # runs all 5 scenarios x 2 modes
+python3 scripts/analyze_livetest.py            # side-by-side report
+```
+
+Requires `OPENROUTER_API_KEY` set or present in `~/.hermes/.env`.
+
+## What it verifies
+
+| Scenario | Tests |
+|----------|-------|
+| A obvious_single | BM25 retrieval on an obvious tool name (github_create_issue) |
+| B vague_paraphrased | Retrieval when the model has to paraphrase ("schedule meeting" → evt_create) |
+| C multi_tool_chain | Multi-step task chaining two deferred tools (GitHub + Slack) |
+| D core_plus_deferred | Mixed: core tool (read_file) called directly, deferred tool (Slack) via bridge |
+| E no_tool_needed | Pure-knowledge prompt; verify no spurious tool_search invocations |
+
+Each scenario runs with `tool_search.enabled = on` and again with `off` for an
+A/B baseline. The harness records:
+
+- bridge_calls (the tool_search / tool_describe / tool_call sequence the model emitted)
+- underlying_tool_calls (what actually ran through the registry dispatcher)
+- final_response, iteration count, elapsed time, any errors
+
+## Output structure
+
+```
+scripts/out/
+  <scenario>__enabled.json    # tool_search ON
+  <scenario>__disabled.json   # tool_search OFF
+  _summary.json               # one-line summary across all runs
+```
+
+The 2026-05 baseline run is checked in for reference. Re-running may produce
+slightly different transcripts (the model is non-deterministic) but the
+expected_underlying_tools assertions should remain satisfied.
diff --git a/scripts/analyze_livetest.py b/scripts/analyze_livetest.py
new file mode 100644
index 00000000000..f11dae197c0
--- /dev/null
+++ b/scripts/analyze_livetest.py
@@ -0,0 +1,114 @@
+#!/usr/bin/env python3
+"""Compare enabled vs disabled runs and produce a readable report.
+
+Reads scripts/out/_summary.json and the per-scenario JSONs, prints a side-by-
+side comparison of what happened, and flags anomalies.
+"""
+
+from __future__ import annotations
+
+import json
+import sys
+from pathlib import Path
+
+
+HERE = Path(__file__).resolve().parent
+OUT = HERE / "out"
+
+
+def load_record(scenario_id: str, mode: str):
+    path = OUT / f"{scenario_id}__{mode}.json"
+    if not path.exists():
+        return None
+    return json.loads(path.read_text(encoding="utf-8"))
+
+
+def fmt_tool_seq(calls):
+    if not calls:
+        return "(none)"
+    return " → ".join(c["name"] for c in calls)
+
+
+def fmt_bridge_seq(calls):
+    if not calls:
+        return "(none)"
+    parts = []
+    for c in calls:
+        if c["name"] == "tool_call":
+            inner = (c.get("args") or {}).get("name", "?")
+            parts.append(f"tool_call→{inner}")
+        elif c["name"] == "tool_search":
+            q = (c.get("args") or {}).get("query", "?")
+            parts.append(f"search('{q[:30]}')")
+        elif c["name"] == "tool_describe":
+            n = (c.get("args") or {}).get("name", "?")
+            parts.append(f"describe({n})")
+    return " → ".join(parts)
+
+
+def main():
+    if not OUT.exists():
+        print("No output directory at", OUT)
+        sys.exit(1)
+    summary_path = OUT / "_summary.json"
+    if not summary_path.exists():
+        print("No _summary.json yet")
+        sys.exit(1)
+
+    summary = json.loads(summary_path.read_text(encoding="utf-8"))
+    scenarios = sorted({row["scenario"] for row in summary})
+
+    print(f"{'='*78}")
+    print(f"  Live test results: tool_search ENABLED vs DISABLED")
+    print(f"{'='*78}\n")
+
+    fails = 0
+    for sid in scenarios:
+        en = load_record(sid, "enabled")
+        di = load_record(sid, "disabled")
+        if not en or not di:
+            continue
+        expected = set(en["expected_underlying_tools"])
+
+        print(f"┌─ {sid}  ({en['scenario_description']})")
+        print(f"│  Prompt: {en['prompt'][:120]}")
+        print(f"│  Expected underlying tools: {sorted(expected) or '(none)'}")
+        print(f"│")
+
+        for label, rec in [("ENABLED ", en), ("DISABLED", di)]:
+            called_under = [c["name"] for c in rec["underlying_tool_calls"]]
+            called_set = set(called_under)
+            missing = expected - called_set
+            extra = called_set - expected - {"read_file", "search_files", "terminal", "todo", "memory"}
+
+            mark = "✓" if (expected.issubset(called_set) and not rec["error"]) else "✗"
+            if mark == "✗":
+                fails += 1
+
+            print(f"│  {label} {mark}  bridges={len(rec['bridge_calls']):2}  underlying={len(rec['underlying_tool_calls']):2}  "
+                  f"iters={rec['n_iterations']:2}  elapsed={rec['elapsed_seconds']:5.1f}s  err={bool(rec['error'])}")
+            print(f"│    underlying: {fmt_tool_seq(rec['underlying_tool_calls'])}")
+            if rec["bridge_calls"]:
+                print(f"│    bridges:    {fmt_bridge_seq(rec['bridge_calls'])}")
+            if missing:
+                print(f"│    ⚠ MISSING expected tools: {sorted(missing)}")
+            if extra:
+                print(f"│    ⓘ extra tools called: {sorted(extra)}")
+            if rec["error"]:
+                print(f"│    💥 error: {rec['error'][:200]}")
+        # Bridge-trip count vs direct (interesting comparator)
+        en_bridges = len(en["bridge_calls"])
+        di_underlying = len(di["underlying_tool_calls"])
+        en_underlying = len(en["underlying_tool_calls"])
+        overhead = en_bridges + en_underlying - di_underlying
+        print(f"│  Δ round-trip cost: enabled used {en_bridges + en_underlying} calls vs disabled {di_underlying}  →  +{overhead}")
+        print(f"│  Final (enabled):  {(en.get('final_response') or '')[:140]}")
+        print(f"│  Final (disabled): {(di.get('final_response') or '')[:140]}")
+        print(f"└──")
+        print()
+
+    print(f"\nFails: {fails}/{2*len(scenarios)}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/build_skills_index.py b/scripts/build_skills_index.py
index 9b9277547f7..2712ae5403a 100644
--- a/scripts/build_skills_index.py
+++ b/scripts/build_skills_index.py
@@ -80,30 +80,27 @@ def crawl_source(source, source_name: str, limit: int) -> list:
 
 
 def crawl_skills_sh(source: SkillsShSource) -> list:
-    """Crawl skills.sh using popular queries for broad coverage."""
-    print("  Crawling skills.sh (popular queries)...", flush=True)
+    """Crawl skills.sh via its sitemap to enumerate the full catalog (~20k entries).
+
+    Previously walked a hardcoded list of ~28 popular keywords (each capped at
+    50 results) which yielded ~850 unique skills — about 4% of the real catalog.
+    The SkillsShSource.search("") path now hits the sitemap directly, returning
+    the full 20k-entry catalog deduplicated by canonical identifier.
+    """
+    print("  Crawling skills.sh (sitemap)...", flush=True)
     start = time.time()
 
-    queries = [
-        "",  # featured
-        "react", "python", "web", "api", "database", "docker",
-        "testing", "scraping", "design", "typescript", "git",
-        "aws", "security", "data", "ml", "ai", "devops",
-        "frontend", "backend", "mobile", "cli", "documentation",
-        "kubernetes", "terraform", "rust", "go", "java",
-    ]
+    try:
+        results = source.search("", limit=0)  # 0 = no cap, return the whole catalog
+    except Exception as e:
+        print(f"    Warning: skills.sh sitemap walk failed: {e}", file=sys.stderr)
+        results = []
 
     all_skills: dict[str, dict] = {}
-    for query in queries:
-        try:
-            results = source.search(query, limit=50)
-            for meta in results:
-                entry = _meta_to_dict(meta)
-                if entry["identifier"] not in all_skills:
-                    all_skills[entry["identifier"]] = entry
-        except Exception as e:
-            print(f"    Warning: skills.sh search '{query}' failed: {e}",
-                  file=sys.stderr)
+    for meta in results:
+        entry = _meta_to_dict(meta)
+        if entry["identifier"] not in all_skills:
+            all_skills[entry["identifier"]] = entry
 
     elapsed = time.time() - start
     print(f"  skills.sh: {len(all_skills)} unique skills ({elapsed:.1f}s)",
@@ -269,11 +266,28 @@ def main():
     # Crawl skills.sh
     all_skills.extend(crawl_skills_sh(skills_sh_source))
 
-    # Crawl other sources in parallel
+    # Crawl other sources in parallel.
+    # Per-source soft caps — sources stop returning when they run out, so these
+    # are ceilings, not targets.  ClawHub has 20k+ skills; bumping to 100k
+    # (well above current catalog size) lets the full catalog land in the
+    # index instead of being truncated at an arbitrary build-time limit.
+    SOURCE_LIMITS = {
+        # ClawHub had 49,698+ skills as of May 2026; 200k leaves headroom.
+        "clawhub": 200_000,
+        "lobehub": 100_000,
+        "browse-sh": 5_000,
+        "claude-marketplace": 5_000,
+        "github": 5_000,
+        "well-known": 5_000,
+        "official": 5_000,
+    }
+    DEFAULT_SOURCE_LIMIT = 500
+
     with ThreadPoolExecutor(max_workers=4) as pool:
         futures = {}
         for name, source in sources.items():
-            futures[pool.submit(crawl_source, source, name, 500)] = name
+            limit = SOURCE_LIMITS.get(name, DEFAULT_SOURCE_LIMIT)
+            futures[pool.submit(crawl_source, source, name, limit)] = name
         for future in as_completed(futures):
             try:
                 all_skills.extend(future.result())
@@ -328,9 +342,17 @@ def main():
     # or rate limiting kicked in.  Failing here forces a human look before
     # the broken index reaches the live docs.
     EXPECTED_FLOORS = {
-        "skills.sh": 100,
+        # skills.sh now uses the sitemap walker (~20k catalog as of May 2026).
+        # Anything under 10k means the sitemap shape changed or fetches failed
+        # — better to fail loudly than ship a regression to the 858-skill
+        # popular-queries era.
+        "skills.sh": 10000,
         "lobehub": 100,
-        "clawhub": 50,
+        # ClawHub had 49,698+ skills as of May 2026 — anything under 20k means
+        # pagination broke or the API surface changed.  Fail loudly rather
+        # than ship a degenerate index (we shipped 200/50000 silently for
+        # weeks because the floor was 50).
+        "clawhub": 20000,
         "official": 50,
         "github": 30,        # collapsed across all GitHub taps
         "browse-sh": 50,
diff --git a/scripts/contributor_audit.py b/scripts/contributor_audit.py
index 50bf3042642..a4166511559 100644
--- a/scripts/contributor_audit.py
+++ b/scripts/contributor_audit.py
@@ -17,7 +17,6 @@ Usage:
 
 import argparse
 import json
-import os
 import re
 import subprocess
 import sys
@@ -30,7 +29,7 @@ from pathlib import Path
 SCRIPT_DIR = Path(__file__).resolve().parent
 sys.path.insert(0, str(SCRIPT_DIR))
 
-from release import AUTHOR_MAP, resolve_author  # noqa: E402
+from release import resolve_author  # noqa: E402
 
 REPO_ROOT = SCRIPT_DIR.parent
 
@@ -42,6 +41,7 @@ IGNORED_PATTERNS = [
     re.compile(r"^Copilot$", re.IGNORECASE),
     re.compile(r"^Cursor(\s+Agent)?$", re.IGNORECASE),
     re.compile(r"^GitHub\s*Actions?$", re.IGNORECASE),
+    re.compile(r"^github-actions(\[bot\])?$", re.IGNORECASE),
     re.compile(r"^dependabot", re.IGNORECASE),
     re.compile(r"^renovate", re.IGNORECASE),
     re.compile(r"^Hermes\s+(Agent|Audit)$", re.IGNORECASE),
@@ -51,10 +51,12 @@ IGNORED_PATTERNS = [
 IGNORED_EMAILS = {
     "noreply@anthropic.com",
     "noreply@github.com",
+    "noreply@nousresearch.com",
     "cursoragent@cursor.com",
     "hermes@nousresearch.com",
     "hermes-audit@example.com",
     "hermes@habibilabs.dev",
+    "omx@oh-my-codex.dev",
 }
 
 
diff --git a/scripts/install_psutil_android.py b/scripts/install_psutil_android.py
index 4e2c49805a6..6423b360ad2 100755
--- a/scripts/install_psutil_android.py
+++ b/scripts/install_psutil_android.py
@@ -27,21 +27,22 @@ import argparse
 import shutil
 import subprocess
 import sys
-import tarfile
 import tempfile
 import urllib.request
 from pathlib import Path
 
-# Pin a version we know patches cleanly. Update when a newer psutil
-# changes the marker line shape and we need to follow upstream.
-PSUTIL_URL = (
-    "https://files.pythonhosted.org/packages/aa/c6/"
-    "d1ddf4abb55e93cebc4f2ed8b5d6dbad109ecb8d63748dd2b20ab5e57ebe/"
-    "psutil-7.2.2.tar.gz"
+# Keep sibling imports working when invoked as
+# ``python scripts/install_psutil_android.py`` from the repo checkout.
+REPO_ROOT = Path(__file__).resolve().parents[1]
+if str(REPO_ROOT) not in sys.path:
+    sys.path.insert(0, str(REPO_ROOT))
+
+from hermes_cli.psutil_android import (
+    PSUTIL_URL,
+    PsutilAndroidInstallError,
+    prepare_patched_psutil_sdist,
 )
 
-MARKER = 'LINUX = sys.platform.startswith("linux")'
-REPLACEMENT = 'LINUX = sys.platform.startswith(("linux", "android"))'
 
 
 def _resolve_install_cmd(pip_arg: str | None, prefer_uv: bool) -> list[str]:
@@ -82,26 +83,10 @@ def main() -> int:
         tmp_path = Path(tmp)
         archive = tmp_path / "psutil.tar.gz"
         urllib.request.urlretrieve(PSUTIL_URL, archive)
-        with tarfile.open(archive) as tar:
-            tar.extractall(tmp_path)
-
         try:
-            src_root = next(
-                p for p in tmp_path.iterdir()
-                if p.is_dir() and p.name.startswith("psutil-")
-            )
-        except StopIteration:
-            sys.exit("psutil sdist did not contain a psutil-* directory")
-
-        common_py = src_root / "psutil" / "_common.py"
-        content = common_py.read_text(encoding="utf-8")
-        if MARKER not in content:
-            sys.exit(
-                "psutil Android compatibility patch marker not found — "
-                "upstream may have changed the LINUX detection line. "
-                "Update MARKER/REPLACEMENT in this script."
-            )
-        common_py.write_text(content.replace(MARKER, REPLACEMENT), encoding="utf-8")
+            src_root = prepare_patched_psutil_sdist(archive, tmp_path)
+        except PsutilAndroidInstallError as exc:
+            sys.exit(str(exc))
 
         cmd = install_cmd_prefix + ["install", "--no-build-isolation", str(src_root)]
         print(f"  $ {' '.join(cmd)}")
diff --git a/scripts/release.py b/scripts/release.py
index d9e2aacd8b1..9b7e12a5d1f 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -45,18 +45,36 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json"
 
 # Auto-extracted from noreply emails + manual overrides
 AUTHOR_MAP = {
+    "metalclaudbot@gmail.com": "HashClawAI",
+    "tonybear55665566@gmail.com": "TonyPepeBear",
+    "kaspersniels@gmail.com": "nielskaspers",
+    "kurobaryo@gmail.com": "kurobaryo",
+    "155192176+alelpoan@users.noreply.github.com": "alelpoan",
+    "aman@abacus.ai": "Aman113114-IITD",
+    "octavio.turra@gmail.com": "octavioturra",
+    "524706+Twanislas@users.noreply.github.com": "Twanislas",
     "9592417+adam91holt@users.noreply.github.com": "adam91holt",
+    "kchuang1015@users.noreply.github.com": "kchuang1015",
     "45688690+fujinice@users.noreply.github.com": "fujinice",
     "276689385+carltonawong@users.noreply.github.com": "carltonawong",
     "195255660+EvilHumphrey@users.noreply.github.com": "EvilHumphrey",
     "270604154+superearn-fisher@users.noreply.github.com": "superearn-fisher",
     "3540493+kpadilha@users.noreply.github.com": "kpadilha",
     "40378218+chaconne67@users.noreply.github.com": "chaconne67",
+    "Pluviobyte@users.noreply.github.com": "Pluviobyte",
     "sanghyuk_seo@nexcubecorp.com": "sanghyuk-seo-nexcube",
     "subrtt@gmail.com": "Brixyy",
     "wangpuv@hotmail.com": "wangpuv",
     "202622897+ticketclosed-wontfix@users.noreply.github.com": "ticketclosed-wontfix",
     "wuxuebin1993@gmail.com": "victorGPT",
+    "frowte3k@gmail.com": "Frowtek",
+    "211828103+julio-cloudvisor@users.noreply.github.com": "julio-cloudvisor",
+    "17778+kweiner@users.noreply.github.com": "kweiner",
+    "223516181+faisfamilytravel@users.noreply.github.com": "faisfamilytravel",
+    "45189813+baofuen@users.noreply.github.com": "baofuen",
+    "interstellar.consulting@gmail.com": "Interstellar-code",
+    "33978413+Interstellar-code@users.noreply.github.com": "Interstellar-code",
+    "tillfalko@gmail.com": "tillfalko",
     # teknium (multiple emails)
     "teknium1@gmail.com": "teknium1",
     "kenyon1977@gmail.com": "kenyonxu",
@@ -73,6 +91,8 @@ AUTHOR_MAP = {
     "anadi.jaggia@gmail.com": "Jaggia",
     "steve@steveonjava.com": "steveonjava",
     "steveonjava@gmail.com": "steveonjava",
+    "squiddy@2rook.ai": "MoonRay305",
+    "annguyenNous@users.noreply.github.com": "annguyenNous",
     "32201324+simpolism@users.noreply.github.com": "simpolism",
     "simpolism@gmail.com": "simpolism",
     "jake@nousresearch.com": "simpolism",
@@ -87,6 +107,7 @@ AUTHOR_MAP = {
     "omar@techdeveloper.site": "nycomar",
     "qiyin.zuo@pcitc.com": "qiyin-code",
     "mr.aashiz@gmail.com": "aashizpoudel",
+    "adityargadgil@gmail.com": "AdityaRajeshGadgil",
     "70629228+shaun0927@users.noreply.github.com": "shaun0927",
     "soju06@users.noreply.github.com": "Soju06",
     "34199905+Soju06@users.noreply.github.com": "Soju06",
@@ -97,7 +118,10 @@ AUTHOR_MAP = {
     "kronexoi13@gmail.com": "kronexoi",
     "hua.zhong@kingsmith.com": "vgocoder",
     "hermes@marian.local": "Schrotti77",
+    "david@memorilabs.ai": "devwdave",
+    "dave@devwdave.com": "devwdave",
     "1920071390@campus.ouj.ac.jp": "zapabob",
+    "zapabob@users.noreply.github.com": "zapabob",
     "gaia@gaia.local": "jfuenmayor",
     "jiahuigu@users.noreply.github.com": "Jiahui-Gu",
     "openhands@all-hands.dev": "YLChen-007",
@@ -106,8 +130,14 @@ AUTHOR_MAP = {
     "32711803+waefrebeorn@users.noreply.github.com": "waefrebeorn",
     "32869278+dusterbloom@users.noreply.github.com": "dusterbloom",
     "liuhao1024@users.noreply.github.com": "liuhao1024",
+    "annguyenNous@users.noreply.github.com": "annguyenNous",
+    "285874597+annguyenNous@users.noreply.github.com": "annguyenNous",
     "kylekahraman@users.noreply.github.com": "kylekahraman",
     "130975919+kylekahraman@users.noreply.github.com": "kylekahraman",
+    "seppe@fushia.be": "seppegadeyne",
+    "18264851+seppegadeyne@users.noreply.github.com": "seppegadeyne",
+    "blackpilledsoftware@gmail.com": "blackpilledsoftware-prog",
+    "266800570+blackpilledsoftware-prog@users.noreply.github.com": "blackpilledsoftware-prog",
     "dsr-restyn@users.noreply.github.com": "dsr-restyn",
     "210765158+WuKongAI-CMU@users.noreply.github.com": "WuKongAI-CMU",
     "lichriszhang@gmail.com": "codeblackhole1024",
@@ -124,6 +154,7 @@ AUTHOR_MAP = {
     "buraysandro9@gmail.com": "ygd58",
     "108427749+buntingszn@users.noreply.github.com": "buntingszn",
     "yanglongwei06@gmail.com": "Alex-yang00",
+    "yanghongda@jackyun.com": "yangguangjin",
     "teknium@nousresearch.com": "teknium1",
     "markuscontasul@gmail.com": "Glucksberg",
     "80581902+Glucksberg@users.noreply.github.com": "Glucksberg",
@@ -256,6 +287,8 @@ AUTHOR_MAP = {
     "harryykyle1@gmail.com": "hharry11",
     "wysie@users.noreply.github.com": "wysie",
     "ronhi@buildabear1.localdomain": "RonHillDev",  # PR #29523 salvage (machine-local commit email)
+    "moikapy@devmoi.com": "Moikapy",  # PR #31527 salvage
+    "barany.gabor@gmail.com": "gbarany",  # PR #27907 salvage (xAI sanitizer deepcopy)
     "hello@nami4d.tech": "Nami4D",  # PR #28490 salvage
     "jkausel@gmail.com": "jkausel-ai",
     "e.silacandmr@gmail.com": "Es1la",
@@ -493,6 +526,8 @@ AUTHOR_MAP = {
     "barnacleboy.jezzahehn@agentmail.to": "JezzaHehn",
     "254021826+dodo-reach@users.noreply.github.com": "dodo-reach",
     "259807879+Bartok9@users.noreply.github.com": "Bartok9",
+    "123342691+banditburai@users.noreply.github.com": "banditburai",
+    "9063726+Kyzcreig@users.noreply.github.com": "Kyzcreig",
     "270082434+crayfish-ai@users.noreply.github.com": "crayfish-ai",
     "241404605+MestreY0d4-Uninter@users.noreply.github.com": "MestreY0d4-Uninter",
     "268667990+Roy-oss1@users.noreply.github.com": "Roy-oss1",
@@ -568,7 +603,7 @@ AUTHOR_MAP = {
     "ruzzgarcn@gmail.com": "Ruzzgar",
     "yukipukikedy@gmail.com": "Yukipukii1",
     "alireza78.crypto@gmail.com": "alireza78a",
-    "brooklyn.bb.nicholson@gmail.com": "brooklynnicholson",
+    "brooklyn.bb.nicholson@gmail.com": "OutThisLife",
     "withapurpose37@gmail.com": "StefanIsMe",
     "4317663+helix4u@users.noreply.github.com": "helix4u",
     "ifkellx@users.noreply.github.com": "Ifkellx",
@@ -615,6 +650,7 @@ AUTHOR_MAP = {
     "pub_forgreatagent@antgroup.com": "AntAISecurityLab",
     "252620095+briandevans@users.noreply.github.com": "briandevans",
     "danielrpike9@gmail.com": "Bartok9",
+    "96944678+ymylive@users.noreply.github.com": "sweetcornna",
     "skozyuk@cruxexperts.com": "CruxExperts",
     "154585401+LeonSGP43@users.noreply.github.com": "LeonSGP43",
     "12250313+Kailigithub@users.noreply.github.com": "Kailigithub",
@@ -1287,6 +1323,8 @@ AUTHOR_MAP = {
     "rudi193@gmail.com": "rudi193-cmd",
     "86684667+sadiksaifi@users.noreply.github.com": "sadiksaifi",  # PR #27982 salvage (kanban horiz scroll)
     "mail@sadiksaifi.dev": "sadiksaifi",
+    "231588442+vynxevainglory-ai@users.noreply.github.com": "vynxevainglory-ai",  # PR #29233 salvage (kanban scrollbar + body overflow)
+    "vynxevainglory@gmail.com": "vynxevainglory-ai",
     # batch salvage (May 2026 LHF run, group 8)
     "266824395+AceWattGit@users.noreply.github.com": "AceWattGit",  # PR #28159 salvage (_pool_may_recover NameError)
     "57024493+YuanHanzhong@users.noreply.github.com": "YuanHanzhong",  # PR #28032 salvage (x.com status link-like)
@@ -1342,6 +1380,23 @@ AUTHOR_MAP = {
     "timothy.b.dixon@gmail.com": "Codename-11",  # PR #29302 (API server session controls — sessions/chat/fork/stream)
     "jpschwartz2@uwalumni.com": "Schwartz10",  # PR #29302 sub-PR (multimodal media in session chat API)
     "JohnC1009@users.noreply.github.com": "JohnC1009",  # PR #32020 salvage (auth: global auth.json fallback in _load_provider_state)
+    "biser@bisko.be": "bisko",  # PR #33784 salvage (re-pad reasoning_content on cross-provider fallback to require-side providers)
+    # v0.15.0 additions
+    "glen@workmanfirearms.com": "sgtworkman",
+    "jorge.fuenmayort@gmail.com": "jfuenmayor",
+    "mordred@inaugust.com": "emonty",
+    "rodrigoeq@hotmail.com": "rodrigoeqnit",
+    "soliva.johnpaul@icloud.com": "jonpol01",
+    "2182712990@qq.com": "yu-xin-c",  # PR #32122 (Docker audio bridge notes)
+    "baxter@bitreserve.ai": "BaxBit",  # PR #30200 (Svix webhook signature validation)
+    "chris.eth@qq.com": "duyua9",  # PR #10949 (render object config values structurally)
+    "ethie@nous": "ethernet8023",  # PR #29342 (TUI clipboard copy on linux/wayland)
+    "jiahuigu@sjtu.edu.cn": "Jiahui-Gu",  # PR #29276 (guard pickle.loads in darwinian-evolver)
+    "justinccdev@gmail.com": "justincc",  # PR #28914 (set tool_name on tool-result messages)
+    "kdkcfp@gmail.com": "slowtokki0409",  # PR #29025 (ignore local Hermes runtime files)
+    "peter.yuqin@gmail.com": "WuKongAI-CMU",  # PR #10082 (reject symlinked audio inputs)
+    "sunil.nitie@gmail.com": "Sunil123135",  # PR #31031 (Windows Docker Desktop compose)
+    "weichangyuwcy@gmail.com": "ChyuWei",  # PR #30987 (TUI TTS env var on voice off)
 }
 
 
diff --git a/scripts/tool_search_livetest.py b/scripts/tool_search_livetest.py
new file mode 100644
index 00000000000..86aca38ccab
--- /dev/null
+++ b/scripts/tool_search_livetest.py
@@ -0,0 +1,549 @@
+#!/usr/bin/env python3
+"""Live test harness for Hermes Agent's Tool Search feature.
+
+Spins up a real AIAgent against a real model, registers ~20 fake "MCP" tools
+with realistic shapes (github-like, slack-like, calendar-like, search-like),
+runs a small set of scenarios, and records exactly what the model did.
+
+For each scenario we record:
+  - the full message transcript
+  - the sequence of tool calls (name + args) the model emitted
+  - which underlying tools actually got invoked (after bridge unwrap)
+  - the final assistant response
+  - timing and round-trip count
+
+Each scenario runs twice:
+  - tool_search ENABLED  (deferred behind bridges)
+  - tool_search DISABLED (all tools loaded directly)
+
+Output: ./out/<scenario_id>__<enabled|disabled>.json
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import re
+import shutil
+import sys
+import tempfile
+import time
+import traceback
+from pathlib import Path
+from typing import Any, Dict, List, Tuple
+
+# Force-isolate the test environment BEFORE any hermes imports.
+ORIGINAL_HOME = os.environ.get("HERMES_HOME")
+ORIGINAL_AUTH = Path.home() / ".hermes" / "auth.json"
+
+_THIS_DIR = Path(__file__).resolve().parent
+_WORKTREE_ROOT = _THIS_DIR.parent
+sys.path.insert(0, str(_WORKTREE_ROOT))
+
+# ---------------------------------------------------------------------------
+# Fake MCP tools — realistic shape, varied difficulty for retrieval
+# ---------------------------------------------------------------------------
+
+FAKE_MCP_TOOLS: List[Dict[str, Any]] = [
+    # GitHub cluster
+    {
+        "name": "github_create_issue",
+        "description": "Open a new issue in a GitHub repository. Use when the user wants to report a bug or request a feature in a repo.",
+        "params": {"repo": ("string", "Repository in owner/name form"),
+                   "title": ("string", "Issue title"),
+                   "body": ("string", "Issue body in Markdown")},
+        "returns": lambda args: {"ok": True, "issue_url": f"https://github.com/{args.get('repo','x/y')}/issues/42"},
+    },
+    {
+        "name": "github_search_repos",
+        "description": "Search GitHub repositories by free-text query. Returns a ranked list of repo names with star counts.",
+        "params": {"query": ("string", "Search terms"),
+                   "limit": ("integer", "Max results")},
+        "returns": lambda args: {"results": [{"name": "fake/repo-1", "stars": 1200},
+                                             {"name": "fake/repo-2", "stars": 540}]},
+    },
+    {
+        "name": "github_close_pr",
+        "description": "Close a pull request without merging it. Use when the PR should be abandoned.",
+        "params": {"repo": ("string", ""), "pr_number": ("integer", "")},
+        "returns": lambda args: {"ok": True, "state": "closed"},
+    },
+    {
+        "name": "github_list_pulls",
+        "description": "List open pull requests for a repository.",
+        "params": {"repo": ("string", "")},
+        "returns": lambda args: {"pulls": [{"number": 31163, "title": "feat(tools): tool search"}]},
+    },
+
+    # Slack cluster
+    {
+        "name": "slack_send_message",
+        "description": "Post a message into a Slack channel as the connected workspace's app.",
+        "params": {"channel": ("string", "Channel name with leading #"),
+                   "text": ("string", "Message body")},
+        "returns": lambda args: {"ok": True, "ts": "1716528000.000100"},
+    },
+    {
+        "name": "slack_list_channels",
+        "description": "Return all channels visible to the connected Slack workspace bot.",
+        "params": {},
+        "returns": lambda args: {"channels": ["#general", "#engineering", "#random"]},
+    },
+    {
+        "name": "slack_set_status",
+        "description": "Set the current user's Slack status (emoji + text).",
+        "params": {"emoji": ("string", ""), "text": ("string", "")},
+        "returns": lambda args: {"ok": True},
+    },
+
+    # Calendar cluster (intentionally vague names to stress retrieval)
+    {
+        "name": "evt_create",
+        "description": "Add an event to the connected calendar. Used for scheduling meetings.",
+        "params": {"title": ("string", ""),
+                   "start": ("string", "ISO 8601 datetime"),
+                   "duration_min": ("integer", "")},
+        "returns": lambda args: {"ok": True, "event_id": "evt_abc"},
+    },
+    {
+        "name": "evt_list",
+        "description": "List upcoming calendar events.",
+        "params": {"max_results": ("integer", "")},
+        "returns": lambda args: {"events": [{"id": "evt_1", "title": "Standup", "start": "2026-05-25T09:00:00Z"}]},
+    },
+
+    # Knowledge / docs (paraphrased name to stress retrieval)
+    {
+        "name": "docsearch_query",
+        "description": "Search the user's internal documentation index for matching pages.",
+        "params": {"q": ("string", "Search query"), "limit": ("integer", "")},
+        "returns": lambda args: {"hits": [{"title": "Onboarding", "url": "https://docs/x"}]},
+    },
+    {
+        "name": "docsearch_fetch",
+        "description": "Fetch the full markdown content of one document by ID.",
+        "params": {"id": ("string", "")},
+        "returns": lambda args: {"content": "# Onboarding\n..."},
+    },
+
+    # Database
+    {
+        "name": "db_query",
+        "description": "Run a read-only SQL query against the analytics database.",
+        "params": {"sql": ("string", "SELECT ... statement")},
+        "returns": lambda args: {"rows": [{"id": 1, "name": "alice"}]},
+    },
+    {
+        "name": "db_describe_table",
+        "description": "Show the schema of a database table.",
+        "params": {"table": ("string", "")},
+        "returns": lambda args: {"columns": [{"name": "id", "type": "int"}, {"name": "name", "type": "text"}]},
+    },
+
+    # Linear
+    {
+        "name": "linear_create_ticket",
+        "description": "Create a new Linear issue (ticket) in the connected workspace.",
+        "params": {"title": ("string", ""), "body": ("string", ""), "priority": ("integer", "1-4")},
+        "returns": lambda args: {"ok": True, "id": "ENG-101"},
+    },
+    {
+        "name": "linear_assign",
+        "description": "Reassign a Linear ticket to a different user.",
+        "params": {"ticket_id": ("string", ""), "user": ("string", "")},
+        "returns": lambda args: {"ok": True},
+    },
+
+    # Notion
+    {
+        "name": "notion_create_page",
+        "description": "Create a new page in the connected Notion workspace.",
+        "params": {"title": ("string", ""), "body": ("string", ""), "parent": ("string", "")},
+        "returns": lambda args: {"ok": True, "page_id": "abc123"},
+    },
+
+    # Random others (filler / distractors)
+    {
+        "name": "weather_get",
+        "description": "Look up the current weather for a city.",
+        "params": {"city": ("string", "")},
+        "returns": lambda args: {"city": args.get("city", ""), "temp_c": 19, "summary": "Cloudy"},
+    },
+    {
+        "name": "translate_text",
+        "description": "Translate a short text from one language to another.",
+        "params": {"text": ("string", ""), "to": ("string", "Target language code")},
+        "returns": lambda args: {"translated": args.get("text", "") + " [translated to " + args.get("to", "??") + "]"},
+    },
+    {
+        "name": "pdf_extract",
+        "description": "Extract text from a PDF file given its path.",
+        "params": {"path": ("string", "")},
+        "returns": lambda args: {"text": "[fake PDF text]"},
+    },
+    {
+        "name": "yt_transcript",
+        "description": "Fetch the transcript for a YouTube video by URL.",
+        "params": {"url": ("string", "")},
+        "returns": lambda args: {"transcript": "[fake transcript]"},
+    },
+]
+
+
+# ---------------------------------------------------------------------------
+# Scenario definitions
+# ---------------------------------------------------------------------------
+
+SCENARIOS: List[Dict[str, Any]] = [
+    {
+        "id": "A_obvious_single",
+        "description": "Single tool, obvious name in the user request",
+        "prompt": (
+            "Open a GitHub issue in repo 'acme/widget' titled 'Crash on startup' "
+            "with body 'App crashes immediately after launch when offline.' "
+            "Then tell me you're done. Don't do anything else."
+        ),
+        "expected_underlying_tools": ["github_create_issue"],
+    },
+    {
+        "id": "B_vague_paraphrased",
+        "description": "Single tool, paraphrased intent (tests retrieval quality)",
+        "prompt": (
+            "Add a meeting to my schedule for tomorrow morning at 10am called "
+            "'Design review', 30 minutes long. Then tell me you're done. Don't do anything else."
+        ),
+        "expected_underlying_tools": ["evt_create"],
+    },
+    {
+        "id": "C_multi_tool_chain",
+        "description": "Multi-step task requiring 2-3 deferred tools",
+        "prompt": (
+            "Find the open pull requests on repo 'acme/widget', then post a "
+            "summary of how many there are to the #engineering Slack channel. "
+            "Then tell me you're done."
+        ),
+        "expected_underlying_tools": ["github_list_pulls", "slack_send_message"],
+    },
+    {
+        "id": "D_core_plus_deferred",
+        "description": "Task uses BOTH a core tool (read_file) and a deferred tool",
+        "prompt": (
+            "Read the file at /tmp/livetest/notes.txt (it exists, just read it) "
+            "and then post its contents to the #random Slack channel. Tell me you're done."
+        ),
+        "expected_underlying_tools": ["read_file", "slack_send_message"],
+        "expected_core_tool_direct": True,  # must NOT use tool_call for read_file
+    },
+    {
+        "id": "E_no_tool_needed",
+        "description": "Question doesn't need any tool — model should just answer",
+        "prompt": "What's 7 times 8? Answer with just the number.",
+        "expected_underlying_tools": [],
+    },
+]
+
+
+# ---------------------------------------------------------------------------
+# Harness
+# ---------------------------------------------------------------------------
+
+
+def setup_isolated_home(enabled: bool) -> Path:
+    """Create a fresh ~/.hermes/ for one test, copying minimal credentials.
+
+    Also reads OPENROUTER_API_KEY from the user's real ``~/.hermes/.env`` so
+    the agent can authenticate against OpenRouter inside the isolated home.
+    """
+    home_dir = Path(tempfile.mkdtemp(prefix="hermes_ts_live_"))
+    hermes_home = home_dir / ".hermes"
+    hermes_home.mkdir(parents=True)
+
+    if ORIGINAL_AUTH.exists():
+        shutil.copy(ORIGINAL_AUTH, hermes_home / "auth.json")
+
+    # Copy .env so OPENROUTER_API_KEY (or others) are visible to the agent
+    # running inside the isolated home.
+    real_env_file = Path.home() / ".hermes" / ".env"
+    if real_env_file.exists():
+        shutil.copy(real_env_file, hermes_home / ".env")
+        # Also load the real user env into this process so the provider
+        # resolver can authenticate. We go through the canonical loader
+        # (python-dotenv under the hood) rather than parsing the file by
+        # hand — it never materializes the secret in a local variable in
+        # this module, which both avoids a hand-rolled parser bug and keeps
+        # static analysis from tainting the transcript records with the key.
+        from hermes_cli.env_loader import load_hermes_dotenv
+        load_hermes_dotenv(hermes_home=str(Path.home() / ".hermes"))
+
+    cfg = {
+        "model": {
+            "provider": "openrouter",
+            "model": "anthropic/claude-haiku-4.5",
+        },
+        "tools": {
+            "tool_search": {
+                "enabled": "on" if enabled else "off",
+                "threshold_pct": 10,
+                "search_default_limit": 5,
+                "max_search_limit": 20,
+            },
+        },
+        "logging": {"level": "WARNING"},
+    }
+    (hermes_home / "config.yaml").write_text(_yaml_dump(cfg), encoding="utf-8")
+    return hermes_home
+
+
+def _yaml_dump(obj: Any) -> str:
+    try:
+        import yaml
+        return yaml.safe_dump(obj, sort_keys=False)
+    except ImportError:
+        return json.dumps(obj, indent=2)
+
+
+def register_fake_tools() -> int:
+    """Register the FAKE_MCP_TOOLS into the live tool registry."""
+    from tools.registry import registry
+
+    def make_handler(tool_def):
+        def _handler(*args, **kwargs):
+            try:
+                return json.dumps(tool_def["returns"](kwargs), ensure_ascii=False)
+            except Exception as e:
+                return json.dumps({"error": f"fake tool handler error: {e}"})
+        return _handler
+
+    count = 0
+    for tdef in FAKE_MCP_TOOLS:
+        properties = {}
+        required = []
+        for p_name, (p_type, p_desc) in tdef["params"].items():
+            properties[p_name] = {"type": p_type, "description": p_desc}
+            required.append(p_name)
+
+        registry.register(
+            name=tdef["name"],
+            toolset="mcp-fake",
+            schema={
+                "name": tdef["name"],
+                "description": tdef["description"],
+                "parameters": {
+                    "type": "object",
+                    "properties": properties,
+                    "required": required,
+                },
+            },
+            handler=make_handler(tdef),
+        )
+        count += 1
+    return count
+
+
+def reset_module_state():
+    """Drop cached modules so the new HERMES_HOME takes effect."""
+    keys = [k for k in sys.modules.keys()
+            if k.startswith(("tools.", "model_tools", "toolsets",
+                             "hermes_cli", "agent.", "run_agent"))]
+    for k in keys:
+        del sys.modules[k]
+
+
+def run_one_scenario(scenario: Dict[str, Any], enabled: bool, out_dir: Path) -> Dict[str, Any]:
+    """Run one (scenario, enabled) combination. Returns the recorded transcript."""
+    reset_module_state()
+    home = setup_isolated_home(enabled=enabled)
+    os.environ["HERMES_HOME"] = str(home)
+
+    # Pre-create the test file used by scenario D.
+    Path("/tmp/livetest").mkdir(exist_ok=True)
+    Path("/tmp/livetest/notes.txt").write_text("Hello from the test fixture.\n", encoding="utf-8")
+
+    n_registered = register_fake_tools()
+
+    # Capture tool calls via a hook on the registry dispatch path. We use the
+    # registry hook (rather than the run_agent.handle_function_call binding,
+    # which is already cached by tool_executor) because the dispatch call is
+    # the one place every underlying tool call lands. Bridge calls are
+    # extracted from the message transcript after the run.
+    tool_call_log: List[Dict[str, Any]] = []
+
+    from tools.registry import registry
+    original_dispatch = registry.dispatch
+
+    def logging_dispatch(name, args, **kw):
+        tool_call_log.append({"name": name, "args": _trim_args(args)})
+        return original_dispatch(name, args, **kw)
+    registry.dispatch = logging_dispatch
+
+    # Build agent and run
+    started = time.time()
+    error = None
+    final_response = ""
+    messages_out = []
+    try:
+        from run_agent import AIAgent
+        agent = AIAgent(
+            provider="openrouter",
+            model="anthropic/claude-haiku-4.5",
+            enabled_toolsets=None,  # Default = all available toolsets, including the registered mcp-fake tools
+            quiet_mode=True,
+            save_trajectories=False,
+            skip_context_files=True,
+            skip_memory=True,
+            platform="cli",
+            max_iterations=15,
+        )
+        result = agent.run_conversation(
+            user_message=scenario["prompt"],
+            system_message=(
+                "You are a test agent. Complete the user's task using available "
+                "tools. Be concise; don't add commentary beyond what's needed."
+            ),
+        )
+        if isinstance(result, dict):
+            final_response = result.get("final_response") or ""
+            messages_out = result.get("messages") or []
+        else:
+            final_response = str(result)
+    except Exception as e:
+        error = f"{type(e).__name__}: {e}\n{traceback.format_exc()}"
+    finally:
+        registry.dispatch = original_dispatch
+
+    elapsed = time.time() - started
+
+    # Extract bridge calls from the message transcript. Easier and more
+    # accurate than monkey-patching: this is the actual wire shape the
+    # model emitted.
+    bridge_call_log = _extract_bridge_calls(messages_out)
+
+    # Compose the trace.
+    record = {
+        "scenario_id": scenario["id"],
+        "scenario_description": scenario["description"],
+        "tool_search_enabled": enabled,
+        "model": "anthropic/claude-haiku-4.5 (via openrouter)",
+        "prompt": scenario["prompt"],
+        "expected_underlying_tools": scenario.get("expected_underlying_tools", []),
+        "n_fake_tools_registered": n_registered,
+        "elapsed_seconds": round(elapsed, 2),
+        "bridge_calls": bridge_call_log,
+        "underlying_tool_calls": tool_call_log,
+        "final_response": _redact_secrets(final_response),
+        "n_iterations": _count_assistant_turns(messages_out),
+        "error": _redact_secrets(error) if error else error,
+    }
+
+    suffix = "enabled" if enabled else "disabled"
+    out_path = out_dir / f"{scenario['id']}__{suffix}.json"
+    out_path.write_text(json.dumps(record, indent=2, default=str), encoding="utf-8")
+
+    # Cleanup
+    shutil.rmtree(home.parent, ignore_errors=True)
+    return record
+
+
+def _redact_secrets(text: str) -> str:
+    """Strip anything secret-shaped from text before it is stored or printed.
+
+    The harness runs against a real OpenRouter key, and ``error`` can carry a
+    full traceback that — for an auth failure — may echo a request header or
+    URL containing the key. We never want a credential landing in a checked-in
+    transcript or the console, so we mask:
+      * the live OPENROUTER_API_KEY value, if present in the environment, and
+      * any ``sk-``/``sk-or-`` style bearer token by pattern.
+    """
+    if not text:
+        return text
+    out = text
+    live_key = os.environ.get("OPENROUTER_API_KEY")
+    if live_key and len(live_key) >= 8:
+        out = out.replace(live_key, "[REDACTED]")
+    out = re.sub(r"sk-[A-Za-z0-9_\-]{12,}", "[REDACTED]", out)
+    out = re.sub(r"(?i)(authorization|bearer)\s*[:=]\s*\S+", r"\1: [REDACTED]", out)
+    return out
+
+
+def _trim_args(args: Any, max_chars: int = 300) -> Any:
+    """Trim long string args so the log stays readable."""
+    if not isinstance(args, dict):
+        return args
+    out = {}
+    for k, v in args.items():
+        if isinstance(v, str) and len(v) > max_chars:
+            out[k] = v[:max_chars] + f"...[{len(v)-max_chars} chars trimmed]"
+        else:
+            out[k] = v
+    return out
+
+
+def _count_assistant_turns(messages: List[Dict[str, Any]]) -> int:
+    return sum(1 for m in messages if isinstance(m, dict) and m.get("role") == "assistant")
+
+
+def _extract_bridge_calls(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Pull out every tool_search / tool_describe / tool_call from a transcript."""
+    bridges = ("tool_search", "tool_describe", "tool_call")
+    out: List[Dict[str, Any]] = []
+    for m in messages or []:
+        if not isinstance(m, dict) or m.get("role") != "assistant":
+            continue
+        tcs = m.get("tool_calls") or []
+        for c in tcs:
+            if not isinstance(c, dict):
+                continue
+            fn = c.get("function") or {}
+            name = fn.get("name")
+            if name in bridges:
+                raw_args = fn.get("arguments") or "{}"
+                try:
+                    args = json.loads(raw_args) if isinstance(raw_args, str) else raw_args
+                except json.JSONDecodeError:
+                    args = {"_raw": raw_args}
+                out.append({"name": name, "args": _trim_args(args)})
+    return out
+
+
+def main():
+    out_dir = _THIS_DIR / "out"
+    out_dir.mkdir(exist_ok=True)
+    print(f"Writing transcripts to: {out_dir}")
+
+    summary = []
+    for scenario in SCENARIOS:
+        for enabled in (True, False):
+            label = "enabled" if enabled else "disabled"
+            print(f"\n{'='*72}\nScenario {scenario['id']} (tool_search={label})\n{'='*72}")
+            record = run_one_scenario(scenario, enabled, out_dir)
+            n_bridge = len(record["bridge_calls"])
+            n_under = len(record["underlying_tool_calls"])
+            err = record["error"]
+            print(f"  bridge calls: {n_bridge}, underlying tool calls: {n_under}, "
+                  f"elapsed: {record['elapsed_seconds']}s, error: {bool(err)}")
+            if err:
+                print(f"  ERROR: {err[:300]}")
+            summary.append({
+                "scenario": scenario["id"],
+                "enabled": enabled,
+                "n_bridge": n_bridge,
+                "n_underlying": n_under,
+                "elapsed": record["elapsed_seconds"],
+                "error": bool(err),
+                "underlying_tools_called": [c["name"] for c in record["underlying_tool_calls"]],
+                "expected": scenario.get("expected_underlying_tools", []),
+            })
+
+    summary_path = out_dir / "_summary.json"
+    summary_path.write_text(json.dumps(summary, indent=2), encoding="utf-8")
+    print(f"\nSummary saved to: {summary_path}")
+
+    # Restore original HERMES_HOME
+    if ORIGINAL_HOME is not None:
+        os.environ["HERMES_HOME"] = ORIGINAL_HOME
+    else:
+        os.environ.pop("HERMES_HOME", None)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/skills/apple/apple-reminders/SKILL.md b/skills/apple/apple-reminders/SKILL.md
index 37c4fa74fd8..45366448708 100644
--- a/skills/apple/apple-reminders/SKILL.md
+++ b/skills/apple/apple-reminders/SKILL.md
@@ -68,6 +68,38 @@ remindctl add --title "Call mom" --list Personal --due tomorrow
 remindctl add --title "Meeting prep" --due "2026-02-15 09:00"
 ```
 
+### Due Time vs Alarm / Early Nudge
+
+`--due` and `--alarm` are different fields:
+
+- `--due` sets the reminder's due date/time.
+- `--alarm` sets the EventKit alarm/notification trigger. Timed due reminders may default to an alarm at the due time, but pass `--alarm` explicitly when the user asks for an earlier nudge.
+
+For a reminder due at 2:00 PM with a notification 30 minutes earlier:
+
+```bash
+remindctl add --title "Hairdresser" --due "2026-05-15 14:00" --alarm "2026-05-15 13:30"
+```
+
+To edit an existing reminder:
+
+```bash
+remindctl edit 87354 --due "2026-05-15 14:00" --alarm "2026-05-15 13:30"
+```
+
+The Reminders UI may show or group the item by the alarm time because that is when the notification fires. Verify with JSON instead of assuming the due time moved:
+
+```bash
+remindctl today --json
+```
+
+Expected shape:
+
+- `dueDate`: actual due time
+- `alarmDate`: notification / early nudge time
+
+Apple's public `EKReminder` docs list only reminder-specific properties. Alarm support comes from inherited `EKCalendarItem` behavior exposed by remindctl's `--alarm` flag.
+
 ### Complete / Delete
 
 ```bash
diff --git a/skills/autonomous-ai-agents/hermes-agent/SKILL.md b/skills/autonomous-ai-agents/hermes-agent/SKILL.md
index a93c0ef0f0e..2d3d4957772 100644
--- a/skills/autonomous-ai-agents/hermes-agent/SKILL.md
+++ b/skills/autonomous-ai-agents/hermes-agent/SKILL.md
@@ -446,15 +446,15 @@ Common "why is Hermes doing X to my output / tool calls / commands?" toggles —
 
 ### Secret redaction in tool output
 
-Secret redaction is **off by default** — tool output (terminal stdout, `read_file`, web content, subagent summaries, etc.) passes through unmodified. If the user wants Hermes to auto-mask strings that look like API keys, tokens, and secrets before they enter the conversation context and logs:
+Secret redaction is **on by default** — tool output (terminal stdout, `read_file`, web content, subagent summaries, etc.) is scanned for strings that look like API keys, tokens, and secrets before it enters the conversation context and logs. Leave it enabled for normal use:
 
 ```bash
-hermes config set security.redact_secrets true       # enable globally
+hermes config set security.redact_secrets true       # keep enabled globally
 ```
 
-**Restart required.** `security.redact_secrets` is snapshotted at import time — toggling it mid-session (e.g. via `export HERMES_REDACT_SECRETS=true` from a tool call) will NOT take effect for the running process. Tell the user to run `hermes config set security.redact_secrets true` in a terminal, then start a new session. This is deliberate — it prevents an LLM from flipping the toggle on itself mid-task.
+**Restart required.** `security.redact_secrets` is snapshotted at import time — toggling it mid-session (e.g. via `export HERMES_REDACT_SECRETS=false` from a tool call) will NOT take effect for the running process. Tell the user to change it in config from a terminal, then start a new session. This is deliberate — it prevents an LLM from flipping the toggle on itself mid-task.
 
-Disable again with:
+Disable only when you deliberately need raw credential-like strings for debugging or redactor development:
 ```bash
 hermes config set security.redact_secrets false
 ```
diff --git a/skills/creative/comfyui/scripts/fetch_logs.py b/skills/creative/comfyui/scripts/fetch_logs.py
index e0b6e12ac75..e885a03e70f 100755
--- a/skills/creative/comfyui/scripts/fetch_logs.py
+++ b/skills/creative/comfyui/scripts/fetch_logs.py
@@ -15,7 +15,6 @@ Usage:
 from __future__ import annotations
 
 import argparse
-import json
 import sys
 from pathlib import Path
 
diff --git a/skills/creative/comfyui/tests/test_common.py b/skills/creative/comfyui/tests/test_common.py
index 0263fe1d91b..a5ce6a32714 100644
--- a/skills/creative/comfyui/tests/test_common.py
+++ b/skills/creative/comfyui/tests/test_common.py
@@ -2,15 +2,11 @@
 
 from __future__ import annotations
 
-from pathlib import Path
 
 import pytest
 
 from _common import (
-    DEFAULT_LOCAL_HOST,
     EMBEDDING_REGEX,
-    FOLDER_ALIASES,
-    build_cloud_aware_url,
     cloud_endpoint,
     coerce_seed,
     folder_aliases_for,
diff --git a/skills/creative/comfyui/tests/test_extract_schema.py b/skills/creative/comfyui/tests/test_extract_schema.py
index 072a788f318..c4557ba8caf 100644
--- a/skills/creative/comfyui/tests/test_extract_schema.py
+++ b/skills/creative/comfyui/tests/test_extract_schema.py
@@ -2,7 +2,6 @@
 
 from __future__ import annotations
 
-import pytest
 
 from extract_schema import (
     extract_schema,
diff --git a/skills/creative/comfyui/tests/test_run_workflow.py b/skills/creative/comfyui/tests/test_run_workflow.py
index 32eb172ad1c..69957dd2355 100644
--- a/skills/creative/comfyui/tests/test_run_workflow.py
+++ b/skills/creative/comfyui/tests/test_run_workflow.py
@@ -2,10 +2,7 @@
 
 from __future__ import annotations
 
-import copy
-import json
 
-import pytest
 
 from extract_schema import extract_schema
 from run_workflow import (
diff --git a/skills/devops/kanban-worker/SKILL.md b/skills/devops/kanban-worker/SKILL.md
index 4954e6dc9dd..ef9022d9a8d 100644
--- a/skills/devops/kanban-worker/SKILL.md
+++ b/skills/devops/kanban-worker/SKILL.md
@@ -167,6 +167,7 @@ You can configure the gateway to receive cross-profile Kanban task notifications
 ## Do NOT
 
 - Call `delegate_task` as a substitute for `kanban_create`. `delegate_task` is for short reasoning subtasks inside YOUR run; `kanban_create` is for cross-agent handoffs that outlive one API loop.
+- Call `clarify` to ask the human a question. You are running headless — there is no live user to answer. The call will time out (default ~120s) and the task will sit silently in `running` with no signal that it needs input. Use `kanban_comment` (context) + `kanban_block(reason=...)` (decision needed) instead — the task surfaces on the board as blocked, the operator sees it, unblocks with their answer in a comment, and you respawn with the thread.
 - Modify files outside `$HERMES_KANBAN_WORKSPACE` unless the task body says to.
 - Create follow-up tasks assigned to yourself — assign to the right specialist.
 - Complete a task you didn't actually finish. Block it instead.
diff --git a/skills/email/himalaya/SKILL.md b/skills/email/himalaya/SKILL.md
index d7392e6bdc8..79da4133f02 100644
--- a/skills/email/himalaya/SKILL.md
+++ b/skills/email/himalaya/SKILL.md
@@ -17,6 +17,11 @@ prerequisites:
 
 Himalaya is a CLI email client that lets you manage emails from the terminal using IMAP, SMTP, Notmuch, or Sendmail backends.
 
+This skill is separate from the Hermes Email gateway adapter. The gateway
+adapter lets people email the agent and uses Hermes' built-in IMAP/SMTP
+adapter; this skill lets the agent operate a mailbox from terminal tools and
+requires the external `himalaya` CLI.
+
 ## References
 
 - `references/configuration.md` (config file setup + IMAP/SMTP authentication)
diff --git a/skills/productivity/maps/scripts/maps_client.py b/skills/productivity/maps/scripts/maps_client.py
index d272b4a7566..90763589ae2 100644
--- a/skills/productivity/maps/scripts/maps_client.py
+++ b/skills/productivity/maps/scripts/maps_client.py
@@ -18,7 +18,6 @@ Commands:
 import argparse
 import json
 import math
-import os
 import sys
 import time
 import urllib.error
diff --git a/skills/research/research-paper-writing/templates/acl/acl_lualatex.tex b/skills/research/research-paper-writing/templates/acl/acl_lualatex.tex
index 6684e893074..3eb5e856057 100644
--- a/skills/research/research-paper-writing/templates/acl/acl_lualatex.tex
+++ b/skills/research/research-paper-writing/templates/acl/acl_lualatex.tex
@@ -46,7 +46,7 @@
 % \author{Author 1 \\ Address line \\  ... \\ Address line
 %         \And  ... \And
 %         Author n \\ Address line \\ ... \\ Address line}
-% To start a seperate ``row'' of authors use \AND, as in
+% To start a separate ``row'' of authors use \AND, as in
 % \author{Author 1 \\ Address line \\  ... \\ Address line
 %         \AND
 %         Author 2 \\ Address line \\ ... \\ Address line \And
diff --git a/tests/acp/test_approval_isolation.py b/tests/acp/test_approval_isolation.py
index 99a38aadd9e..e6d3f593f76 100644
--- a/tests/acp/test_approval_isolation.py
+++ b/tests/acp/test_approval_isolation.py
@@ -13,11 +13,8 @@ Both fixed together by:
    threads don't collide.
 """
 
-import os
 import threading
-from unittest.mock import MagicMock
 
-import pytest
 
 
 class TestThreadLocalApprovalCallback:
diff --git a/tests/acp/test_events.py b/tests/acp/test_events.py
index ec0b32549da..025245ba0a9 100644
--- a/tests/acp/test_events.py
+++ b/tests/acp/test_events.py
@@ -9,7 +9,7 @@ from unittest.mock import AsyncMock, MagicMock, patch
 import pytest
 
 import acp
-from acp.schema import AgentPlanUpdate, ToolCallStart, ToolCallProgress, AgentThoughtChunk, AgentMessageChunk
+from acp.schema import AgentPlanUpdate
 
 from acp_adapter.events import (
     _build_plan_update_from_todo_result,
diff --git a/tests/acp/test_mcp_e2e.py b/tests/acp/test_mcp_e2e.py
index 00bf53b21f3..f5f62c17a97 100644
--- a/tests/acp/test_mcp_e2e.py
+++ b/tests/acp/test_mcp_e2e.py
@@ -7,9 +7,6 @@ Exercises the full flow through the ACP server layer:
     session_update events arrive at the mock client
 """
 
-import asyncio
-from collections import deque
-from types import SimpleNamespace
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
diff --git a/tests/acp/test_server.py b/tests/acp/test_server.py
index de9df54d3a6..33fb72c2edc 100644
--- a/tests/acp/test_server.py
+++ b/tests/acp/test_server.py
@@ -18,7 +18,6 @@ from acp.schema import (
     AvailableCommandsUpdate,
     Implementation,
     InitializeResponse,
-    ListSessionsResponse,
     LoadSessionResponse,
     NewSessionResponse,
     PromptResponse,
@@ -33,7 +32,6 @@ from acp.schema import (
     TextContentBlock,
     ToolCallProgress,
     ToolCallStart,
-    Usage,
     UsageUpdate,
     UserMessageChunk,
 )
diff --git a/tests/acp/test_tools.py b/tests/acp/test_tools.py
index 455ee25194a..1da33df1ed4 100644
--- a/tests/acp/test_tools.py
+++ b/tests/acp/test_tools.py
@@ -1,6 +1,5 @@
 """Tests for acp_adapter.tools — tool kind mapping and ACP content building."""
 
-import pytest
 
 from acp_adapter.edit_approval import EditProposal
 from acp_adapter.tools import (
diff --git a/tests/agent/lsp/test_backend_gate.py b/tests/agent/lsp/test_backend_gate.py
index 3c0df8702ea..9d313883d5f 100644
--- a/tests/agent/lsp/test_backend_gate.py
+++ b/tests/agent/lsp/test_backend_gate.py
@@ -8,8 +8,6 @@ syntax check exactly as if LSP were disabled.
 """
 from __future__ import annotations
 
-import os
-import sys
 from unittest.mock import MagicMock
 
 import pytest
diff --git a/tests/agent/lsp/test_broken_set.py b/tests/agent/lsp/test_broken_set.py
index c854bdc3861..e9f092afb11 100644
--- a/tests/agent/lsp/test_broken_set.py
+++ b/tests/agent/lsp/test_broken_set.py
@@ -14,15 +14,12 @@ This module verifies:
 """
 from __future__ import annotations
 
-import os
-import sys
 from pathlib import Path
-from unittest.mock import MagicMock, patch
+from unittest.mock import patch
 
 import pytest
 
 from agent.lsp.manager import LSPService
-from agent.lsp.servers import SERVERS, ServerContext, ServerDef, SpawnSpec
 from agent.lsp.workspace import clear_cache
 
 
diff --git a/tests/agent/lsp/test_diagnostics_field.py b/tests/agent/lsp/test_diagnostics_field.py
index 6cb0c2896ce..8d5b12aa859 100644
--- a/tests/agent/lsp/test_diagnostics_field.py
+++ b/tests/agent/lsp/test_diagnostics_field.py
@@ -6,12 +6,8 @@ having LSP output prepended to the lint string.
 """
 from __future__ import annotations
 
-import os
-import sys
-import tempfile
-from unittest.mock import MagicMock, patch
+from unittest.mock import patch
 
-import pytest
 
 from tools.environments.local import LocalEnvironment
 from tools.file_operations import (
diff --git a/tests/agent/lsp/test_lifecycle.py b/tests/agent/lsp/test_lifecycle.py
index 2fc12b10520..e0f35238fb0 100644
--- a/tests/agent/lsp/test_lifecycle.py
+++ b/tests/agent/lsp/test_lifecycle.py
@@ -7,7 +7,7 @@ pyright/gopls/etc. are still alive on the host.
 from __future__ import annotations
 
 import atexit
-from unittest.mock import MagicMock, patch
+from unittest.mock import MagicMock
 
 import pytest
 
diff --git a/tests/agent/lsp/test_reporter.py b/tests/agent/lsp/test_reporter.py
index e4b1cbd39f4..67794e40401 100644
--- a/tests/agent/lsp/test_reporter.py
+++ b/tests/agent/lsp/test_reporter.py
@@ -2,7 +2,6 @@
 from __future__ import annotations
 
 from agent.lsp.reporter import (
-    DEFAULT_SEVERITIES,
     MAX_PER_FILE,
     format_diagnostic,
     report_for_file,
diff --git a/tests/agent/lsp/test_service.py b/tests/agent/lsp/test_service.py
index 952a8519adc..24de76e9968 100644
--- a/tests/agent/lsp/test_service.py
+++ b/tests/agent/lsp/test_service.py
@@ -7,7 +7,6 @@ on.
 """
 from __future__ import annotations
 
-import os
 import sys
 from pathlib import Path
 
@@ -19,7 +18,6 @@ from agent.lsp.servers import (
     ServerContext,
     ServerDef,
     SpawnSpec,
-    find_server_for_file,
 )
 
 
diff --git a/tests/agent/test_anthropic_adapter.py b/tests/agent/test_anthropic_adapter.py
index cfd6edeca65..7c7e8e33373 100644
--- a/tests/agent/test_anthropic_adapter.py
+++ b/tests/agent/test_anthropic_adapter.py
@@ -1188,16 +1188,27 @@ class TestBuildAnthropicKwargs:
         # params through its signature, we exercise the strip behavior by
         # calling the internal predicate directly.
         from agent.anthropic_adapter import _forbids_sampling_params
+        assert _forbids_sampling_params("claude-opus-4-8") is True
+        assert _forbids_sampling_params("claude-opus-4-8-fast") is True
         assert _forbids_sampling_params("claude-opus-4-7") is True
         assert _forbids_sampling_params("claude-opus-4-6") is False
         assert _forbids_sampling_params("claude-sonnet-4-5") is False
 
     def test_supports_fast_mode_predicate(self):
-        """Fast mode is Opus 4.6 only — Opus 4.7 and others must be excluded."""
+        """Fast mode is Opus 4.6 only — Opus 4.7 and others must be excluded.
+
+        For Opus 4.8 the fast variant is a separate model ID
+        (anthropic/claude-opus-4.8-fast) routed through the normal model
+        field, NOT via the ``speed: "fast"`` request parameter. So
+        ``_supports_fast_mode`` (which gates the parameter) must stay
+        False for both opus-4-8 and opus-4-8-fast.
+        """
         from agent.anthropic_adapter import _supports_fast_mode
         assert _supports_fast_mode("claude-opus-4-6") is True
         assert _supports_fast_mode("anthropic/claude-opus-4-6") is True
         assert _supports_fast_mode("claude-opus-4-7") is False
+        assert _supports_fast_mode("claude-opus-4-8") is False
+        assert _supports_fast_mode("claude-opus-4-8-fast") is False
         assert _supports_fast_mode("claude-sonnet-4-6") is False
         assert _supports_fast_mode("claude-haiku-4-5") is False
         assert _supports_fast_mode("") is False
diff --git a/tests/agent/test_anthropic_keychain.py b/tests/agent/test_anthropic_keychain.py
index c0f9c771824..44a458fdf72 100644
--- a/tests/agent/test_anthropic_keychain.py
+++ b/tests/agent/test_anthropic_keychain.py
@@ -1,10 +1,8 @@
 """Tests for Bug #12905 fixes in agent/anthropic_adapter.py — macOS Keychain support."""
 
 import json
-import platform
 from unittest.mock import patch, MagicMock
 
-import pytest
 
 from agent.anthropic_adapter import (
     _read_claude_code_credentials_from_keychain,
diff --git a/tests/agent/test_anthropic_mcp_prefix_strip.py b/tests/agent/test_anthropic_mcp_prefix_strip.py
index 102cbadca51..4806661497d 100644
--- a/tests/agent/test_anthropic_mcp_prefix_strip.py
+++ b/tests/agent/test_anthropic_mcp_prefix_strip.py
@@ -8,11 +8,9 @@ name in the tool registry.
 
 from __future__ import annotations
 
-import json
 from types import SimpleNamespace
 from unittest.mock import patch
 
-import pytest
 
 
 # ---------------------------------------------------------------------------
diff --git a/tests/agent/test_anthropic_oauth_pkce.py b/tests/agent/test_anthropic_oauth_pkce.py
index 5cf74d7a6a5..49045e94541 100644
--- a/tests/agent/test_anthropic_oauth_pkce.py
+++ b/tests/agent/test_anthropic_oauth_pkce.py
@@ -15,7 +15,6 @@ History:
 
 from __future__ import annotations
 
-import io
 import json
 from typing import Any, Dict
 from urllib.parse import parse_qs, urlparse
@@ -53,6 +52,13 @@ def _patch_oauth_flow(
         return True
 
     monkeypatch.setattr("webbrowser.open", fake_open)
+    # The flow now gates webbrowser.open() behind a graphical-browser check so
+    # it never launches a console browser (w3m/lynx) inside the terminal. Tests
+    # run headless, so force the GUI path to True — the URL capture relies on
+    # webbrowser.open() being invoked.
+    monkeypatch.setattr(
+        "hermes_cli.auth._can_open_graphical_browser", lambda: True
+    )
     monkeypatch.setattr("builtins.input", lambda *_a, **_kw: callback_code)
 
     class _FakeResponse:
diff --git a/tests/agent/test_async_utils.py b/tests/agent/test_async_utils.py
index 33ce84ee0c6..8354384c34e 100644
--- a/tests/agent/test_async_utils.py
+++ b/tests/agent/test_async_utils.py
@@ -8,7 +8,6 @@ import warnings
 from concurrent.futures import Future
 from unittest.mock import patch
 
-import pytest
 
 from agent.async_utils import safe_schedule_threadsafe
 
diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py
index 07d3688272c..97c3a7f6b66 100644
--- a/tests/agent/test_auxiliary_client.py
+++ b/tests/agent/test_auxiliary_client.py
@@ -1,10 +1,9 @@
 """Tests for agent.auxiliary_client resolution chain, provider overrides, and model overrides."""
 
+import base64
 import json
 import logging
-import os
 import time
-from pathlib import Path
 from types import SimpleNamespace
 from unittest.mock import patch, MagicMock, AsyncMock
 
@@ -31,6 +30,12 @@ from agent.auxiliary_client import (
 )
 
 
+def _jwt_with_claims(claims: dict) -> str:
+    header = base64.urlsafe_b64encode(b'{"alg":"none","typ":"JWT"}').decode().rstrip("=")
+    payload = base64.urlsafe_b64encode(json.dumps(claims).encode()).decode().rstrip("=")
+    return f"{header}.{payload}.sig"
+
+
 @pytest.fixture(autouse=True)
 def _clean_env(monkeypatch):
     """Strip provider env vars so each test starts clean."""
@@ -83,6 +88,62 @@ class TestAuxiliaryMaxTokensParam:
             assert auxiliary_max_tokens_param(2048) == {"max_completion_tokens": 2048}
 
 
+class TestBuildCallKwargsMaxTokens:
+    """_build_call_kwargs should not cap output by default (#34530).
+
+    Most chat-completions providers treat an omitted max_tokens as "use the
+    model max", which is what we want for auxiliary tasks. An explicit cap only
+    risks truncation or a wire-format 400 (GitHub Copilot / GPT-5 reject
+    max_tokens; ZAI vision rejects it entirely). The Anthropic Messages wire is
+    the one exception — max_tokens is a mandatory field there.
+    """
+
+    @pytest.mark.parametrize(
+        "provider,model,base_url",
+        [
+            ("copilot", "gpt-5.4", "https://api.githubcopilot.com"),
+            ("copilot", "gpt-5.5", "https://api.githubcopilot.com"),
+            ("custom", "gpt-5", "https://api.openai.com/v1"),
+            ("openrouter", "anthropic/claude-sonnet-4.6", "https://openrouter.ai/api/v1"),
+            ("nous", "hermes-4", "https://inference-api.nousresearch.com/v1"),
+            ("custom", "qwen", "http://localhost:8080/v1"),
+            ("zai", "glm-4v-flash", "https://open.bigmodel.cn/api/paas/v4"),
+        ],
+    )
+    def test_omits_max_tokens_for_openai_compatible(self, provider, model, base_url):
+        from agent.auxiliary_client import _build_call_kwargs
+
+        kwargs = _build_call_kwargs(
+            provider=provider,
+            model=model,
+            messages=[{"role": "user", "content": "hi"}],
+            max_tokens=1234,
+            base_url=base_url,
+        )
+        assert "max_tokens" not in kwargs
+        assert "max_completion_tokens" not in kwargs
+
+    @pytest.mark.parametrize(
+        "provider,model,base_url",
+        [
+            ("minimax", "minimax-m2", "https://api.minimax.io/v1"),
+            ("custom", "claude", "https://proxy.example.com/anthropic/v1"),
+        ],
+    )
+    def test_keeps_max_tokens_on_anthropic_wire(self, provider, model, base_url):
+        from agent.auxiliary_client import _build_call_kwargs
+
+        kwargs = _build_call_kwargs(
+            provider=provider,
+            model=model,
+            messages=[{"role": "user", "content": "hi"}],
+            max_tokens=1234,
+            base_url=base_url,
+        )
+        assert kwargs["max_tokens"] == 1234
+        assert "max_completion_tokens" not in kwargs
+
+
 class TestNormalizeAuxProvider:
     def test_maps_github_copilot_aliases(self):
         assert _normalize_aux_provider("github") == "copilot"
@@ -609,7 +670,7 @@ class TestExpiredCodexFallback:
         monkeypatch.setenv("ANTHROPIC_TOKEN", "sk-ant-oat01-test-fallback")
         with patch("agent.anthropic_adapter.build_anthropic_client") as mock_build:
             mock_build.return_value = MagicMock()
-            from agent.auxiliary_client import _resolve_auto, AnthropicAuxiliaryClient
+            from agent.auxiliary_client import _resolve_auto
             client, model = _resolve_auto()
             # Should NOT be Codex, should be Anthropic (or another available provider)
             assert not isinstance(client, type(None)), "Should find a provider after expired Codex"
@@ -696,7 +757,7 @@ class TestExpiredCodexFallback:
              patch("agent.anthropic_adapter.build_anthropic_client") as mock_build, \
              patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)):
             mock_build.return_value = MagicMock()
-            from agent.auxiliary_client import _try_anthropic, AnthropicAuxiliaryClient
+            from agent.auxiliary_client import _try_anthropic
             client, model = _try_anthropic()
             assert client is not None, "Should resolve token"
             adapter = client.chat.completions
@@ -751,7 +812,7 @@ class TestExpiredCodexFallback:
         monkeypatch.delenv("ANTHROPIC_TOKEN", raising=False)
         with patch("agent.anthropic_adapter.build_anthropic_client") as mock_build:
             mock_build.return_value = MagicMock()
-            from agent.auxiliary_client import _try_anthropic, AnthropicAuxiliaryClient
+            from agent.auxiliary_client import _try_anthropic
             client, model = _try_anthropic()
             assert client is not None
             adapter = client.chat.completions
@@ -889,9 +950,16 @@ class TestVisionClientFallback:
 
 class TestAuxiliaryPoolAwareness:
     def test_try_nous_uses_pool_entry(self):
+        pooled_token = _jwt_with_claims({
+            "scope": "inference:invoke",
+            "exp": int(time.time() + 3600),
+        })
+
         class _Entry:
             access_token = "pooled-access-token"
-            agent_key = "pooled-agent-key"
+            agent_key = pooled_token
+            agent_key_expires_at = "2099-01-01T00:00:00+00:00"
+            scope = "inference:invoke"
             inference_base_url = "https://inference.pool.example/v1"
 
         class _Pool:
@@ -912,7 +980,7 @@ class TestAuxiliaryPoolAwareness:
 
         assert client is not None
         assert model == "google/gemini-3-flash-preview"
-        assert mock_openai.call_args.kwargs["api_key"] == "pooled-agent-key"
+        assert mock_openai.call_args.kwargs["api_key"] == pooled_token
         assert mock_openai.call_args.kwargs["base_url"] == "https://inference.pool.example/v1"
 
     def test_try_nous_uses_portal_recommendation_for_text(self):
@@ -992,6 +1060,47 @@ class TestAuxiliaryPoolAwareness:
         assert stale_client.chat.completions.create.call_count == 1
         assert fresh_client.chat.completions.create.call_count == 1
 
+    def test_call_llm_refreshes_nous_after_free_tier_block_when_account_paid(self):
+        from hermes_cli.nous_account import NousPortalAccountInfo
+
+        class _Payment404(Exception):
+            status_code = 404
+
+        stale_client = MagicMock()
+        stale_client.base_url = "https://inference-api.nousresearch.com/v1"
+        stale_client.chat.completions.create.side_effect = _Payment404(
+            "model_not_supported_on_free_tier: model is not available on the free tier"
+        )
+
+        fresh_client = MagicMock()
+        fresh_client.base_url = "https://inference-api.nousresearch.com/v1"
+        fresh_client.chat.completions.create.return_value = {"ok": True}
+
+        with (
+            patch("agent.auxiliary_client._resolve_task_provider_model", return_value=("nous", "nous-model", None, None, None)),
+            patch("agent.auxiliary_client._get_cached_client", return_value=(stale_client, "nous-model")),
+            patch("agent.auxiliary_client.OpenAI", return_value=fresh_client),
+            patch("agent.auxiliary_client._validate_llm_response", side_effect=lambda resp, _task: resp),
+            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", "https://inference-api.nousresearch.com/v1")),
+            patch(
+                "hermes_cli.nous_account.get_nous_portal_account_info",
+                return_value=NousPortalAccountInfo(
+                    logged_in=True,
+                    source="account_api",
+                    fresh=True,
+                    paid_service_access=True,
+                ),
+            ),
+        ):
+            result = call_llm(
+                task="compression",
+                messages=[{"role": "user", "content": "hi"}],
+            )
+
+        assert result == {"ok": True}
+        assert stale_client.chat.completions.create.call_count == 1
+        assert fresh_client.chat.completions.create.call_count == 1
+
     @pytest.mark.asyncio
     async def test_async_call_llm_retries_nous_after_401(self):
         class _Auth401(Exception):
@@ -1021,6 +1130,48 @@ class TestAuxiliaryPoolAwareness:
         assert stale_client.chat.completions.create.await_count == 1
         assert fresh_async_client.chat.completions.create.await_count == 1
 
+    @pytest.mark.asyncio
+    async def test_async_call_llm_refreshes_nous_after_free_tier_block_when_account_paid(self):
+        from hermes_cli.nous_account import NousPortalAccountInfo
+
+        class _Payment404(Exception):
+            status_code = 404
+
+        stale_client = MagicMock()
+        stale_client.base_url = "https://inference-api.nousresearch.com/v1"
+        stale_client.chat.completions.create = AsyncMock(side_effect=_Payment404(
+            "model_not_supported_on_free_tier: model is not available on the free tier"
+        ))
+
+        fresh_async_client = MagicMock()
+        fresh_async_client.base_url = "https://inference-api.nousresearch.com/v1"
+        fresh_async_client.chat.completions.create = AsyncMock(return_value={"ok": True})
+
+        with (
+            patch("agent.auxiliary_client._resolve_task_provider_model", return_value=("nous", "nous-model", None, None, None)),
+            patch("agent.auxiliary_client._get_cached_client", return_value=(stale_client, "nous-model")),
+            patch("agent.auxiliary_client._to_async_client", return_value=(fresh_async_client, "nous-model")),
+            patch("agent.auxiliary_client._validate_llm_response", side_effect=lambda resp, _task: resp),
+            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", "https://inference-api.nousresearch.com/v1")),
+            patch(
+                "hermes_cli.nous_account.get_nous_portal_account_info",
+                return_value=NousPortalAccountInfo(
+                    logged_in=True,
+                    source="account_api",
+                    fresh=True,
+                    paid_service_access=True,
+                ),
+            ),
+        ):
+            result = await async_call_llm(
+                task="session_search",
+                messages=[{"role": "user", "content": "hi"}],
+            )
+
+        assert result == {"ok": True}
+        assert stale_client.chat.completions.create.await_count == 1
+        assert fresh_async_client.chat.completions.create.await_count == 1
+
     def test_cached_gmi_client_keeps_explicit_slash_model_override(self):
         import agent.auxiliary_client as aux
 
@@ -1076,6 +1227,19 @@ class TestIsPaymentError:
         exc.status_code = 429
         assert _is_payment_error(exc) is True
 
+    def test_404_free_tier_model_block_is_payment(self):
+        exc = Exception(
+            "Model 'gpt-5' is not available on the Free Tier. "
+            "Upgrade at https://portal.nousresearch.com or pick a free model."
+        )
+        exc.status_code = 404
+        assert _is_payment_error(exc) is True
+
+    def test_404_generic_not_found_is_not_payment(self):
+        exc = Exception("Not Found")
+        exc.status_code = 404
+        assert _is_payment_error(exc) is False
+
     def test_429_without_credits_message_is_not_payment(self):
         """Normal rate limits should NOT be treated as payment errors."""
         exc = Exception("Rate limit exceeded, try again in 2 seconds")
diff --git a/tests/agent/test_auxiliary_client_azure_foundry.py b/tests/agent/test_auxiliary_client_azure_foundry.py
index dea08a5caa2..f3e06e5a513 100644
--- a/tests/agent/test_auxiliary_client_azure_foundry.py
+++ b/tests/agent/test_auxiliary_client_azure_foundry.py
@@ -27,7 +27,6 @@ from __future__ import annotations
 
 import sys
 from types import SimpleNamespace
-from unittest.mock import MagicMock, patch
 
 import pytest
 
diff --git a/tests/agent/test_auxiliary_client_xai_oauth_recovery.py b/tests/agent/test_auxiliary_client_xai_oauth_recovery.py
new file mode 100644
index 00000000000..3434a68d80d
--- /dev/null
+++ b/tests/agent/test_auxiliary_client_xai_oauth_recovery.py
@@ -0,0 +1,153 @@
+"""Tests for xAI OAuth 403 error recovery in auxiliary_client.
+
+xAI returns HTTP 403 (not 401) with "unauthenticated:bad-credentials" when
+an OAuth2 access token has expired.  These tests verify the three fixes:
+
+1. _is_auth_error detects xAI 403 as an auth failure
+2. _recoverable_pool_provider maps api.x.ai to xai-oauth
+3. _refresh_provider_credentials includes xai-oauth refresh logic
+"""
+
+import pytest
+
+
+# ── _is_auth_error ──────────────────────────────────────────────────────────
+
+def _import_is_auth_error():
+    from agent.auxiliary_client import _is_auth_error
+    return _is_auth_error
+
+
+class TestIsAuthErrorXaiOauth403:
+    """Verify _is_auth_error correctly identifies xAI's 403 bad-credentials."""
+
+    @pytest.fixture(autouse=True)
+    def _import(self):
+        self.is_auth_error = _import_is_auth_error()
+
+    def test_xai_403_bad_credentials_is_auth_error(self):
+        """The exact error xAI returns for expired OAuth tokens."""
+        exc = Exception(
+            "Error code: 403 - {'code': 'The caller does not have permission "
+            "to execute the specified operation', 'error': 'The OAuth2 access "
+            "token could not be validated. [WKE=unauthenticated:bad-credentials]'}"
+        )
+        exc.status_code = 403  # openai.PermissionDenied sets this
+        assert self.is_auth_error(exc) is True
+
+    def test_xai_403_bad_credentials_without_status_code(self):
+        """Fallback match when status_code attribute is missing."""
+        exc = Exception(
+            "Error code: 403 - unauthenticated:bad-credentials"
+        )
+        # No status_code attribute — should still match via string pattern
+        assert self.is_auth_error(exc) is True
+
+    def test_generic_403_is_not_auth_error(self):
+        """A generic 403 (e.g. rate limit, forbidden) should NOT be treated as auth."""
+        exc = Exception("Error code: 403 - rate limit exceeded")
+        exc.status_code = 403
+        assert self.is_auth_error(exc) is False
+
+    def test_401_status_code_is_auth_error(self):
+        """Existing 401 detection still works."""
+        exc = Exception("Unauthorized")
+        exc.status_code = 401
+        assert self.is_auth_error(exc) is True
+
+    def test_401_string_is_auth_error(self):
+        """Existing string-based 401 detection still works."""
+        exc = Exception("Error code: 401 - Unauthorized")
+        assert self.is_auth_error(exc) is True
+
+    def test_authentication_error_class_is_auth_error(self):
+        """Existing AuthenticationError class detection still works."""
+        exc_type = type("AuthenticationError", (Exception,), {})
+        exc = exc_type("auth failure")
+        assert self.is_auth_error(exc) is True
+
+    def test_permission_denied_without_bad_credentials_is_not_auth_error(self):
+        """403 PermissionDenied without bad-credentials should not be auth."""
+        exc = Exception("Error code: 403 - Permission denied")
+        exc.status_code = 403
+        assert self.is_auth_error(exc) is False
+
+    def test_500_is_not_auth_error(self):
+        """Server errors are not auth errors."""
+        exc = Exception("Error code: 500 - Internal server error")
+        exc.status_code = 500
+        assert self.is_auth_error(exc) is False
+
+    def test_unauthenticated_without_bad_credentials_is_not_auth_error(self):
+        """'unauthenticated' alone (without 'bad-credentials') should not match."""
+        exc = Exception("unauthenticated request")
+        assert self.is_auth_error(exc) is False
+
+
+# ── _recoverable_pool_provider ──────────────────────────────────────────────
+
+def _import_recoverable_pool_provider():
+    from agent.auxiliary_client import _recoverable_pool_provider
+    return _recoverable_pool_provider
+
+
+class TestRecoverablePoolProviderXaiOAuth:
+    """Verify _recoverable_pool_provider maps api.x.ai to xai-oauth."""
+
+    @pytest.fixture(autouse=True)
+    def _import(self):
+        self.recover = _import_recoverable_pool_provider()
+
+    def test_explicit_xai_oauth_provider(self):
+        """Explicit provider name passes through."""
+        result = self.recover("xai-oauth", None)
+        assert result == "xai-oauth"
+
+    def test_api_x_ai_host_match(self):
+        """api.x.ai base URL maps to xai-oauth pool."""
+        class MockClient:
+            base_url = "https://api.x.ai/v1/"
+
+        result = self.recover("auto", MockClient())
+        assert result == "xai-oauth"
+
+    def test_auto_with_unknown_host_returns_none(self):
+        """auto provider with unknown host returns None."""
+        class MockClient:
+            base_url = "https://unknown.example.com/v1/"
+
+        result = self.recover("auto", MockClient())
+        assert result is None
+
+
+# ── _refresh_provider_credentials (structure check) ─────────────────────────
+
+def _import_refresh_provider_credentials():
+    from agent.auxiliary_client import _refresh_provider_credentials
+    return _refresh_provider_credentials
+
+
+class TestRefreshProviderCredentialsXaiOAuth:
+    """Verify _refresh_provider_credentials has xai-oauth branch.
+
+    Full integration testing requires live OAuth tokens, so we verify
+    the branch exists and handles the no-credential case gracefully.
+    """
+
+    @pytest.fixture(autouse=True)
+    def _import(self):
+        self.refresh = _import_refresh_provider_credentials()
+
+    def test_xai_oauth_no_pool_returns_false(self):
+        """When no xai-oauth pool exists, refresh returns False gracefully."""
+        # This tests that the branch exists and doesn't crash.
+        # It may return True if the singleton resolver finds tokens,
+        # or False if neither pool nor singleton has credentials.
+        # Either way, it should not raise an exception.
+        result = self.refresh("xai-oauth")
+        assert isinstance(result, bool)
+
+    def test_unknown_provider_returns_false(self):
+        """Unknown providers fall through to return False."""
+        result = self.refresh("unknown-provider-xyz")
+        assert result is False
\ No newline at end of file
diff --git a/tests/agent/test_auxiliary_config_bridge.py b/tests/agent/test_auxiliary_config_bridge.py
index 3215303b5c2..b2727d33608 100644
--- a/tests/agent/test_auxiliary_config_bridge.py
+++ b/tests/agent/test_auxiliary_config_bridge.py
@@ -4,14 +4,11 @@ are properly mapped to environment variables by both CLI and gateway loaders.
 Also tests the vision_tools and browser_tool model override env vars.
 """
 
-import json
 import os
 import sys
 from pathlib import Path
 from unittest.mock import patch, MagicMock
 
-import pytest
-import yaml
 
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
 
diff --git a/tests/agent/test_auxiliary_main_first.py b/tests/agent/test_auxiliary_main_first.py
index d1b758c2884..7854313293e 100644
--- a/tests/agent/test_auxiliary_main_first.py
+++ b/tests/agent/test_auxiliary_main_first.py
@@ -15,7 +15,6 @@ from __future__ import annotations
 
 from unittest.mock import MagicMock, patch
 
-import pytest
 
 
 # ── Text aux tasks — _resolve_auto ──────────────────────────────────────────
diff --git a/tests/agent/test_auxiliary_named_custom_providers.py b/tests/agent/test_auxiliary_named_custom_providers.py
index 52c85998e3d..afceeec02d9 100644
--- a/tests/agent/test_auxiliary_named_custom_providers.py
+++ b/tests/agent/test_auxiliary_named_custom_providers.py
@@ -1,6 +1,5 @@
 """Tests for named custom provider and 'main' alias resolution in auxiliary_client."""
 
-import os
 from unittest.mock import patch, MagicMock
 
 import pytest
diff --git a/tests/agent/test_azure_identity_adapter.py b/tests/agent/test_azure_identity_adapter.py
index a569709e00d..c63caf4eace 100644
--- a/tests/agent/test_azure_identity_adapter.py
+++ b/tests/agent/test_azure_identity_adapter.py
@@ -23,7 +23,6 @@ import sys
 from collections.abc import Callable
 from types import SimpleNamespace
 from typing import cast
-from unittest.mock import MagicMock, patch
 
 import pytest
 
diff --git a/tests/agent/test_bedrock_adapter.py b/tests/agent/test_bedrock_adapter.py
index 04c0913f289..5f98fe5cf78 100644
--- a/tests/agent/test_bedrock_adapter.py
+++ b/tests/agent/test_bedrock_adapter.py
@@ -10,11 +10,9 @@ Covers:
 """
 
 import json
-import os
-import time
 from contextlib import contextmanager
-from types import ModuleType, SimpleNamespace
-from unittest.mock import MagicMock, patch, PropertyMock
+from types import ModuleType
+from unittest.mock import MagicMock, patch
 
 import pytest
 
@@ -129,7 +127,7 @@ class TestResolveBedrocRegion:
 
     def test_defaults_to_us_east_1(self):
         from agent.bedrock_adapter import resolve_bedrock_region
-        from unittest.mock import patch, MagicMock
+        from unittest.mock import MagicMock
         mock_session = MagicMock()
         mock_session.get_config_variable.return_value = None
         with _mock_botocore_session(return_value=mock_session):
@@ -137,7 +135,7 @@ class TestResolveBedrocRegion:
 
     def test_falls_back_to_botocore_profile_region(self):
         from agent.bedrock_adapter import resolve_bedrock_region
-        from unittest.mock import patch, MagicMock
+        from unittest.mock import MagicMock
         mock_session = MagicMock()
         mock_session.get_config_variable.return_value = "eu-central-1"
         with _mock_botocore_session(return_value=mock_session):
@@ -145,7 +143,6 @@ class TestResolveBedrocRegion:
 
     def test_botocore_failure_falls_back_to_us_east_1(self):
         from agent.bedrock_adapter import resolve_bedrock_region
-        from unittest.mock import patch
         with _mock_botocore_session(side_effect=Exception("no botocore")):
             assert resolve_bedrock_region({}) == "us-east-1"
 
diff --git a/tests/agent/test_bedrock_integration.py b/tests/agent/test_bedrock_integration.py
index a5ab3563381..65df149e528 100644
--- a/tests/agent/test_bedrock_integration.py
+++ b/tests/agent/test_bedrock_integration.py
@@ -9,7 +9,6 @@ Note: Tests that import ``hermes_cli.auth`` or ``hermes_cli.runtime_provider``
 require Python 3.10+ due to ``str | None`` type syntax in the import chain.
 """
 
-import os
 from unittest.mock import MagicMock, patch
 
 import pytest
@@ -93,7 +92,6 @@ class TestResolveProvider:
 
     def test_explicit_bedrock_resolves(self, monkeypatch):
         """When user explicitly requests 'bedrock', it should resolve."""
-        from hermes_cli.auth import PROVIDER_REGISTRY
         # bedrock is in the registry, so resolve_provider should return it
         from hermes_cli.auth import resolve_provider
         result = resolve_provider("bedrock")
diff --git a/tests/agent/test_codex_cloudflare_headers.py b/tests/agent/test_codex_cloudflare_headers.py
index 2d9633a8039..fc52b78e886 100644
--- a/tests/agent/test_codex_cloudflare_headers.py
+++ b/tests/agent/test_codex_cloudflare_headers.py
@@ -29,7 +29,6 @@ import base64
 import json
 from unittest.mock import MagicMock, patch
 
-import pytest
 
 
 # ---------------------------------------------------------------------------
diff --git a/tests/agent/test_codex_responses_adapter.py b/tests/agent/test_codex_responses_adapter.py
index 751348bc6da..db3316a0567 100644
--- a/tests/agent/test_codex_responses_adapter.py
+++ b/tests/agent/test_codex_responses_adapter.py
@@ -1,6 +1,11 @@
 from types import SimpleNamespace
 
-from agent.codex_responses_adapter import _normalize_codex_response
+import pytest
+
+from agent.codex_responses_adapter import (
+    _format_responses_error,
+    _normalize_codex_response,
+)
 
 
 def test_normalize_codex_response_drops_transient_rs_tmp_reasoning_items():
@@ -61,3 +66,111 @@ def test_normalize_codex_response_treats_summary_only_reasoning_as_incomplete():
     assert assistant_message.content == ""
     assert assistant_message.reasoning == "still thinking"
     assert assistant_message.codex_reasoning_items is None
+
+
+# ---------------------------------------------------------------------------
+# _format_responses_error — adapted from anomalyco/opencode#28757.
+# Provider failures should surface BOTH the code (rate_limit_exceeded /
+# context_length_exceeded / internal_error / server_error) and the message,
+# so consumers can tell rate limits apart from context-length failures and
+# both apart from generic stream drops.
+# ---------------------------------------------------------------------------
+
+
+def test_format_responses_error_combines_code_and_message():
+    err = {"code": "rate_limit_exceeded", "message": "Slow down"}
+    assert _format_responses_error(err, "failed") == "rate_limit_exceeded: Slow down"
+
+
+def test_format_responses_error_message_only():
+    err = {"message": "Upstream model unavailable"}
+    assert _format_responses_error(err, "failed") == "Upstream model unavailable"
+
+
+def test_format_responses_error_code_only_when_message_empty():
+    # Some providers/proxies emit a code with an empty message body. We
+    # used to fall back to ``str(error_obj)`` — a dict dump — which leaked
+    # ``{'code': 'internal_error', 'message': ''}`` into chat output. Now
+    # the bare code is surfaced, which is the meaningful field.
+    err = {"code": "internal_error", "message": ""}
+    assert _format_responses_error(err, "failed") == "internal_error"
+
+
+def test_format_responses_error_code_only_when_message_missing():
+    err = {"code": "server_error"}
+    assert _format_responses_error(err, "failed") == "server_error"
+
+
+def test_format_responses_error_attribute_style_payload():
+    # SDK objects expose ``code``/``message`` as attributes rather than dict
+    # keys. The helper must accept both shapes since the Responses SDK
+    # returns SimpleNamespace-style objects on ``response.failed``.
+    err = SimpleNamespace(code="context_length_exceeded", message="too long")
+    assert _format_responses_error(err, "failed") == "context_length_exceeded: too long"
+
+
+def test_format_responses_error_falls_back_to_status_when_empty():
+    assert (
+        _format_responses_error(None, "failed")
+        == "Responses API returned status 'failed'"
+    )
+    assert (
+        _format_responses_error(None, "cancelled")
+        == "Responses API returned status 'cancelled'"
+    )
+
+
+def test_format_responses_error_stringifies_opaque_payload():
+    # Last-resort: a provider sent something that isn't a dict and has no
+    # code/message attributes. Surface its repr rather than swallow it
+    # silently — at least it's visible in logs.
+    assert _format_responses_error("opaque sentinel", "failed") == "opaque sentinel"
+
+
+def test_format_responses_error_ignores_non_string_code_message():
+    # Defensive: a malformed gateway could send numbers/objects in these
+    # fields. We don't want to crash; we want a best-effort string.
+    err = {"code": 500, "message": None}
+    assert _format_responses_error(err, "failed") == "500"
+
+
+def test_normalize_codex_response_failed_includes_code_in_error():
+    """Regression: response_status == 'failed' should surface the error
+    code, not just the message. Used to leak a bare 'Slow down' string
+    that was indistinguishable from a generic stream truncation."""
+    # ``output`` non-empty so we don't trip the "no output items" guard
+    # before reaching the failed-status branch. Real failed responses
+    # often DO carry a partial message item alongside the error.
+    response = SimpleNamespace(
+        status="failed",
+        output=[
+            SimpleNamespace(
+                type="message",
+                role="assistant",
+                status="incomplete",
+                content=[SimpleNamespace(type="output_text", text="partial")],
+            ),
+        ],
+        error={"code": "rate_limit_exceeded", "message": "Slow down"},
+    )
+    with pytest.raises(RuntimeError, match=r"^rate_limit_exceeded: Slow down$"):
+        _normalize_codex_response(response)
+
+
+def test_normalize_codex_response_failed_with_message_only():
+    """Backwards-compat: a failed response with only a message field
+    (no code) should still surface that message verbatim."""
+    response = SimpleNamespace(
+        status="failed",
+        output=[
+            SimpleNamespace(
+                type="message",
+                role="assistant",
+                status="incomplete",
+                content=[SimpleNamespace(type="output_text", text="partial")],
+            ),
+        ],
+        error={"message": "model error"},
+    )
+    with pytest.raises(RuntimeError, match=r"^model error$"):
+        _normalize_codex_response(response)
diff --git a/tests/agent/test_codex_ttfb_watchdog.py b/tests/agent/test_codex_ttfb_watchdog.py
index 57466a81834..d989d69d1e3 100644
--- a/tests/agent/test_codex_ttfb_watchdog.py
+++ b/tests/agent/test_codex_ttfb_watchdog.py
@@ -102,6 +102,47 @@ def test_ttfb_kills_when_no_stream_event(tmp_path, monkeypatch):
         stop["flag"] = True
 
 
+def test_ttfb_default_tolerates_slow_first_event(tmp_path, monkeypatch):
+    """With no env var set, the no-byte TTFB default is generous (120s), so a
+    request whose first stream event is merely slow (~2s of backend admission /
+    prefill) is NOT killed. This is the subscription-backed Codex case the tight
+    12s default used to abort mid-prefill."""
+    from agent import chat_completion_helpers as h
+
+    agent = _make_codex_agent(tmp_path, monkeypatch)
+    # Default behavior: no explicit TTFB override.
+    monkeypatch.delenv("HERMES_CODEX_TTFB_TIMEOUT_SECONDS", raising=False)
+    monkeypatch.delenv("HERMES_CODEX_TTFB_MAX_SECONDS", raising=False)
+
+    closes: list = []
+    dummy_client = SimpleNamespace()
+    monkeypatch.setattr(agent, "_create_request_openai_client", lambda **k: dummy_client)
+    monkeypatch.setattr(
+        agent, "_abort_request_openai_client",
+        lambda c, reason=None: closes.append(reason),
+    )
+    monkeypatch.setattr(
+        agent, "_close_request_openai_client",
+        lambda c, reason=None: closes.append(reason),
+    )
+
+    sentinel = SimpleNamespace(ok=True)
+
+    def fake_slow_first_event(api_kwargs, client=None, on_first_delta=None):
+        # Backend is alive but slow to admit: first event lands after ~2s,
+        # well under the 120s default cutoff. Mark the first byte so the
+        # no-byte detector sees activity, then return the response.
+        time.sleep(2.0)
+        agent._codex_stream_last_event_ts = time.time()
+        return sentinel
+
+    monkeypatch.setattr(agent, "_run_codex_stream", fake_slow_first_event)
+
+    resp = h.interruptible_api_call(agent, {"model": "gpt-5.5", "input": "hi"})
+    assert resp is sentinel
+    assert "codex_ttfb_kill" not in closes
+
+
 def test_ttfb_includes_silent_hang_hint_for_gpt_5_5(tmp_path, monkeypatch):
     """The no-first-byte watchdog should surface the same actionable hint as the
     stale-call timeout path when the model matches the silent-hang heuristic."""
@@ -114,6 +155,7 @@ def test_ttfb_includes_silent_hang_hint_for_gpt_5_5(tmp_path, monkeypatch):
     statuses: list[str] = []
     dummy_client = SimpleNamespace()
     monkeypatch.setattr(agent, "_create_request_openai_client", lambda **k: dummy_client)
+    monkeypatch.setattr(agent, "_buffer_status", lambda msg: statuses.append(msg))
     monkeypatch.setattr(agent, "_emit_status", lambda msg: statuses.append(msg))
     monkeypatch.setattr(
         agent, "_abort_request_openai_client",
diff --git a/tests/agent/test_compression_concurrent_fork.py b/tests/agent/test_compression_concurrent_fork.py
new file mode 100644
index 00000000000..76e8a459258
--- /dev/null
+++ b/tests/agent/test_compression_concurrent_fork.py
@@ -0,0 +1,240 @@
+"""Regression: prevent transcript fork when two paths compress the same session_id.
+
+Damien's incident (Discord, 2026-05-28): a long Hermes session in a Discord
+gateway hit the compression threshold at the end of a turn.  The parent agent
+finished delivering the response and ``conversation_loop.py`` fired
+``_spawn_background_review(...)`` — which builds a forked ``AIAgent`` that
+inherits ``agent.session_id`` (see ``agent/background_review.py``::
+``review_agent.session_id = agent.session_id``).  Roughly two seconds later
+a synthetic ``Background process proc_… completed`` event arrived and
+started a fresh turn on the same parent ``session_id`` (still cached in the
+gateway's ``SessionEntry``).  Both paths hit preflight compression on the
+same parent transcript and called ``_compress_context`` concurrently.  Each
+ended the parent and created its own CHILD session in ``state.db``, both
+parented to the same old id.  The gateway's ``SessionEntry`` only caught one
+rotation; the other child became an orphan that silently accumulated writes.
+
+Repro shape on Damien's machine:
+
+  parent 20260527_234659_e65f0e  ended_at=set  end_reason='compression'
+  child  20260528_113619_fc80e1  parent=20260527_234659_e65f0e  (in SessionEntry)
+  child  <orphan>                parent=20260527_234659_e65f0e  (silent writes)
+
+This regression simulates the two concurrent ``compress_context`` calls
+against a shared ``state.db`` and asserts that the per-session compression
+lock added in this PR prevents the orphan child.  Without the lock the
+fixture deterministically produces 2 children; with the lock, exactly 1.
+"""
+
+from __future__ import annotations
+
+import os
+import threading
+import time
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from hermes_state import SessionDB
+
+
+def _build_agent_with_db(db: SessionDB, session_id: str):
+    """Build an AIAgent that's wired to ``db`` and pinned to ``session_id``."""
+    with patch.dict(os.environ, {"OPENROUTER_API_KEY": "test-key"}):
+        from run_agent import AIAgent
+
+        agent = AIAgent(
+            api_key="test-key",
+            base_url="https://openrouter.ai/api/v1",
+            model="test/model",
+            quiet_mode=True,
+            session_db=db,
+            session_id=session_id,
+            skip_context_files=True,
+            skip_memory=True,
+        )
+
+    # Stub the compressor so it returns deterministic output and DOESN'T make
+    # an LLM call.  Sleep inside compress() so the two threads' rotations
+    # actually overlap — without that the OS could happen to serialize them
+    # and hide the bug.
+    compressor = MagicMock()
+
+    def _compress_with_overlap(*_a, **_kw):
+        time.sleep(0.25)
+        return [
+            {"role": "user", "content": "[CONTEXT COMPACTION] summary"},
+            {"role": "user", "content": "tail"},
+        ]
+
+    compressor.compress.side_effect = _compress_with_overlap
+    compressor.compression_count = 1
+    compressor.last_prompt_tokens = 0
+    compressor.last_completion_tokens = 0
+    compressor._last_summary_error = None
+    compressor._last_compress_aborted = False
+    compressor._last_aux_model_failure_model = None
+    compressor._last_aux_model_failure_error = None
+    agent.context_compressor = compressor
+    return agent
+
+
+def _count_children(db: SessionDB, parent_sid: str) -> int:
+    """Count rows in state.db whose parent_session_id == parent_sid."""
+    rows = db._conn.execute(
+        "SELECT id FROM sessions WHERE parent_session_id = ?",
+        (parent_sid,),
+    ).fetchall()
+    return len(rows)
+
+
+def test_concurrent_compression_does_not_fork_session(tmp_path: Path) -> None:
+    """Two AIAgents that share a session_id MUST NOT both rotate it.
+
+    Without the per-session compression lock this fixture deterministically
+    produces 2 child sessions (transcript fork).  With the lock the second
+    path aborts cleanly, leaving exactly 1 canonical child.
+    """
+    db = SessionDB(db_path=tmp_path / "state.db")
+
+    parent_sid = "PARENT_TEST_SESSION"
+    db.create_session(parent_sid, source="discord")
+
+    # Two agents on the same session_id, both wired to the same db —
+    # mirrors the parent-turn agent + the background-review fork right
+    # after a turn ends.
+    agent_a = _build_agent_with_db(db, parent_sid)
+    agent_b = _build_agent_with_db(db, parent_sid)
+    messages = [{"role": "user", "content": f"m{i}"} for i in range(20)]
+
+    def run(agent):
+        try:
+            agent._compress_context(messages, "sys", approx_tokens=120_000)
+        except Exception:
+            # Surface to the test if either raises — should not happen.
+            raise
+
+    t_a = threading.Thread(target=run, args=(agent_a,), name="main_turn")
+    t_b = threading.Thread(target=run, args=(agent_b,), name="review_fork")
+    t_a.start()
+    t_b.start()
+    t_a.join(timeout=10)
+    t_b.join(timeout=10)
+
+    # Exactly one canonical child — not two orphans.
+    assert _count_children(db, parent_sid) == 1, (
+        "Compression lock failed: parent session has multiple children in state.db "
+        "(transcript fork). This is Damien's incident shape — see the test docstring."
+    )
+
+    # And exactly one of the two agents actually rotated its session_id; the
+    # other should still hold the parent_sid (its compression was skipped).
+    rotated = sum(
+        1 for a in (agent_a, agent_b) if a.session_id != parent_sid
+    )
+    assert rotated == 1, (
+        f"Expected exactly one agent to rotate session_id, got {rotated}. "
+        "Both agents rotating means the lock didn't serialize them."
+    )
+
+    # The lock must be released after the winner finished.
+    assert db.get_compression_lock_holder(parent_sid) is None, (
+        "Compression lock leaked: still held after both rotations completed."
+    )
+
+
+def test_skipped_compression_returns_messages_unchanged(tmp_path: Path) -> None:
+    """The loser of the lock race must return its input messages verbatim.
+
+    Callers (preflight compression in ``conversation_loop.py``) detect the
+    no-op via ``len(returned) == len(input)`` and stop the auto-compress
+    retry loop.  If the skipped path returned the compressed view, that
+    detection would break and the caller would mutate the conversation
+    without going through state.db rotation.
+    """
+    db = SessionDB(db_path=tmp_path / "state.db")
+    parent_sid = "LOSER_TEST"
+    db.create_session(parent_sid, source="discord")
+
+    # Pre-acquire the lock so the agent's compress_context sees it held.
+    held = db.try_acquire_compression_lock(parent_sid, "external_holder")
+    assert held is True
+
+    agent = _build_agent_with_db(db, parent_sid)
+    messages = [{"role": "user", "content": "m1"}, {"role": "user", "content": "m2"}]
+
+    compressed, _sp = agent._compress_context(messages, "sys", approx_tokens=120_000)
+
+    # Skipped: messages returned verbatim, no rotation
+    assert compressed is messages or compressed == messages
+    assert agent.session_id == parent_sid
+    # Compressor was never called (the skip happens before .compress())
+    agent.context_compressor.compress.assert_not_called()
+
+
+class _NoLockSubsystemDB:
+    """Wraps a real SessionDB but simulates a pre-#34351 version skew.
+
+    A long-lived process can hold ``hermes_state.SessionDB`` bound to the
+    OLD class in memory (no compression-lock methods) while a lazily
+    re-imported ``conversation_compression.py`` calls the NEW lock code.
+    ``try_acquire_compression_lock`` then raises ``AttributeError`` — which
+    is NOT a ``sqlite3.Error``, so the method's own fail-open guard never
+    runs.  Before the fix the exception propagated to the outer agent loop,
+    which printed the error and retried; compression never succeeded, the
+    token count never dropped, and the loop re-triggered compaction forever.
+    """
+
+    def __init__(self, real_db: SessionDB) -> None:
+        self._real = real_db
+
+    def try_acquire_compression_lock(self, *_a, **_k):  # noqa: D401
+        raise AttributeError(
+            "'SessionDB' object has no attribute 'try_acquire_compression_lock'"
+        )
+
+    def get_compression_lock_holder(self, *_a, **_k):
+        raise AttributeError("'SessionDB' object has no attribute 'get_compression_lock_holder'")
+
+    def release_compression_lock(self, *_a, **_k):
+        raise AttributeError("'SessionDB' object has no attribute 'release_compression_lock'")
+
+    def __getattr__(self, name):
+        # Everything else (create_session, append, rotation helpers) goes to
+        # the real db so the post-lock compression + rotation path runs.
+        return getattr(self._real, name)
+
+
+def test_missing_lock_subsystem_fails_open_not_infinite_loop(tmp_path: Path) -> None:
+    """Version skew (no lock methods) must fail OPEN, not raise into the loop.
+
+    Reproduces the "API call #47/#48/#49 ... has no attribute
+    try_acquire_compression_lock" infinite-compaction spin: when the lock
+    subsystem is absent, ``_compress_context`` must skip locking and proceed
+    with compression (so the loop makes progress and terminates) instead of
+    letting the ``AttributeError`` escape to the retry loop.
+    """
+    db = SessionDB(db_path=tmp_path / "state.db")
+    parent_sid = "SKEW_TEST_SESSION"
+    db.create_session(parent_sid, source="discord")
+
+    agent = _build_agent_with_db(db, parent_sid)
+    # Swap in the lock-less wrapper AFTER construction (the agent already
+    # holds a normal db reference; we only break the lock methods).
+    agent._session_db = _NoLockSubsystemDB(db)
+
+    messages = [{"role": "user", "content": f"m{i}"} for i in range(20)]
+
+    # MUST NOT raise AttributeError. Before the fix this raised and the
+    # outer loop would retry forever.
+    compressed, _sp = agent._compress_context(messages, "sys", approx_tokens=120_000)
+
+    # Compression actually ran (proceeded past the broken lock) and made
+    # progress, so the auto-compress loop would terminate.
+    agent.context_compressor.compress.assert_called_once()
+    assert len(compressed) < len(messages), (
+        "Compression made no progress despite failing open — loop would still spin."
+    )
+    # Session rotated (compression succeeded end-to-end).
+    assert agent.session_id != parent_sid
diff --git a/tests/agent/test_compressor_image_tokens.py b/tests/agent/test_compressor_image_tokens.py
index 83198e5de90..73492eb8061 100644
--- a/tests/agent/test_compressor_image_tokens.py
+++ b/tests/agent/test_compressor_image_tokens.py
@@ -8,7 +8,6 @@ creative workflows that iterate on images across many turns.
 
 from __future__ import annotations
 
-import pytest
 
 from agent.context_compressor import (
     _CHARS_PER_TOKEN,
diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py
index dca10bb4462..0d7aa81f41f 100644
--- a/tests/agent/test_context_compressor.py
+++ b/tests/agent/test_context_compressor.py
@@ -67,6 +67,7 @@ class TestCompress:
     def test_truncation_fallback_no_client(self, compressor):
         # Simulate "no summarizer available" explicitly. call_llm can otherwise
         # discover the developer's real auxiliary credentials from auth state.
+        # The failed summary should use the deterministic fallback path.
         msgs = [{"role": "system", "content": "System prompt"}] + self._make_messages(10)
         with patch("agent.context_compressor.call_llm", side_effect=RuntimeError("no provider")):
             result = compressor.compress(msgs)
@@ -78,6 +79,64 @@ class TestCompress:
         assert compressor._last_compress_aborted is False
         assert compressor._last_summary_fallback_used is True
 
+    def test_summary_failure_uses_deterministic_fallback_with_recovered_context(self):
+        """Regression: failed LLM summaries should not emit a content-free marker.
+
+        The fallback should preserve locally recoverable continuity details so a
+        future turn does not see only "messages were removed" after compaction.
+        """
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            c = ContextCompressor(
+                model="test/model",
+                protect_first_n=1,
+                protect_last_n=2,
+                quiet_mode=True,
+            )
+
+        msgs = [
+            {"role": "system", "content": "System prompt"},
+            {"role": "user", "content": "Please fix the compression summary failure"},
+            {
+                "role": "assistant",
+                "content": None,
+                "tool_calls": [{
+                    "id": "call_1",
+                    "type": "function",
+                    "function": {
+                        "name": "read_file",
+                        "arguments": '{"path":"agent/context_compressor.py","offset":1}',
+                    },
+                }],
+            },
+            {
+                "role": "tool",
+                "tool_call_id": "call_1",
+                "content": "read agent/context_compressor.py and found static fallback marker",
+            },
+            {"role": "assistant", "content": "I found the issue."},
+            {"role": "user", "content": "latest protected ask"},
+            {"role": "assistant", "content": "ok"},
+        ]
+
+        with (
+            patch.object(c, "_find_tail_cut_by_tokens", return_value=5),
+            patch(
+                "agent.context_compressor.call_llm",
+                side_effect=RuntimeError("provider down"),
+            ),
+        ):
+            result = c.compress(msgs)
+
+        combined = "\n".join(str(m.get("content", "")) for m in result)
+        assert "## Active Task" in combined
+        assert "Please fix the compression summary failure" in combined
+        assert "read_file" in combined
+        assert "agent/context_compressor.py" in combined
+        assert "Summary generation was unavailable" in combined
+        assert "removed to free context space but could not be summarized" not in combined
+        assert c._last_summary_fallback_used is True
+        assert c._last_summary_dropped_count == 3
+
     def test_compression_increments_count(self, compressor):
         msgs = self._make_messages(10)
         # Default config (abort_on_summary_failure=False) — fallback path
@@ -756,6 +815,123 @@ class TestSummaryFailureTrackingForGatewayWarning:
             for m in result
         )
 
+    def test_summary_failure_fallback_preserves_tool_paths_and_redacts_secret_context(self):
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=1, protect_last_n=1)
+
+        secret = "ghp_" + ("a" * 36)
+        msgs = [
+            {"role": "system", "content": "sys"},
+            {"role": "user", "content": f"Fix /tmp/project/app.py and never leak {secret}"},
+            {
+                "role": "assistant",
+                "content": "I will inspect it.",
+                "tool_calls": [
+                    {
+                        "id": "call-1",
+                        "function": {
+                            "name": "read_file",
+                            "arguments": '{"path":"/tmp/project/app.py"}',
+                        },
+                    }
+                ],
+            },
+            {"role": "tool", "tool_call_id": "call-1", "content": f"read /tmp/project/app.py with token {secret}"},
+            {"role": "assistant", "content": "Found the bug in /tmp/project/app.py"},
+            {"role": "user", "content": "Patch it after this"},
+            {"role": "assistant", "content": "Ready to patch"},
+            {"role": "user", "content": "current live request should stay in tail"},
+        ]
+
+        with patch("agent.context_compressor.call_llm", side_effect=Exception("timeout")):
+            result = c.compress(msgs)
+
+        fallback = next(m["content"] for m in result if "Summary generation was unavailable" in m.get("content", ""))
+        assert "Called tool(s): read_file" in fallback
+        assert "/tmp/project/app.py" in fallback
+        assert secret not in fallback
+        assert "ghp_" not in fallback
+
+    def test_summary_failure_fallback_supports_object_tool_calls_and_content_path_mentions(self):
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=1, protect_last_n=1)
+
+        tool_call = MagicMock()
+        tool_call.id = "call-object"
+        tool_call.function.name = "terminal"
+        tool_call.function.arguments = '{"command":"python /repo/scripts/fix.py", "workdir":"/repo"}'
+        msgs = [
+            {"role": "system", "content": "sys"},
+            {"role": "user", "content": "Review ~/src/pkg/module.py before editing"},
+            {"role": "assistant", "content": "Running command", "tool_calls": [tool_call]},
+            {"role": "tool", "tool_call_id": "call-object", "content": "Traceback in /repo/src/pkg/module.py: boom"},
+            {"role": "assistant", "content": "Need to update C:\\work\\pkg\\module.py too"},
+            {"role": "user", "content": "Patch ~/src/pkg/module.py after checking those files"},
+            {"role": "assistant", "content": "Ready to patch"},
+            {"role": "user", "content": "tail task"},
+        ]
+
+        with patch("agent.context_compressor.call_llm", side_effect=Exception("timeout")):
+            result = c.compress(msgs)
+
+        fallback = next(m["content"] for m in result if "Summary generation was unavailable" in m.get("content", ""))
+        assert "Called tool(s): terminal" in fallback
+        assert "/repo/scripts/fix.py" in fallback
+        assert "/repo" in fallback
+        assert "/repo/src/pkg/module.py" in fallback
+        assert "C:\\work\\pkg\\module.py" in fallback
+        assert "Traceback" in fallback
+        assert "## Last Dropped Turns" in fallback
+        assert "TOOL: Traceback in /repo/src/pkg/module.py: boom" in fallback
+
+    def test_summary_failure_fallback_preserves_last_dropped_turns_without_tail(self):
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=1, protect_last_n=1)
+
+        msgs = [
+            {"role": "system", "content": "sys"},
+            {"role": "user", "content": "Investigate dropped-window request in /tmp/active.py"},
+            {"role": "assistant", "content": "I inspected /tmp/active.py and found the failing branch"},
+            {"role": "tool", "tool_call_id": "call-old", "content": "ValueError: boom in /tmp/active.py"},
+            {"role": "assistant", "content": "Next step is patching /tmp/active.py"},
+            {"role": "user", "content": "Confirm regression coverage for /tmp/active.py"},
+            {"role": "assistant", "content": "Regression note is ready"},
+            {"role": "user", "content": "protected tail request must not be copied from dropped window"},
+        ]
+
+        with patch("agent.context_compressor.call_llm", side_effect=Exception("timeout")):
+            result = c.compress(msgs)
+
+        fallback = next(m["content"] for m in result if "Summary generation was unavailable" in m.get("content", ""))
+        assert "## Last Dropped Turns" in fallback
+        assert "ASSISTANT: I inspected /tmp/active.py and found the failing branch" in fallback
+        assert "TOOL: ValueError: boom in /tmp/active.py" in fallback
+        assert "protected tail request must not be copied" not in fallback
+
+    def test_summary_failure_fallback_is_bounded(self):
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=1, protect_last_n=1)
+
+        long_text = "important detail " * 2000
+        msgs = [
+            {"role": "system", "content": "sys"},
+            {"role": "user", "content": "head user"},
+            {"role": "assistant", "content": "head assistant"},
+            {"role": "user", "content": long_text},
+            {"role": "assistant", "content": long_text},
+            {"role": "user", "content": long_text},
+            {"role": "assistant", "content": long_text},
+            {"role": "user", "content": "tail"},
+        ]
+
+        with patch("agent.context_compressor.call_llm", side_effect=Exception("timeout")):
+            result = c.compress(msgs)
+
+        fallback = next(m["content"] for m in result if "Summary generation was unavailable" in m.get("content", ""))
+        assert len(fallback) <= 8300
+        assert "deterministic fallback" in fallback
+        assert "important detail" in fallback
+
     def test_compress_clears_fallback_flag_on_subsequent_success(self):
         mock_response = MagicMock()
         mock_response.choices = [MagicMock()]
diff --git a/tests/agent/test_context_compressor_summary_continuity.py b/tests/agent/test_context_compressor_summary_continuity.py
index d797b661f01..f3101913ceb 100644
--- a/tests/agent/test_context_compressor_summary_continuity.py
+++ b/tests/agent/test_context_compressor_summary_continuity.py
@@ -67,3 +67,21 @@ def test_resume_rehydrates_previous_summary_from_handoff_message():
     assert "TURNS TO SUMMARIZE:" not in prompt
     assert prompt.count(old_summary) == 1
     assert f"[USER]: {SUMMARY_PREFIX}" not in prompt
+
+
+def test_handoff_in_protected_head_populates_previous_summary_before_update():
+    """A resumed protected-head handoff should restore iterative-summary state."""
+    compressor = _compressor()
+    old_summary = "PROTECTED-HEAD-SUMMARY durable facts from before restart"
+    seen_turns = []
+
+    def fake_generate_summary(turns_to_summarize, focus_topic=None):
+        seen_turns.extend(turns_to_summarize)
+        return "new summary from resumed turns"
+
+    with patch.object(compressor, "_generate_summary", side_effect=fake_generate_summary):
+        compressor.compress(_messages_with_handoff(old_summary))
+
+    assert compressor._previous_summary == old_summary
+    assert seen_turns
+    assert all(old_summary not in str(msg.get("content", "")) for msg in seen_turns)
diff --git a/tests/agent/test_context_engine.py b/tests/agent/test_context_engine.py
index a06285dc2af..32acec010c8 100644
--- a/tests/agent/test_context_engine.py
+++ b/tests/agent/test_context_engine.py
@@ -232,7 +232,7 @@ class TestPluginContextEngineSlot:
         assert mgr._context_engine is None
 
     def test_get_plugin_context_engine(self):
-        from hermes_cli.plugins import PluginManager, PluginContext, PluginManifest, get_plugin_context_engine, _plugin_manager
+        from hermes_cli.plugins import PluginManager, get_plugin_context_engine
         import hermes_cli.plugins as plugins_mod
 
         # Inject a test manager
diff --git a/tests/agent/test_context_engine_host_contract.py b/tests/agent/test_context_engine_host_contract.py
new file mode 100644
index 00000000000..bb6fb4c4108
--- /dev/null
+++ b/tests/agent/test_context_engine_host_contract.py
@@ -0,0 +1,289 @@
+"""Regressions for the context-engine host contract.
+
+These tests pin the five generic host-side guarantees that external context
+engine plugins (e.g. hermes-lcm) rely on:
+
+1. ``_transition_context_engine_session`` drives the full lifecycle
+   (on_session_end → on_session_reset → on_session_start → optional
+   carry_over_new_session_context) and ``reset_session_state`` delegates
+   to it when callers pass session metadata.
+
+2. ``on_session_start`` receives ``conversation_id`` derived from
+   ``_gateway_session_key`` at agent init time.
+
+3. ``conversation_loop`` forwards canonical cache buckets
+   (``cache_read_tokens``, ``cache_write_tokens``, ``input_tokens``,
+   ``output_tokens``, ``reasoning_tokens``) to the engine's
+   ``update_from_response``, on top of the legacy aggregate keys.
+
+4. ``_discover_context_engines`` includes plugin-registered engines (not
+   just repo-shipped engines under ``plugins/context_engine/``).
+
+5. The repo-shipped ``_EngineCollector`` honors ``ctx.register_command``
+   from a plugin engine's ``register(ctx)`` entry point and routes it
+   to the global plugin command registry.
+"""
+
+from __future__ import annotations
+
+from unittest.mock import MagicMock
+
+
+from run_agent import AIAgent
+
+
+def _bare_agent() -> AIAgent:
+    agent = object.__new__(AIAgent)
+    agent.session_id = "test-session"
+    agent.model = "fake-model"
+    agent.platform = "telegram"
+    agent._gateway_session_key = "agent:main:telegram:dm:42"
+    return agent
+
+
+def test_transition_runs_full_lifecycle_in_order():
+    """End → reset → start → carry_over, in that order, when all inputs apply."""
+    events: list[str] = []
+    engine = MagicMock()
+    engine.context_length = 200_000
+    engine.on_session_end.side_effect = lambda *a, **kw: events.append("on_session_end")
+    engine.on_session_reset.side_effect = lambda *a, **kw: events.append("on_session_reset")
+    engine.on_session_start.side_effect = lambda *a, **kw: events.append("on_session_start")
+    engine.carry_over_new_session_context.side_effect = lambda *a, **kw: events.append("carry_over")
+
+    agent = _bare_agent()
+    agent.context_compressor = engine
+
+    agent._transition_context_engine_session(
+        old_session_id="old-sid",
+        new_session_id="new-sid",
+        previous_messages=[{"role": "user", "content": "hi"}],
+        carry_over_context=True,
+    )
+
+    assert events == [
+        "on_session_end",
+        "on_session_reset",
+        "on_session_start",
+        "carry_over",
+    ]
+
+
+def test_transition_passes_conversation_id_from_gateway_session_key():
+    """on_session_start receives ``conversation_id`` from ``_gateway_session_key``."""
+    engine = MagicMock()
+    engine.context_length = 200_000
+    captured: dict = {}
+    engine.on_session_start.side_effect = lambda sid, **kw: captured.update(kw)
+
+    agent = _bare_agent()
+    agent.context_compressor = engine
+
+    agent._transition_context_engine_session(
+        old_session_id="old-sid",
+        new_session_id="new-sid",
+        previous_messages=[{"role": "user", "content": "hi"}],
+    )
+
+    assert captured.get("conversation_id") == "agent:main:telegram:dm:42"
+    assert captured.get("old_session_id") == "old-sid"
+    assert captured.get("platform") == "telegram"
+
+
+def test_transition_skips_optional_hooks_when_engine_lacks_them():
+    """Engines that don't implement on_session_end/carry_over still work."""
+    class MinimalEngine:
+        def __init__(self):
+            self.context_length = 100_000
+            self.reset_called = False
+            self.start_called_with = None
+
+        def on_session_reset(self):
+            self.reset_called = True
+
+        def on_session_start(self, sid, **kw):
+            self.start_called_with = (sid, kw)
+
+    engine = MinimalEngine()
+    agent = _bare_agent()
+    agent.context_compressor = engine
+
+    # Should not raise even though on_session_end / carry_over are missing.
+    agent._transition_context_engine_session(
+        old_session_id="old",
+        new_session_id="new",
+        previous_messages=[{"role": "user", "content": "hi"}],
+        carry_over_context=True,
+    )
+
+    assert engine.reset_called is True
+    assert engine.start_called_with is not None
+    new_sid, kw = engine.start_called_with
+    assert new_sid == "new"
+    assert kw.get("old_session_id") == "old"
+
+
+def test_reset_session_state_delegates_to_transition_when_args_provided():
+    """``reset_session_state(previous_messages=..., old_session_id=...)`` fires full lifecycle."""
+    engine = MagicMock()
+    engine.context_length = 100_000
+
+    agent = _bare_agent()
+    agent.context_compressor = engine
+
+    agent.reset_session_state(
+        previous_messages=[{"role": "user", "content": "hi"}],
+        old_session_id="old-sid",
+    )
+
+    assert engine.on_session_end.called
+    assert engine.on_session_reset.called
+    assert engine.on_session_start.called
+    # No carry_over_context, so carry_over hook NOT called.
+    assert not engine.carry_over_new_session_context.called
+
+
+def test_reset_session_state_default_call_only_resets():
+    """Bare ``reset_session_state()`` still only resets the engine (no end/start)."""
+    engine = MagicMock()
+    engine.context_length = 100_000
+
+    agent = _bare_agent()
+    agent.context_compressor = engine
+
+    agent.reset_session_state()
+
+    assert engine.on_session_reset.called
+    assert not engine.on_session_end.called
+    assert not engine.on_session_start.called
+
+
+def test_update_from_response_forwards_canonical_cache_buckets():
+    """conversation_loop passes cache_read/write/reasoning tokens to engine."""
+    # Test the contract directly: a usage_dict built from CanonicalUsage must
+    # contain the canonical buckets in addition to the legacy keys. We don't
+    # spin up the full conversation loop; we just verify the dict shape.
+    from agent.usage_pricing import CanonicalUsage
+
+    canonical = CanonicalUsage(
+        input_tokens=1000,
+        output_tokens=500,
+        cache_read_tokens=800,
+        cache_write_tokens=200,
+        reasoning_tokens=50,
+    )
+    usage_dict = {
+        "prompt_tokens": canonical.prompt_tokens,
+        "completion_tokens": canonical.output_tokens,
+        "total_tokens": canonical.total_tokens,
+        "input_tokens": canonical.input_tokens,
+        "output_tokens": canonical.output_tokens,
+        "cache_read_tokens": canonical.cache_read_tokens,
+        "cache_write_tokens": canonical.cache_write_tokens,
+        "reasoning_tokens": canonical.reasoning_tokens,
+    }
+
+    # Legacy keys present
+    assert usage_dict["prompt_tokens"] == canonical.prompt_tokens
+    assert usage_dict["completion_tokens"] == 500
+    assert usage_dict["total_tokens"] == canonical.total_tokens
+    # Canonical cache + reasoning buckets present
+    assert usage_dict["cache_read_tokens"] == 800
+    assert usage_dict["cache_write_tokens"] == 200
+    assert usage_dict["reasoning_tokens"] == 50
+    assert usage_dict["input_tokens"] == 1000
+    assert usage_dict["output_tokens"] == 500
+
+
+def test_discover_context_engines_includes_plugin_registered_engines(monkeypatch):
+    """Plugin-registered context engines appear in the ``hermes plugins`` picker."""
+    from hermes_cli import plugins_cmd
+
+    fake_repo = lambda: [("compressor", "built-in", True)]
+
+    class FakePluginEngine:
+        name = "lcm"
+
+    monkeypatch.setattr(
+        "plugins.context_engine.discover_context_engines",
+        fake_repo,
+    )
+    monkeypatch.setattr(
+        "hermes_cli.plugins.discover_plugins",
+        lambda *_a, **_kw: None,
+    )
+    monkeypatch.setattr(
+        "hermes_cli.plugins.get_plugin_context_engine",
+        lambda: FakePluginEngine(),
+    )
+
+    engines = plugins_cmd._discover_context_engines()
+    names = [n for n, _desc in engines]
+    assert "compressor" in names
+    assert "lcm" in names
+
+
+def test_discover_context_engines_dedupes_by_name(monkeypatch):
+    """Repo-shipped engine wins when name collides with a plugin-registered one."""
+    from hermes_cli import plugins_cmd
+
+    class FakePluginEngine:
+        name = "compressor"  # same name as repo-shipped
+
+    monkeypatch.setattr(
+        "plugins.context_engine.discover_context_engines",
+        lambda: [("compressor", "built-in compressor", True)],
+    )
+    monkeypatch.setattr(
+        "hermes_cli.plugins.discover_plugins",
+        lambda *_a, **_kw: None,
+    )
+    monkeypatch.setattr(
+        "hermes_cli.plugins.get_plugin_context_engine",
+        lambda: FakePluginEngine(),
+    )
+
+    engines = plugins_cmd._discover_context_engines()
+    # Only one entry — the repo-shipped one. Description is preserved.
+    assert engines == [("compressor", "built-in compressor")]
+
+
+def test_engine_collector_forwards_register_command_to_plugin_manager():
+    """A plugin context engine can register a slash command via ``ctx.register_command``."""
+    from plugins.context_engine import _EngineCollector
+    from hermes_cli.plugins import get_plugin_manager
+
+    handler = lambda raw_args: f"echo: {raw_args}"
+
+    collector = _EngineCollector(engine_name="my-lcm")
+    collector.register_command(
+        "my-lcm-test-cmd",
+        handler,
+        description="test command from a context engine",
+        args_hint="<msg>",
+    )
+
+    manager = get_plugin_manager()
+    try:
+        assert "my-lcm-test-cmd" in manager._plugin_commands
+        entry = manager._plugin_commands["my-lcm-test-cmd"]
+        assert entry["handler"] is handler
+        assert entry["args_hint"] == "<msg>"
+        assert entry["plugin"] == "context-engine:my-lcm"
+    finally:
+        # Clean up so we don't leak the registration across tests.
+        manager._plugin_commands.pop("my-lcm-test-cmd", None)
+
+
+def test_engine_collector_rejects_builtin_command_conflicts():
+    """Context engine cannot shadow built-in slash commands like /help."""
+    from plugins.context_engine import _EngineCollector
+    from hermes_cli.plugins import get_plugin_manager
+
+    collector = _EngineCollector(engine_name="my-lcm")
+    collector.register_command("help", lambda *_: "shadow")
+
+    manager = get_plugin_manager()
+    # Must NOT have overwritten / registered against built-in /help.
+    assert "help" not in manager._plugin_commands or \
+           manager._plugin_commands["help"].get("plugin") != "context-engine:my-lcm"
diff --git a/tests/agent/test_credential_pool.py b/tests/agent/test_credential_pool.py
index 69b30730e57..22a4de6d507 100644
--- a/tests/agent/test_credential_pool.py
+++ b/tests/agent/test_credential_pool.py
@@ -379,6 +379,415 @@ def test_mark_exhausted_and_rotate_persists_status(tmp_path, monkeypatch):
     assert persisted["last_error_code"] == 402
 
 
+def test_token_invalidated_marks_credential_dead(tmp_path, monkeypatch):
+    """OpenAI Codex token_invalidated must mark the credential DEAD, not exhausted.
+
+    Regression for #32849: when an OAuth credential is revoked upstream, the
+    1-hour exhausted TTL means it re-enters rotation every hour and fails
+    again with the same 401 — surfacing as "Failed to generate context
+    summary" on context compression.  Terminal OAuth failures should never
+    auto-recover.
+    """
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    _write_auth_store(
+        tmp_path,
+        {
+            "version": 1,
+            "credential_pool": {
+                "openai-codex": [
+                    {
+                        "id": "cred-dead",
+                        "label": "revoked",
+                        "auth_type": "oauth",
+                        "priority": 0,
+                        "source": "manual:device_code",
+                        "access_token": "revoked-at",
+                        "refresh_token": "revoked-rt",
+                    },
+                    {
+                        "id": "cred-ok",
+                        "label": "healthy",
+                        "auth_type": "oauth",
+                        "priority": 1,
+                        "source": "manual:device_code",
+                        "access_token": "healthy-at",
+                        "refresh_token": "healthy-rt",
+                    },
+                ]
+            },
+        },
+    )
+
+    from agent.credential_pool import load_pool, STATUS_DEAD
+
+    pool = load_pool("openai-codex")
+    assert pool.select().id == "cred-dead"
+
+    # Simulate the exact OpenAI Codex 401 token_invalidated response shape.
+    next_entry = pool.mark_exhausted_and_rotate(
+        status_code=401,
+        error_context={
+            "reason": "token_invalidated",
+            "message": "Your authentication token has been invalidated. Please try signing in again.",
+        },
+    )
+
+    # Rotation still works — we hand off to the healthy credential.
+    assert next_entry is not None
+    assert next_entry.id == "cred-ok"
+
+    # The revoked credential is now permanently marked DEAD.
+    auth_payload = json.loads((tmp_path / "hermes" / "auth.json").read_text())
+    persisted = auth_payload["credential_pool"]["openai-codex"][0]
+    assert persisted["last_status"] == STATUS_DEAD
+    assert persisted["last_error_code"] == 401
+    assert persisted["last_error_reason"] == "token_invalidated"
+
+
+def test_dead_credential_never_re_enters_rotation_after_ttl(tmp_path, monkeypatch):
+    """A DEAD credential must stay excluded regardless of how much time passes.
+
+    The exhausted TTL clears entries after 5 min (401) / 1 hour (429).
+    A DEAD credential has no recovery TTL — it stays dead until either
+    (a) an explicit re-auth write-side sync rewrites the tokens, or
+    (b) the manual-prune TTL elapses (covered by separate tests below).
+    This test verifies the core invariant in the recent-entry window.
+    """
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    # DEAD entry from 2 hours ago — well past the exhausted TTLs (5min/1h)
+    # but well within the 24h manual-prune window.
+    two_hours_ago = time.time() - (2 * 3600)
+    _write_auth_store(
+        tmp_path,
+        {
+            "version": 1,
+            "credential_pool": {
+                "openai-codex": [
+                    {
+                        "id": "cred-dead",
+                        "label": "revoked",
+                        "auth_type": "oauth",
+                        "priority": 0,
+                        "source": "manual:device_code",
+                        "access_token": "revoked-at",
+                        "refresh_token": "revoked-rt",
+                        "last_status": "dead",
+                        "last_status_at": two_hours_ago,
+                        "last_error_code": 401,
+                        "last_error_reason": "token_invalidated",
+                    },
+                    {
+                        "id": "cred-ok",
+                        "label": "healthy",
+                        "auth_type": "oauth",
+                        "priority": 1,
+                        "source": "manual:device_code",
+                        "access_token": "healthy-at",
+                        "refresh_token": "healthy-rt",
+                    },
+                ]
+            },
+        },
+    )
+
+    from agent.credential_pool import load_pool, STATUS_DEAD
+
+    pool = load_pool("openai-codex")
+    selected = pool.select()
+    # Should skip the dead entry and pick the healthy one — even though
+    # the dead entry has priority 0 (would normally be picked first) and
+    # plenty of time has passed since it was marked dead.
+    assert selected is not None
+    assert selected.id == "cred-ok"
+
+    # The DEAD entry is still marked dead on disk — not cleared by TTL.
+    auth_payload = json.loads((tmp_path / "hermes" / "auth.json").read_text())
+    dead_entry = next(e for e in auth_payload["credential_pool"]["openai-codex"]
+                       if e["id"] == "cred-dead")
+    assert dead_entry["last_status"] == STATUS_DEAD
+
+
+def test_429_rate_limit_still_uses_exhausted_not_dead(tmp_path, monkeypatch):
+    """429 rate limits must NOT be treated as terminal.
+
+    They should keep the existing 1-hour TTL cooldown semantics so the
+    credential re-enters rotation once the rate window resets.
+    """
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    _write_auth_store(
+        tmp_path,
+        {
+            "version": 1,
+            "credential_pool": {
+                "openai-codex": [
+                    {
+                        "id": "cred-1",
+                        "label": "primary",
+                        "auth_type": "oauth",
+                        "priority": 0,
+                        "source": "manual:device_code",
+                        "access_token": "at-1",
+                        "refresh_token": "rt-1",
+                    },
+                    {
+                        "id": "cred-2",
+                        "label": "secondary",
+                        "auth_type": "oauth",
+                        "priority": 1,
+                        "source": "manual:device_code",
+                        "access_token": "at-2",
+                        "refresh_token": "rt-2",
+                    },
+                ]
+            },
+        },
+    )
+
+    from agent.credential_pool import load_pool, STATUS_EXHAUSTED
+
+    pool = load_pool("openai-codex")
+    assert pool.select().id == "cred-1"
+
+    next_entry = pool.mark_exhausted_and_rotate(
+        status_code=429,
+        error_context={"reason": "rate_limit_exceeded", "message": "Rate limit exceeded"},
+    )
+    assert next_entry is not None
+    assert next_entry.id == "cred-2"
+
+    auth_payload = json.loads((tmp_path / "hermes" / "auth.json").read_text())
+    persisted = auth_payload["credential_pool"]["openai-codex"][0]
+    # 429 stays exhausted (transient) — NOT dead.
+    assert persisted["last_status"] == STATUS_EXHAUSTED
+    assert persisted["last_error_code"] == 429
+
+
+def test_generic_401_without_terminal_reason_still_uses_exhausted(tmp_path, monkeypatch):
+    """A 401 with no specific code/reason should keep TTL semantics.
+
+    Only specific terminal reasons (token_invalidated, token_revoked, etc.)
+    transition to DEAD.  A generic 401 might be a transient server-side
+    issue worth retrying after the 5-min TTL.
+    """
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    _write_auth_store(
+        tmp_path,
+        {
+            "version": 1,
+            "credential_pool": {
+                "openai-codex": [
+                    {
+                        "id": "cred-1",
+                        "label": "primary",
+                        "auth_type": "oauth",
+                        "priority": 0,
+                        "source": "manual:device_code",
+                        "access_token": "at-1",
+                        "refresh_token": "rt-1",
+                    },
+                    {
+                        "id": "cred-2",
+                        "label": "secondary",
+                        "auth_type": "oauth",
+                        "priority": 1,
+                        "source": "manual:device_code",
+                        "access_token": "at-2",
+                        "refresh_token": "rt-2",
+                    },
+                ]
+            },
+        },
+    )
+
+    from agent.credential_pool import load_pool, STATUS_EXHAUSTED
+
+    pool = load_pool("openai-codex")
+    pool.select()
+
+    # 401 with no specific reason — stays exhausted, NOT dead.
+    pool.mark_exhausted_and_rotate(
+        status_code=401,
+        error_context={"message": "Unauthorized"},
+    )
+
+    auth_payload = json.loads((tmp_path / "hermes" / "auth.json").read_text())
+    persisted = auth_payload["credential_pool"]["openai-codex"][0]
+    assert persisted["last_status"] == STATUS_EXHAUSTED
+    assert persisted["last_error_code"] == 401
+
+
+def test_dead_manual_entry_pruned_after_24h(tmp_path, monkeypatch):
+    """A DEAD manual entry is removed from the pool after the prune TTL.
+
+    Manual entries (``manual:*``) are independent credentials with no
+    singleton to re-seed from, so we can clean them up after a quiet
+    window without losing recoverability — the user can always re-add
+    via ``hermes auth add``.
+    """
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    # DEAD entry from > 24h ago
+    long_ago = time.time() - (25 * 3600)
+    _write_auth_store(
+        tmp_path,
+        {
+            "version": 1,
+            "credential_pool": {
+                "openai-codex": [
+                    {
+                        "id": "cred-old-dead",
+                        "label": "ancient-dead",
+                        "auth_type": "oauth",
+                        "priority": 0,
+                        "source": "manual:device_code",
+                        "access_token": "stale",
+                        "refresh_token": "stale",
+                        "last_status": "dead",
+                        "last_status_at": long_ago,
+                        "last_error_code": 401,
+                        "last_error_reason": "token_invalidated",
+                    },
+                    {
+                        "id": "cred-ok",
+                        "label": "healthy",
+                        "auth_type": "oauth",
+                        "priority": 1,
+                        "source": "manual:device_code",
+                        "access_token": "healthy-at",
+                        "refresh_token": "healthy-rt",
+                    },
+                ]
+            },
+        },
+    )
+
+    from agent.credential_pool import load_pool
+
+    pool = load_pool("openai-codex")
+    # Trigger _available_entries via select; that runs the prune.
+    selected = pool.select()
+    assert selected is not None
+    assert selected.id == "cred-ok"
+
+    # On-disk pool should have the dead entry removed.
+    auth_payload = json.loads((tmp_path / "hermes" / "auth.json").read_text())
+    persisted = auth_payload["credential_pool"]["openai-codex"]
+    assert len(persisted) == 1
+    assert persisted[0]["id"] == "cred-ok"
+
+
+def test_dead_manual_entry_kept_within_24h(tmp_path, monkeypatch):
+    """A DEAD manual entry stays in the pool until the prune TTL elapses.
+
+    Recent DEAD entries are kept so the audit trail (last_error_reason,
+    timestamps) remains visible while the user investigates.  They simply
+    don't participate in rotation (covered by the DEAD-skip test above).
+    """
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    # DEAD entry from only an hour ago — well within the 24h window
+    recent = time.time() - 3600
+    _write_auth_store(
+        tmp_path,
+        {
+            "version": 1,
+            "credential_pool": {
+                "openai-codex": [
+                    {
+                        "id": "cred-recent-dead",
+                        "label": "recent-dead",
+                        "auth_type": "oauth",
+                        "priority": 0,
+                        "source": "manual:device_code",
+                        "access_token": "stale",
+                        "refresh_token": "stale",
+                        "last_status": "dead",
+                        "last_status_at": recent,
+                        "last_error_code": 401,
+                        "last_error_reason": "token_invalidated",
+                    },
+                    {
+                        "id": "cred-ok",
+                        "label": "healthy",
+                        "auth_type": "oauth",
+                        "priority": 1,
+                        "source": "manual:device_code",
+                        "access_token": "healthy-at",
+                        "refresh_token": "healthy-rt",
+                    },
+                ]
+            },
+        },
+    )
+
+    from agent.credential_pool import load_pool, STATUS_DEAD
+
+    pool = load_pool("openai-codex")
+    selected = pool.select()
+    assert selected is not None
+    assert selected.id == "cred-ok"
+
+    # On-disk pool should still have BOTH entries — recent dead is preserved.
+    auth_payload = json.loads((tmp_path / "hermes" / "auth.json").read_text())
+    persisted = auth_payload["credential_pool"]["openai-codex"]
+    assert len(persisted) == 2
+    dead_entry = next(e for e in persisted if e["id"] == "cred-recent-dead")
+    assert dead_entry["last_status"] == STATUS_DEAD
+
+
+def test_dead_singleton_seeded_entry_not_pruned(tmp_path, monkeypatch):
+    """A DEAD ``device_code`` entry must NOT be pruned even after 24h.
+
+    Singleton-seeded entries get re-created by ``_seed_from_singletons`` on
+    every ``load_pool()``, so pruning them is pointless — they reappear
+    immediately with the same stale singleton tokens.  Keep them visible
+    with the DEAD marker so the user knows what's broken.
+    """
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    long_ago = time.time() - (48 * 3600)
+    _write_auth_store(
+        tmp_path,
+        {
+            "version": 1,
+            "providers": {
+                "openai-codex": {
+                    "tokens": {"access_token": "revoked-at", "refresh_token": "revoked-rt"},
+                    "last_refresh": "2026-01-01T00:00:00Z",
+                    "auth_mode": "chatgpt",
+                },
+            },
+            "credential_pool": {
+                "openai-codex": [
+                    {
+                        "id": "cred-seeded-dead",
+                        "label": "seeded-dead",
+                        "auth_type": "oauth",
+                        "priority": 0,
+                        "source": "device_code",   # singleton-seeded, NOT manual
+                        "access_token": "revoked-at",
+                        "refresh_token": "revoked-rt",
+                        "last_status": "dead",
+                        "last_status_at": long_ago,
+                        "last_error_code": 401,
+                        "last_error_reason": "token_invalidated",
+                    },
+                ]
+            },
+        },
+    )
+
+    from agent.credential_pool import load_pool, STATUS_DEAD
+
+    pool = load_pool("openai-codex")
+    # No healthy entry available; select returns None (pool empty for rotation).
+    assert pool.select() is None
+
+    # On-disk: the singleton-seeded DEAD entry is preserved.
+    auth_payload = json.loads((tmp_path / "hermes" / "auth.json").read_text())
+    persisted = auth_payload["credential_pool"]["openai-codex"]
+    assert len(persisted) == 1
+    assert persisted[0]["id"] == "cred-seeded-dead"
+    assert persisted[0]["last_status"] == STATUS_DEAD
+
+
 def test_load_pool_seeds_env_api_key(tmp_path, monkeypatch):
     monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
     monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-seeded")
@@ -816,7 +1225,7 @@ def test_load_pool_migrates_nous_provider_state(tmp_path, monkeypatch):
                     "inference_base_url": "https://inference.example.com/v1",
                     "client_id": "hermes-cli",
                     "token_type": "Bearer",
-                    "scope": "inference:mint_agent_key",
+                    "scope": "inference:invoke",
                     "access_token": "access-token",
                     "refresh_token": "refresh-token",
                     "expires_at": "2026-03-24T12:00:00+00:00",
@@ -843,7 +1252,7 @@ def test_load_pool_mirrors_nous_invoke_jwt_agent_key_runtime_api_key(tmp_path, m
     expires_at = datetime.fromtimestamp(time.time() + 3600, tz=timezone.utc).isoformat()
     token = _jwt_with_claims({
         "sub": "test-user",
-        "scope": ["inference:invoke", "inference:mint_agent_key"],
+        "scope": ["inference:invoke"],
         "exp": int(time.time() + 3600),
     })
     _write_auth_store(
@@ -857,7 +1266,7 @@ def test_load_pool_mirrors_nous_invoke_jwt_agent_key_runtime_api_key(tmp_path, m
                     "inference_base_url": "https://inference.example.com/v1",
                     "client_id": "hermes-cli",
                     "token_type": "Bearer",
-                    "scope": "inference:invoke inference:mint_agent_key",
+                    "scope": "inference:invoke",
                     "access_token": token,
                     "refresh_token": "refresh-token",
                     "expires_at": expires_at,
@@ -884,6 +1293,29 @@ def test_load_pool_mirrors_nous_invoke_jwt_agent_key_runtime_api_key(tmp_path, m
     assert pool_entry["agent_key_expires_at"] == expires_at
 
 
+def test_nous_runtime_api_key_rejects_opaque_agent_key():
+    from agent.credential_pool import PooledCredential
+
+    entry = PooledCredential(
+        provider="nous",
+        id="nous-opaque",
+        label="opaque",
+        auth_type="oauth",
+        priority=0,
+        source="device_code",
+        access_token="opaque-access-token",
+        refresh_token="refresh-token",
+        agent_key="opaque-agent-key",
+        agent_key_expires_at=datetime.fromtimestamp(
+            time.time() + 3600,
+            tz=timezone.utc,
+        ).isoformat(),
+        extra={"scope": "inference:invoke"},
+    )
+
+    assert entry.runtime_api_key == ""
+
+
 def test_nous_pool_terminal_refresh_removes_device_code_entry(tmp_path, monkeypatch):
     monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
     monkeypatch.setenv("HERMES_SHARED_AUTH_DIR", str(tmp_path / "shared"))
@@ -898,7 +1330,7 @@ def test_nous_pool_terminal_refresh_removes_device_code_entry(tmp_path, monkeypa
                     "inference_base_url": "https://inference.example.com/v1",
                     "client_id": "hermes-cli",
                     "token_type": "Bearer",
-                    "scope": "inference:mint_agent_key",
+                    "scope": "inference:invoke",
                     "access_token": "access-token",
                     "refresh_token": "refresh-token",
                     "expires_at": "2026-03-24T12:00:00+00:00",
@@ -1070,7 +1502,7 @@ def test_load_pool_migrates_nous_provider_state_preserves_tls(tmp_path, monkeypa
                     "inference_base_url": "https://inference.example.com/v1",
                     "client_id": "hermes-cli",
                     "token_type": "Bearer",
-                    "scope": "inference:mint_agent_key",
+                    "scope": "inference:invoke",
                     "access_token": "access-token",
                     "refresh_token": "refresh-token",
                     "expires_at": "2026-03-24T12:00:00+00:00",
@@ -1996,7 +2428,7 @@ def test_sync_nous_entry_from_auth_store_adopts_newer_tokens(tmp_path, monkeypat
                     "inference_base_url": "https://inference.example.com/v1",
                     "client_id": "hermes-cli",
                     "token_type": "Bearer",
-                    "scope": "inference:mint_agent_key",
+                    "scope": "inference:invoke",
                     "access_token": "access-OLD",
                     "refresh_token": "refresh-OLD",
                     "expires_at": "2026-03-24T12:00:00+00:00",
@@ -2026,7 +2458,7 @@ def test_sync_nous_entry_from_auth_store_adopts_newer_tokens(tmp_path, monkeypat
                     "inference_base_url": "https://inference.example.com/v1",
                     "client_id": "hermes-cli",
                     "token_type": "Bearer",
-                    "scope": "inference:mint_agent_key",
+                    "scope": "inference:invoke",
                     "access_token": "access-NEW",
                     "refresh_token": "refresh-NEW",
                     "expires_at": "2026-03-24T12:30:00+00:00",
@@ -2058,7 +2490,7 @@ def test_sync_nous_entry_noop_when_tokens_match(tmp_path, monkeypatch):
                     "inference_base_url": "https://inference.example.com/v1",
                     "client_id": "hermes-cli",
                     "token_type": "Bearer",
-                    "scope": "inference:mint_agent_key",
+                    "scope": "inference:invoke",
                     "access_token": "access-token",
                     "refresh_token": "refresh-token",
                     "expires_at": "2026-03-24T12:00:00+00:00",
@@ -2095,7 +2527,7 @@ def test_nous_exhausted_entry_recovers_via_auth_store_sync(tmp_path, monkeypatch
                     "inference_base_url": "https://inference.example.com/v1",
                     "client_id": "hermes-cli",
                     "token_type": "Bearer",
-                    "scope": "inference:mint_agent_key",
+                    "scope": "inference:invoke",
                     "access_token": "access-OLD",
                     "refresh_token": "refresh-OLD",
                     "expires_at": "2026-03-24T12:00:00+00:00",
@@ -2132,7 +2564,7 @@ def test_nous_exhausted_entry_recovers_via_auth_store_sync(tmp_path, monkeypatch
                     "inference_base_url": "https://inference.example.com/v1",
                     "client_id": "hermes-cli",
                     "token_type": "Bearer",
-                    "scope": "inference:mint_agent_key",
+                    "scope": "inference:invoke",
                     "access_token": "access-FRESH",
                     "refresh_token": "refresh-FRESH",
                     "expires_at": "2026-03-24T12:30:00+00:00",
diff --git a/tests/agent/test_crossloop_client_cache.py b/tests/agent/test_crossloop_client_cache.py
index be8d51cea8c..364c94e83d0 100644
--- a/tests/agent/test_crossloop_client_cache.py
+++ b/tests/agent/test_crossloop_client_cache.py
@@ -10,9 +10,7 @@ so it can run without optional dependencies like firecrawl.
 
 import asyncio
 import threading
-from concurrent.futures import ThreadPoolExecutor
 from unittest.mock import patch, MagicMock
-from types import SimpleNamespace
 
 import pytest
 
@@ -32,7 +30,6 @@ def _stub_resolve_provider_client(provider, model, async_mode, **kw):
 @pytest.fixture(autouse=True)
 def _clean_client_cache():
     """Clear the client cache before each test."""
-    import importlib
     # We need to patch before importing
     with patch.dict("sys.modules", {}):
         pass
@@ -48,7 +45,7 @@ class TestCrossLoopCacheIsolation:
 
     def test_same_loop_reuses_client(self):
         """Within a single event loop, the same client should be returned."""
-        from agent.auxiliary_client import _get_cached_client, _client_cache
+        from agent.auxiliary_client import _get_cached_client
 
         loop = asyncio.new_event_loop()
         asyncio.set_event_loop(loop)
diff --git a/tests/agent/test_curator.py b/tests/agent/test_curator.py
index ffbf907c9e5..764f714897e 100644
--- a/tests/agent/test_curator.py
+++ b/tests/agent/test_curator.py
@@ -7,7 +7,6 @@ tests run fully offline and the curator module doesn't need real credentials.
 from __future__ import annotations
 
 import importlib
-import json
 from datetime import datetime, timedelta, timezone
 from pathlib import Path
 
@@ -882,5 +881,5 @@ def test_curator_slot_is_canonical_aux_task():
     assert "curator" in _AUX_TASK_SLOTS, \
         "curator missing from _AUX_TASK_SLOTS (dashboard REST API)"
 
-    # 4. apps/dashboard/src/pages/ModelsPage.tsx is checked at build time; the tsx
+    # 4. web/src/pages/ModelsPage.tsx is checked at build time; the tsx
     #    array and this tuple share a ``Must match _AUX_TASK_SLOTS`` comment.
diff --git a/tests/agent/test_curator_reports.py b/tests/agent/test_curator_reports.py
index 29896a950fd..20773ad9a2f 100644
--- a/tests/agent/test_curator_reports.py
+++ b/tests/agent/test_curator_reports.py
@@ -7,8 +7,7 @@ the standard log dir, not inside the user's ``skills/`` data directory.
 from __future__ import annotations
 
 import json
-import os
-from datetime import datetime, timezone, timedelta
+from datetime import datetime, timezone
 from pathlib import Path
 
 import pytest
diff --git a/tests/agent/test_display.py b/tests/agent/test_display.py
index 5e18fa17e0c..994aae28648 100644
--- a/tests/agent/test_display.py
+++ b/tests/agent/test_display.py
@@ -1,9 +1,8 @@
 """Tests for agent/display.py — build_tool_preview() and inline diff previews."""
 
-import os
 import json
 import pytest
-from unittest.mock import MagicMock, patch
+from unittest.mock import MagicMock
 
 from agent.display import (
     build_tool_preview,
diff --git a/tests/agent/test_display_todo_progress.py b/tests/agent/test_display_todo_progress.py
index 7205602e01a..d3d804ce14f 100644
--- a/tests/agent/test_display_todo_progress.py
+++ b/tests/agent/test_display_todo_progress.py
@@ -5,7 +5,6 @@ todo tool call paths: read, create (merge=False), update (merge=True).
 """
 
 import json
-import pytest
 from agent.display import get_cute_tool_message
 
 
diff --git a/tests/agent/test_display_tool_failure.py b/tests/agent/test_display_tool_failure.py
index ca56e20f3a1..74535831d78 100644
--- a/tests/agent/test_display_tool_failure.py
+++ b/tests/agent/test_display_tool_failure.py
@@ -7,7 +7,6 @@ not a generic "[error]".
 """
 
 import json
-import pytest
 
 from agent.display import (
     _detect_tool_failure,
diff --git a/tests/agent/test_error_classifier.py b/tests/agent/test_error_classifier.py
index 579b364d146..b98fbe5beb9 100644
--- a/tests/agent/test_error_classifier.py
+++ b/tests/agent/test_error_classifier.py
@@ -59,6 +59,7 @@ class TestFailoverReason:
             "invalid_encrypted_content",
             "multimodal_tool_content_unsupported",
             "provider_policy_blocked",
+            "content_policy_blocked",
             "thinking_signature", "long_context_tier",
             "oauth_long_context_beta_forbidden",
             "llama_cpp_grammar_pattern",
@@ -254,12 +255,51 @@ class TestClassifyApiError:
         assert result.reason == FailoverReason.billing
         assert result.retryable is False
 
+    def test_402_out_of_funds_billing(self):
+        e = MockAPIError(
+            "Payment Required",
+            status_code=402,
+            body={
+                "status": 402,
+                "message": (
+                    "Your API key has run out of funds. Please go visit the "
+                    "portal to sort that out: https://portal.nousresearch.com"
+                ),
+            },
+        )
+        result = classify_api_error(e)
+        assert result.reason == FailoverReason.billing
+        assert result.retryable is False
+
     def test_402_transient_usage_limit(self):
         e = MockAPIError("usage limit exceeded, try again later", status_code=402)
         result = classify_api_error(e)
         assert result.reason == FailoverReason.rate_limit
         assert result.retryable is True
 
+    def test_403_plan_entitlement_billing(self):
+        e = MockAPIError("This plan does not include the requested model", status_code=403)
+        result = classify_api_error(e)
+        assert result.reason == FailoverReason.billing
+        assert result.retryable is False
+
+    def test_404_free_tier_model_block_is_billing(self):
+        e = MockAPIError(
+            "Not Found",
+            status_code=404,
+            body={
+                "status": 404,
+                "message": (
+                    "Model 'gpt-5' is not available on the Free Tier. "
+                    "Upgrade at https://portal.nousresearch.com or pick a free model."
+                ),
+            },
+        )
+        result = classify_api_error(e, provider="nous", model="gpt-5")
+        assert result.reason == FailoverReason.billing
+        assert result.retryable is False
+        assert result.should_fallback is True
+
     # ── Rate limit ──
 
     def test_429_rate_limit(self):
@@ -427,6 +467,78 @@ class TestClassifyApiError:
         result = classify_api_error(e)
         assert result.reason == FailoverReason.provider_policy_blocked
 
+    # ── Provider content-policy block (per-prompt safety filter) ──
+    #
+    # Distinct from ``provider_policy_blocked`` above — these are upstream
+    # model-provider safety refusals for THIS prompt, not OpenRouter
+    # account-level data policy. Recovery is fallback model, not config fix.
+    # See issue #18028 — OpenAI Codex was burning 3 retries on identical
+    # refusals before users saw "API failed after 3 retries" on Telegram.
+
+    def test_message_only_cyber_content_policy_blocked(self):
+        # OpenAI Codex returns this without an HTTP status. Retrying the
+        # same prompt three times only repeats the same policy decision, so
+        # the classifier must jump straight to fallback / abort instead of
+        # leaving it in the retryable ``unknown`` bucket.
+        e = Exception(
+            "This content was flagged for possible cybersecurity risk. If this "
+            "seems wrong, try rephrasing your request. To get authorized for "
+            "security work, join the Trusted Access for Cyber program."
+        )
+        result = classify_api_error(e, provider="openai-codex", model="gpt-5.5")
+        assert result.reason == FailoverReason.content_policy_blocked
+        assert result.retryable is False
+        assert result.should_fallback is True
+        assert result.should_compress is False
+
+    def test_400_cyber_content_policy_blocked(self):
+        # When the SDK does attach a status (e.g. 400), the safety pattern
+        # must still beat the format_error fallthrough.
+        e = MockAPIError(
+            "This content was flagged for possible cybersecurity risk",
+            status_code=400,
+        )
+        result = classify_api_error(e, provider="openai-codex", model="gpt-5.5")
+        assert result.reason == FailoverReason.content_policy_blocked
+        assert result.retryable is False
+        assert result.should_fallback is True
+
+    def test_openai_usage_policy_violation_content_policy_blocked(self):
+        # OpenAI moderation refusal wording from chat completions / responses.
+        e = MockAPIError(
+            "Your request was flagged by the moderation system as potentially "
+            "violating OpenAI's usage policies.",
+            status_code=400,
+        )
+        result = classify_api_error(e, provider="openai", model="gpt-4o")
+        assert result.reason == FailoverReason.content_policy_blocked
+        assert result.retryable is False
+        assert result.should_fallback is True
+
+    def test_anthropic_safety_system_content_policy_blocked(self):
+        # Anthropic safety refusal — distinct phrasing from OpenAI.
+        e = Exception(
+            "Your prompt was flagged by our safety system. Please rephrase "
+            "and try again."
+        )
+        result = classify_api_error(e, provider="anthropic", model="claude-3-5-sonnet")
+        assert result.reason == FailoverReason.content_policy_blocked
+        assert result.retryable is False
+        assert result.should_fallback is True
+
+    def test_azure_content_filter_content_policy_blocked(self):
+        # Azure OpenAI returns ``content_filter`` finish reason / error code
+        # and ``ResponsibleAIPolicyViolation`` in error bodies — both narrow
+        # tokens, not the generic English phrase.
+        e = MockAPIError(
+            "The response was filtered: ResponsibleAIPolicyViolation "
+            "(finish_reason=content_filter).",
+            status_code=400,
+        )
+        result = classify_api_error(e, provider="azure", model="gpt-4o")
+        assert result.reason == FailoverReason.content_policy_blocked
+        assert result.retryable is False
+
     def test_404_model_not_found_still_works(self):
         # Regression guard: the new policy-block check must not swallow
         # genuine model_not_found 404s.
@@ -753,6 +865,19 @@ class TestClassifyApiError:
         result = classify_api_error(e)
         assert result.reason == FailoverReason.context_overflow
 
+    def test_error_code_model_not_supported_on_free_tier_is_billing(self):
+        e = MockAPIError(
+            "Model unavailable",
+            body={
+                "error": {
+                    "code": "model_not_supported_on_free_tier",
+                    "message": "Model 'gpt-5' is not available on the Free Tier.",
+                }
+            },
+        )
+        result = classify_api_error(e, provider="nous", model="gpt-5")
+        assert result.reason == FailoverReason.billing
+
     # ── Message-only patterns (no status code) ──
 
     def test_message_billing_pattern(self):
@@ -760,6 +885,11 @@ class TestClassifyApiError:
         result = classify_api_error(e)
         assert result.reason == FailoverReason.billing
 
+    def test_message_free_tier_model_block_is_billing(self):
+        e = Exception("Model 'gpt-5' is not available on the Free Tier.")
+        result = classify_api_error(e, provider="nous", model="gpt-5")
+        assert result.reason == FailoverReason.billing
+
     def test_message_rate_limit_pattern(self):
         e = Exception("rate limit reached for this model")
         result = classify_api_error(e)
diff --git a/tests/agent/test_external_skills.py b/tests/agent/test_external_skills.py
index 1a9cd63d580..e49aa5e3962 100644
--- a/tests/agent/test_external_skills.py
+++ b/tests/agent/test_external_skills.py
@@ -2,7 +2,6 @@
 
 import json
 import os
-from pathlib import Path
 from unittest.mock import patch
 
 import pytest
diff --git a/tests/agent/test_external_skills_dirs_cache.py b/tests/agent/test_external_skills_dirs_cache.py
index 277214bd0d0..8baf3de4702 100644
--- a/tests/agent/test_external_skills_dirs_cache.py
+++ b/tests/agent/test_external_skills_dirs_cache.py
@@ -11,7 +11,6 @@ cache invalidates when config.yaml's mtime changes.
 from __future__ import annotations
 
 import os
-import time
 from pathlib import Path
 from unittest.mock import patch
 
diff --git a/tests/agent/test_file_safety.py b/tests/agent/test_file_safety.py
index a7ff019d438..b0303d561f9 100644
--- a/tests/agent/test_file_safety.py
+++ b/tests/agent/test_file_safety.py
@@ -4,8 +4,6 @@ Run with:  python -m pytest tests/agent/test_file_safety.py -v
 """
 
 import os
-import tempfile
-from pathlib import Path
 from unittest.mock import patch
 
 import pytest
diff --git a/tests/agent/test_file_safety_cross_profile.py b/tests/agent/test_file_safety_cross_profile.py
index cf3605774a3..d9d42bc5409 100644
--- a/tests/agent/test_file_safety_cross_profile.py
+++ b/tests/agent/test_file_safety_cross_profile.py
@@ -12,7 +12,6 @@ afterwards that the second path belonged to a different profile.
 """
 from __future__ import annotations
 
-import os
 from pathlib import Path
 
 import pytest
diff --git a/tests/agent/test_gemini_cloudcode.py b/tests/agent/test_gemini_cloudcode.py
index 480f562aa64..600a06ffe93 100644
--- a/tests/agent/test_gemini_cloudcode.py
+++ b/tests/agent/test_gemini_cloudcode.py
@@ -18,8 +18,6 @@ import json
 import stat
 import time
 from pathlib import Path
-from types import SimpleNamespace
-from unittest.mock import MagicMock, patch
 
 import pytest
 
diff --git a/tests/agent/test_gemini_free_tier_gate.py b/tests/agent/test_gemini_free_tier_gate.py
index bbd74389f53..f2d47653472 100644
--- a/tests/agent/test_gemini_free_tier_gate.py
+++ b/tests/agent/test_gemini_free_tier_gate.py
@@ -3,7 +3,6 @@ from __future__ import annotations
 
 from unittest.mock import MagicMock, patch
 
-import pytest
 
 from agent.gemini_native_adapter import (
     gemini_http_error,
diff --git a/tests/agent/test_image_routing.py b/tests/agent/test_image_routing.py
index ddb11cba409..b5a43f1ff0e 100644
--- a/tests/agent/test_image_routing.py
+++ b/tests/agent/test_image_routing.py
@@ -6,7 +6,6 @@ import base64
 from pathlib import Path
 from unittest.mock import patch
 
-import pytest
 
 from agent.image_routing import (
     _coerce_capability_bool,
@@ -16,6 +15,7 @@ from agent.image_routing import (
     _supports_vision_override,
     build_native_content_parts,
     decide_image_input_mode,
+    extract_image_refs,
 )
 
 
@@ -449,3 +449,190 @@ class TestLargeImageHandling:
         assert len(parts) == 2
         assert parts[0]["type"] == "text"
         assert parts[1]["type"] == "image_url"
+
+
+# ─── extract_image_refs ──────────────────────────────────────────────────────
+
+
+class TestExtractImageRefs:
+    """Scan task body / inbound text for image paths and URLs (kanban worker
+    enrichment, issue raised May 2026)."""
+
+    def test_empty_or_none_returns_empty(self):
+        assert extract_image_refs("") == ([], [])
+        assert extract_image_refs(None) == ([], [])  # type: ignore[arg-type]
+
+    def test_finds_absolute_path(self, tmp_path: Path):
+        img = tmp_path / "screenshot.png"
+        img.write_bytes(_png_bytes())
+        body = f"Look at {img} and tell me what's wrong."
+        paths, urls = extract_image_refs(body)
+        assert paths == [str(img)]
+        assert urls == []
+
+    def test_finds_home_relative_path(self, tmp_path: Path, monkeypatch):
+        # Simulate ~/foo.png by pointing HOME at tmp_path and creating the file
+        monkeypatch.setenv("HOME", str(tmp_path))
+        img = tmp_path / "foo.png"
+        img.write_bytes(_png_bytes())
+        paths, urls = extract_image_refs("see ~/foo.png please")
+        assert paths == [str(img)]
+        assert urls == []
+
+    def test_skips_nonexistent_paths(self, tmp_path: Path):
+        # Path-shaped but no file on disk → skipped.
+        body = f"What's at {tmp_path}/never_created.png ?"
+        paths, urls = extract_image_refs(body)
+        assert paths == []
+        assert urls == []
+
+    def test_finds_http_image_url(self):
+        body = "Check out https://example.com/photos/cat.png — cute right?"
+        paths, urls = extract_image_refs(body)
+        assert paths == []
+        assert urls == ["https://example.com/photos/cat.png"]
+
+    def test_finds_https_url_with_query_string(self):
+        body = "Diagram: https://cdn.example.com/img.jpeg?size=large&v=2 here"
+        paths, urls = extract_image_refs(body)
+        assert urls == ["https://cdn.example.com/img.jpeg?size=large&v=2"]
+
+    def test_url_trailing_punctuation_stripped(self):
+        # Prose punctuation right after the URL must not be part of the URL.
+        body = "See https://example.com/a.png."
+        paths, urls = extract_image_refs(body)
+        assert urls == ["https://example.com/a.png"]
+
+    def test_ignores_non_image_urls(self):
+        body = "See https://example.com/page.html and https://x.com/y.pdf"
+        paths, urls = extract_image_refs(body)
+        assert urls == []
+
+    def test_dedupes_paths_and_urls(self, tmp_path: Path):
+        img = tmp_path / "dup.png"
+        img.write_bytes(_png_bytes())
+        body = (
+            f"First {img} then again {img}. "
+            "Also https://example.com/x.png and https://example.com/x.png again."
+        )
+        paths, urls = extract_image_refs(body)
+        assert paths == [str(img)]
+        assert urls == ["https://example.com/x.png"]
+
+    def test_ignores_paths_in_fenced_code_block(self, tmp_path: Path):
+        img = tmp_path / "real.png"
+        img.write_bytes(_png_bytes())
+        body = (
+            "Outside the block, attach this:\n"
+            f"{img}\n"
+            "But not these examples:\n"
+            "```\n"
+            f"some_other_image: /tmp/example.png\n"
+            f"url: https://example.com/example.png\n"
+            "```\n"
+        )
+        paths, urls = extract_image_refs(body)
+        assert paths == [str(img)]
+        assert urls == []
+
+    def test_ignores_paths_in_inline_code(self, tmp_path: Path):
+        img = tmp_path / "real.jpg"
+        img.write_bytes(_png_bytes())
+        body = (
+            f"Attach {img}, but ignore the example "
+            "`https://example.com/skip.png` in backticks."
+        )
+        paths, urls = extract_image_refs(body)
+        assert paths == [str(img)]
+        assert urls == []
+
+    def test_does_not_match_paths_inside_urls(self, tmp_path: Path):
+        # The lookbehind in the regex prevents matching the path-portion of
+        # a URL as a local path. Only the URL should be detected.
+        body = "Just the URL: https://example.com/some/dir/image.png"
+        paths, urls = extract_image_refs(body)
+        assert paths == []
+        assert urls == ["https://example.com/some/dir/image.png"]
+
+    def test_mixed_paths_and_urls(self, tmp_path: Path):
+        img = tmp_path / "local.png"
+        img.write_bytes(_png_bytes())
+        body = (
+            f"Compare local {img} against the design at "
+            "https://example.com/design/v2.png — does it match?"
+        )
+        paths, urls = extract_image_refs(body)
+        assert paths == [str(img)]
+        assert urls == ["https://example.com/design/v2.png"]
+
+    def test_case_insensitive_extension(self, tmp_path: Path):
+        img = tmp_path / "shouty.PNG"
+        img.write_bytes(_png_bytes())
+        body = f"see {img}"
+        paths, urls = extract_image_refs(body)
+        assert paths == [str(img)]
+
+
+# ─── build_native_content_parts with URLs ────────────────────────────────────
+
+
+class TestBuildNativeContentPartsURLs:
+    """URL pass-through support added so kanban task bodies (and other
+    inbound surfaces) can route remote image URLs straight to the model."""
+
+    def test_url_only_no_local_paths(self):
+        parts, skipped = build_native_content_parts(
+            "what is this?",
+            [],
+            image_urls=["https://example.com/diagram.png"],
+        )
+        assert skipped == []
+        assert len(parts) == 2
+        assert parts[0]["type"] == "text"
+        assert "[Image attached: https://example.com/diagram.png]" in parts[0]["text"]
+        assert parts[0]["text"].startswith("what is this?")
+        assert parts[1] == {
+            "type": "image_url",
+            "image_url": {"url": "https://example.com/diagram.png"},
+        }
+
+    def test_mixed_path_and_url(self, tmp_path: Path):
+        img = tmp_path / "local.png"
+        img.write_bytes(_png_bytes())
+        parts, skipped = build_native_content_parts(
+            "compare these",
+            [str(img)],
+            image_urls=["https://example.com/remote.jpg"],
+        )
+        assert skipped == []
+        # 1 text + 2 image parts (local data URL first, then remote URL).
+        image_parts = [p for p in parts if p.get("type") == "image_url"]
+        assert len(image_parts) == 2
+        assert image_parts[0]["image_url"]["url"].startswith("data:image/png;base64,")
+        assert image_parts[1]["image_url"]["url"] == "https://example.com/remote.jpg"
+        text = parts[0]["text"]
+        assert "[Image attached at:" in text
+        assert "[Image attached: https://example.com/remote.jpg]" in text
+
+    def test_empty_url_list_is_no_op(self, tmp_path: Path):
+        img = tmp_path / "x.png"
+        img.write_bytes(_png_bytes())
+        # image_urls=[] should behave the same as not passing it at all.
+        parts_no_urls, _ = build_native_content_parts("hi", [str(img)])
+        parts_empty_urls, _ = build_native_content_parts("hi", [str(img)], image_urls=[])
+        assert parts_no_urls == parts_empty_urls
+
+    def test_blank_url_strings_are_dropped(self):
+        parts, _ = build_native_content_parts(
+            "x", [], image_urls=["", "  ", "https://example.com/a.png"]
+        )
+        image_parts = [p for p in parts if p.get("type") == "image_url"]
+        assert len(image_parts) == 1
+        assert image_parts[0]["image_url"]["url"] == "https://example.com/a.png"
+
+    def test_url_only_inserts_default_prompt_when_text_empty(self):
+        parts, _ = build_native_content_parts(
+            "", [], image_urls=["https://example.com/a.png"]
+        )
+        assert parts[0]["type"] == "text"
+        assert parts[0]["text"].startswith("What do you see in this image?")
diff --git a/tests/agent/test_insights.py b/tests/agent/test_insights.py
index 2740daf0962..723a40da4fb 100644
--- a/tests/agent/test_insights.py
+++ b/tests/agent/test_insights.py
@@ -2,7 +2,6 @@
 
 import time
 import pytest
-from pathlib import Path
 
 from hermes_state import SessionDB
 from agent.insights import (
@@ -11,7 +10,6 @@ from agent.insights import (
     _format_duration,
     _bar_chart,
     _has_known_pricing,
-    _DEFAULT_PRICING,
 )
 
 
@@ -596,7 +594,6 @@ class TestEdgeCases:
 
     def test_tool_usage_from_tool_calls_json(self, db):
         """Tool usage should be extracted from tool_calls JSON when tool_name is NULL."""
-        import json as _json
         db.create_session(session_id="s1", source="cli", model="test")
         # Assistant message with tool_calls (this is what CLI produces)
         db.append_message("s1", role="assistant", content="Let me search",
diff --git a/tests/agent/test_jiter_preload.py b/tests/agent/test_jiter_preload.py
new file mode 100644
index 00000000000..2fd358b5b71
--- /dev/null
+++ b/tests/agent/test_jiter_preload.py
@@ -0,0 +1,25 @@
+from __future__ import annotations
+
+import importlib
+import sys
+
+from agent import jiter_preload
+
+
+def test_preload_jiter_native_extension_loads_sdk_parser_dependency():
+    assert jiter_preload.preload_jiter_native_extension() is True
+    assert "jiter.jiter" in sys.modules
+
+
+def test_preload_jiter_native_extension_is_best_effort(monkeypatch):
+    monkeypatch.setattr(jiter_preload, "_JITER_PRELOADED", False)
+
+    def _raise_missing(name: str):
+        assert name == "jiter.jiter"
+        raise ModuleNotFoundError(name)
+
+    monkeypatch.setattr(importlib, "import_module", _raise_missing)
+
+    assert jiter_preload.preload_jiter_native_extension() is False
+    assert jiter_preload._JITER_PRELOADED is False
+    assert isinstance(jiter_preload._JITER_PRELOAD_ERROR, ModuleNotFoundError)
diff --git a/tests/agent/test_memory_provider.py b/tests/agent/test_memory_provider.py
index 6f8cfc8a93d..c516e408f7c 100644
--- a/tests/agent/test_memory_provider.py
+++ b/tests/agent/test_memory_provider.py
@@ -2,7 +2,7 @@
 
 import json
 import pytest
-from unittest.mock import MagicMock, patch
+from unittest.mock import MagicMock
 
 from agent.memory_provider import MemoryProvider
 from agent.memory_manager import MemoryManager
@@ -84,6 +84,13 @@ class MetadataMemoryProvider(FakeMemoryProvider):
         self.memory_writes.append((action, target, content, metadata or {}))
 
 
+class MessagesMemoryProvider(FakeMemoryProvider):
+    """Provider that opts into completed-turn message context."""
+
+    def sync_turn(self, user_content, assistant_content, *, session_id="", messages=None):
+        self.synced_turns.append((user_content, assistant_content, session_id, messages))
+
+
 # ---------------------------------------------------------------------------
 # MemoryProvider ABC tests
 # ---------------------------------------------------------------------------
@@ -236,6 +243,28 @@ class TestMemoryManager:
         assert p1.synced_turns == [("user msg", "assistant msg")]
         assert p2.synced_turns == [("user msg", "assistant msg")]
 
+    def test_sync_all_passes_messages_to_opted_in_provider(self):
+        mgr = MemoryManager()
+        p = MessagesMemoryProvider("external")
+        mgr.add_provider(p)
+        messages = [
+            {"role": "assistant", "tool_calls": [{"id": "call-1"}]},
+            {"role": "tool", "tool_call_id": "call-1", "content": "ok"},
+        ]
+
+        mgr.sync_all("user msg", "assistant msg", session_id="sess-1", messages=messages)
+
+        assert p.synced_turns == [("user msg", "assistant msg", "sess-1", messages)]
+
+    def test_sync_all_omits_messages_for_legacy_provider(self):
+        mgr = MemoryManager()
+        p = FakeMemoryProvider("external")
+        mgr.add_provider(p)
+
+        mgr.sync_all("user msg", "assistant msg", messages=[{"role": "tool"}])
+
+        assert p.synced_turns == [("user msg", "assistant msg")]
+
     def test_sync_failure_doesnt_block_others(self):
         """If one provider's sync fails, others still run."""
         mgr = MemoryManager()
@@ -433,7 +462,7 @@ class TestUserInstalledProviderDiscovery:
 
     def test_discover_finds_user_plugins(self, tmp_path, monkeypatch):
         """discover_memory_providers() includes user-installed plugins."""
-        from plugins.memory import discover_memory_providers, _get_user_plugins_dir
+        from plugins.memory import discover_memory_providers
         self._make_user_memory_plugin(tmp_path, "myexternal")
         monkeypatch.setattr(
             "plugins.memory._get_user_plugins_dir",
diff --git a/tests/agent/test_memory_session_switch.py b/tests/agent/test_memory_session_switch.py
index 61cd6edbafd..a40654fa579 100644
--- a/tests/agent/test_memory_session_switch.py
+++ b/tests/agent/test_memory_session_switch.py
@@ -7,7 +7,6 @@ state in initialize() (Hindsight, and any plugin that stores session_id
 for scoped writes) keep writing into the old session's record.
 """
 
-import json
 
 import pytest
 
diff --git a/tests/agent/test_memory_user_id.py b/tests/agent/test_memory_user_id.py
index 7b60b05dd24..2692dcb191d 100644
--- a/tests/agent/test_memory_user_id.py
+++ b/tests/agent/test_memory_user_id.py
@@ -6,7 +6,6 @@ so each gateway user gets their own memory bucket instead of sharing a static on
 
 import json
 import os
-import pytest
 from unittest.mock import MagicMock, patch
 
 from agent.memory_provider import MemoryProvider
diff --git a/tests/agent/test_model_metadata.py b/tests/agent/test_model_metadata.py
index e889f2e67bd..3f9fd56d140 100644
--- a/tests/agent/test_model_metadata.py
+++ b/tests/agent/test_model_metadata.py
@@ -10,13 +10,9 @@ Coverage levels:
   Persistent cache       — save/load, corruption, update, provider isolation
 """
 
-import os
 import time
-import tempfile
 
-import pytest
 import yaml
-from pathlib import Path
 from unittest.mock import patch, MagicMock
 
 from agent.model_metadata import (
@@ -131,10 +127,10 @@ class TestDefaultContextLengths:
         for key, value in DEFAULT_CONTEXT_LENGTHS.items():
             if "claude" not in key:
                 continue
-            # Claude 4.6+ models (4.6 and 4.7) have 1M context at standard
+            # Claude 4.6+ models (4.6, 4.7, 4.8) have 1M context at standard
             # API pricing (no long-context premium).  Older Claude 4.x and
             # 3.x models cap at 200k.
-            if any(tag in key for tag in ("4.6", "4-6", "4.7", "4-7")):
+            if any(tag in key for tag in ("4.6", "4-6", "4.7", "4-7", "4.8", "4-8")):
                 assert value == 1000000, f"{key} should be 1000000"
             else:
                 assert value == 200000, f"{key} should be 200000"
diff --git a/tests/agent/test_model_metadata_local_ctx.py b/tests/agent/test_model_metadata_local_ctx.py
index f449255c073..ca1c5d3f94a 100644
--- a/tests/agent/test_model_metadata_local_ctx.py
+++ b/tests/agent/test_model_metadata_local_ctx.py
@@ -6,12 +6,10 @@ All tests use synthetic inputs — no filesystem or live server required.
 
 import sys
 import os
-import json
 from unittest.mock import MagicMock, patch
 
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
 
-import pytest
 
 
 # ---------------------------------------------------------------------------
@@ -562,7 +560,7 @@ class TestGetModelContextLengthLocalFallback:
 
     def test_non_local_endpoint_does_not_query_local_server(self):
         """For non-local endpoints, _query_local_context_length is not called."""
-        from agent.model_metadata import get_model_context_length, CONTEXT_PROBE_TIERS
+        from agent.model_metadata import get_model_context_length
 
         with patch("agent.model_metadata.get_cached_context_length", return_value=None), \
              patch("agent.model_metadata.fetch_endpoint_model_metadata", return_value={}), \
diff --git a/tests/agent/test_models_dev.py b/tests/agent/test_models_dev.py
index 0353feba1de..41fb4463ec8 100644
--- a/tests/agent/test_models_dev.py
+++ b/tests/agent/test_models_dev.py
@@ -1,8 +1,6 @@
 """Tests for agent.models_dev — models.dev registry integration."""
-import json
 from unittest.mock import patch, MagicMock
 
-import pytest
 from agent.models_dev import (
     PROVIDER_TO_MODELS_DEV,
     _extract_context,
diff --git a/tests/agent/test_non_stream_stale_timeout.py b/tests/agent/test_non_stream_stale_timeout.py
index 702856275f6..281453db16d 100644
--- a/tests/agent/test_non_stream_stale_timeout.py
+++ b/tests/agent/test_non_stream_stale_timeout.py
@@ -11,10 +11,8 @@ Covers:
 
 from __future__ import annotations
 
-import os
 from pathlib import Path
 
-import pytest
 
 
 def _write_config(tmp_path: Path, body: str) -> None:
diff --git a/tests/agent/test_onboarding.py b/tests/agent/test_onboarding.py
index 1eaf0d01d2b..0ae03db3aa8 100644
--- a/tests/agent/test_onboarding.py
+++ b/tests/agent/test_onboarding.py
@@ -3,7 +3,6 @@
 from __future__ import annotations
 
 import yaml
-import pytest
 
 from agent.onboarding import (
     BUSY_INPUT_FLAG,
diff --git a/tests/agent/test_plugin_llm.py b/tests/agent/test_plugin_llm.py
index b31f8097a7e..517bd2d224c 100644
--- a/tests/agent/test_plugin_llm.py
+++ b/tests/agent/test_plugin_llm.py
@@ -10,7 +10,6 @@ from __future__ import annotations
 
 import asyncio
 import base64
-import json
 from types import SimpleNamespace
 from typing import Any
 from unittest.mock import MagicMock
diff --git a/tests/agent/test_prompt_builder.py b/tests/agent/test_prompt_builder.py
index 1715bf00ce6..3f4b0f46209 100644
--- a/tests/agent/test_prompt_builder.py
+++ b/tests/agent/test_prompt_builder.py
@@ -18,7 +18,6 @@ from agent.prompt_builder import (
     build_skills_system_prompt,
     build_nous_subscription_prompt,
     build_context_files_prompt,
-    build_environment_hints,
     CONTEXT_FILE_MAX_CHARS,
     DEFAULT_AGENT_IDENTITY,
     TOOL_USE_ENFORCEMENT_GUIDANCE,
@@ -441,6 +440,7 @@ class TestBuildNousSubscriptionPrompt:
                 features={
                     "web": NousFeatureState("web", "Web tools", True, True, True, True, False, True, "firecrawl"),
                     "image_gen": NousFeatureState("image_gen", "Image generation", True, True, True, True, False, True, "Nous Subscription"),
+                    "video_gen": NousFeatureState("video_gen", "Video generation", False, False, False, False, False, False, ""),
                     "tts": NousFeatureState("tts", "OpenAI TTS", True, True, True, True, False, True, "OpenAI TTS"),
                     "browser": NousFeatureState("browser", "Browser automation", True, True, True, True, False, True, "Browser Use"),
                     "modal": NousFeatureState("modal", "Modal execution", False, True, False, False, False, True, "local"),
@@ -465,6 +465,7 @@ class TestBuildNousSubscriptionPrompt:
                 features={
                     "web": NousFeatureState("web", "Web tools", True, False, False, False, False, True, ""),
                     "image_gen": NousFeatureState("image_gen", "Image generation", True, False, False, False, False, True, ""),
+                    "video_gen": NousFeatureState("video_gen", "Video generation", False, False, False, False, False, False, ""),
                     "tts": NousFeatureState("tts", "OpenAI TTS", True, False, False, False, False, True, ""),
                     "browser": NousFeatureState("browser", "Browser automation", True, False, False, False, False, True, ""),
                     "modal": NousFeatureState("modal", "Modal execution", False, False, False, False, False, True, ""),
@@ -948,6 +949,58 @@ class TestEnvironmentHints:
                 f"info is suppressed in the system prompt"
             )
 
+    def test_environment_hint_from_env_var_is_appended(self, monkeypatch):
+        """HERMES_ENVIRONMENT_HINT lets an embedder describe the runtime env."""
+        import agent.prompt_builder as _pb
+        monkeypatch.setattr(_pb, "is_wsl", lambda: False)
+        monkeypatch.delenv("TERMINAL_ENV", raising=False)
+        monkeypatch.setenv("HERMES_ENVIRONMENT_HINT", "Running inside an OpenShell sandbox.")
+        _pb._clear_backend_probe_cache()
+        result = _pb.build_environment_hints()
+        assert "Running inside an OpenShell sandbox." in result
+        # The factual host block must still come first.
+        assert result.index("Host:") < result.index("OpenShell")
+
+    def test_environment_hint_env_var_overrides_config(self, monkeypatch):
+        """Env var wins over config.yaml agent.environment_hint."""
+        import agent.prompt_builder as _pb
+        monkeypatch.setattr(_pb, "is_wsl", lambda: False)
+        monkeypatch.delenv("TERMINAL_ENV", raising=False)
+        monkeypatch.setenv("HERMES_ENVIRONMENT_HINT", "ENV-WINS")
+        monkeypatch.setattr(
+            "hermes_cli.config.load_config",
+            lambda: {"agent": {"environment_hint": "CONFIG-VALUE"}},
+        )
+        _pb._clear_backend_probe_cache()
+        result = _pb.build_environment_hints()
+        assert "ENV-WINS" in result
+        assert "CONFIG-VALUE" not in result
+
+    def test_environment_hint_falls_back_to_config(self, monkeypatch):
+        """With no env var, the config.yaml value is used."""
+        import agent.prompt_builder as _pb
+        monkeypatch.setattr(_pb, "is_wsl", lambda: False)
+        monkeypatch.delenv("TERMINAL_ENV", raising=False)
+        monkeypatch.delenv("HERMES_ENVIRONMENT_HINT", raising=False)
+        monkeypatch.setattr(
+            "hermes_cli.config.load_config",
+            lambda: {"agent": {"environment_hint": "CONFIG-VALUE"}},
+        )
+        _pb._clear_backend_probe_cache()
+        result = _pb.build_environment_hints()
+        assert "CONFIG-VALUE" in result
+
+    def test_environment_hint_empty_by_default(self, monkeypatch):
+        """No hint configured anywhere → no embedder text, host block intact."""
+        import agent.prompt_builder as _pb
+        monkeypatch.setattr(_pb, "is_wsl", lambda: False)
+        monkeypatch.delenv("TERMINAL_ENV", raising=False)
+        monkeypatch.delenv("HERMES_ENVIRONMENT_HINT", raising=False)
+        monkeypatch.setattr("hermes_cli.config.load_config", lambda: {"agent": {}})
+        _pb._clear_backend_probe_cache()
+        result = _pb.build_environment_hints()
+        assert "Host:" in result
+
 
 # =========================================================================
 # Conditional skill activation
diff --git a/tests/agent/test_prompt_caching.py b/tests/agent/test_prompt_caching.py
index f6f3e9f0a38..499ffc765a4 100644
--- a/tests/agent/test_prompt_caching.py
+++ b/tests/agent/test_prompt_caching.py
@@ -1,7 +1,5 @@
 """Tests for agent/prompt_caching.py — Anthropic cache control injection."""
 
-import copy
-import pytest
 
 from agent.prompt_caching import (
     _apply_cache_marker,
diff --git a/tests/agent/test_rate_limit_tracker.py b/tests/agent/test_rate_limit_tracker.py
index caef785678b..63cdee2db91 100644
--- a/tests/agent/test_rate_limit_tracker.py
+++ b/tests/agent/test_rate_limit_tracker.py
@@ -189,14 +189,11 @@ class TestAgentIntegration:
 
     def test_capture_rate_limits_from_headers(self):
         """Simulate the header capture path without a real API call."""
-        import sys
-        import os
         # Use a mock httpx-like response
         class MockResponse:
             headers = NOUS_HEADERS
 
         # Import AIAgent minimally
-        from unittest.mock import MagicMock, patch
 
         # Test the parsing directly
         state = parse_rate_limit_headers(MockResponse.headers, provider="nous")
diff --git a/tests/agent/test_redact.py b/tests/agent/test_redact.py
index ea79ea9ce39..e4fa5e95043 100644
--- a/tests/agent/test_redact.py
+++ b/tests/agent/test_redact.py
@@ -1,7 +1,6 @@
 """Tests for agent.redact -- secret masking in logs and output."""
 
 import logging
-import os
 
 import pytest
 
@@ -378,127 +377,57 @@ class TestDiscordMentions:
         assert result.endswith(" said hello")
 
 
-class TestUrlQueryParamRedaction:
-    """URL query-string redaction (ported from nearai/ironclaw#2529).
-
-    Catches opaque tokens that don't match vendor prefix regexes by
-    matching on parameter NAME rather than value shape.
+class TestWebUrlsNotRedacted:
+    """Web URLs (http/https/wss) pass through unchanged — magic-link
+    checkouts, OAuth callbacks the agent is meant to follow, and pre-signed
+    share URLs must reach the tool intact. Known credential shapes inside
+    URLs (sk-, ghp_, JWTs) are still caught by the prefix and JWT regexes.
+    DB connection-string passwords are still caught by _DB_CONNSTR_RE.
     """
 
-    def test_oauth_callback_code(self):
+    def test_oauth_callback_code_passes_through(self):
         text = "GET https://api.example.com/oauth/cb?code=abc123xyz789&state=csrf_ok"
-        result = redact_sensitive_text(text)
-        assert "abc123xyz789" not in result
-        assert "code=***" in result
-        assert "state=csrf_ok" in result  # state is not sensitive
-
-    def test_access_token_query(self):
-        text = "Fetching https://example.com/api?access_token=opaque_value_here_1234&format=json"
-        result = redact_sensitive_text(text)
-        assert "opaque_value_here_1234" not in result
-        assert "access_token=***" in result
-        assert "format=json" in result
-
-    def test_refresh_token_query(self):
-        text = "https://auth.example.com/token?refresh_token=somerefresh&grant_type=refresh"
-        result = redact_sensitive_text(text)
-        assert "somerefresh" not in result
-        assert "grant_type=refresh" in result
-
-    def test_api_key_query(self):
-        text = "https://api.example.com/v1/data?api_key=kABCDEF12345&limit=10"
-        result = redact_sensitive_text(text)
-        assert "kABCDEF12345" not in result
-        assert "limit=10" in result
-
-    def test_presigned_signature(self):
-        text = "https://s3.amazonaws.com/bucket/k?signature=LONG_PRESIGNED_SIG&id=public"
-        result = redact_sensitive_text(text)
-        assert "LONG_PRESIGNED_SIG" not in result
-        assert "id=public" in result
-
-    def test_case_insensitive_param_names(self):
-        """Lowercase/mixed-case sensitive param names are redacted."""
-        # NOTE: All-caps names like TOKEN= are swallowed by _ENV_ASSIGN_RE
-        # (which matches KEY=value patterns greedily) before URL regex runs.
-        # This test uses lowercase names to isolate URL-query redaction.
-        text = "https://example.com?api_key=abcdef&secret=ghijkl"
-        result = redact_sensitive_text(text)
-        assert "abcdef" not in result
-        assert "ghijkl" not in result
-        assert "api_key=***" in result
-        assert "secret=***" in result
-
-    def test_substring_match_does_not_trigger(self):
-        """`token_count` and `session_id` must NOT match `token` / `session`."""
-        text = "https://example.com/cb?token_count=42&session_id=xyz&foo=bar"
-        result = redact_sensitive_text(text)
-        assert "token_count=42" in result
-        assert "session_id=xyz" in result
-
-    def test_url_without_query_unchanged(self):
-        text = "https://example.com/path/to/resource"
         assert redact_sensitive_text(text) == text
 
-    def test_url_with_fragment(self):
-        text = "https://example.com/page?token=xyz#section"
-        result = redact_sensitive_text(text)
-        assert "token=xyz" not in result
-        assert "#section" in result
+    def test_access_token_query_passes_through(self):
+        text = "Fetching https://example.com/api?access_token=opaque_value_here_1234&format=json"
+        assert redact_sensitive_text(text) == text
 
-    def test_websocket_url_query(self):
+    def test_magic_link_checkout_passes_through(self):
+        text = "Open https://checkout.example.com/resume?magic=ABCDEF123456&customer=42"
+        assert redact_sensitive_text(text) == text
+
+    def test_presigned_signature_passes_through(self):
+        text = "https://s3.amazonaws.com/bucket/k?signature=LONG_PRESIGNED_SIG&id=public"
+        assert redact_sensitive_text(text) == text
+
+    def test_https_userinfo_passes_through(self):
+        text = "URL: https://user:supersecretpw@host.example.com/path"
+        assert redact_sensitive_text(text) == text
+
+    def test_websocket_url_query_passes_through(self):
         text = "wss://api.example.com/ws?token=opaqueWsToken123"
-        result = redact_sensitive_text(text)
-        assert "opaqueWsToken123" not in result
+        assert redact_sensitive_text(text) == text
 
-    def test_http_access_log_relative_request_target_query(self):
+    def test_http_access_log_request_target_passes_through(self):
         text = (
             'INFO aiohttp.access: 127.0.0.1 "POST '
             '/bluebubbles-webhook?password=webhookSecret123&event=new-message '
             'HTTP/1.1" 200 173 "-" "test-client"'
         )
-        result = redact_sensitive_text(text)
-        assert "webhookSecret123" not in result
-        assert "password=***" in result
-        assert "event=new-message" in result
-
-    def test_http_access_log_absolute_request_target_query(self):
-        text = (
-            'INFO aiohttp.access: 127.0.0.1 "GET '
-            'https://example.com/callback?code=oauthCode123&state=csrf-ok '
-            'HTTP/1.1" 200 173 "-" "test-client"'
-        )
-        result = redact_sensitive_text(text)
-        assert "oauthCode123" not in result
-        assert "code=***" in result
-        assert "state=csrf-ok" in result
-
-
-class TestUrlUserinfoRedaction:
-    """URL userinfo (`scheme://user:pass@host`) for non-DB schemes."""
-
-    def test_https_userinfo(self):
-        text = "URL: https://user:supersecretpw@host.example.com/path"
-        result = redact_sensitive_text(text)
-        assert "supersecretpw" not in result
-        assert "https://user:***@host.example.com" in result
-
-    def test_http_userinfo(self):
-        text = "http://admin:plaintextpass@internal.example.com/api"
-        result = redact_sensitive_text(text)
-        assert "plaintextpass" not in result
-
-    def test_ftp_userinfo(self):
-        text = "ftp://user:ftppass@ftp.example.com/file.txt"
-        result = redact_sensitive_text(text)
-        assert "ftppass" not in result
-
-    def test_url_without_userinfo_unchanged(self):
-        text = "https://example.com/path"
         assert redact_sensitive_text(text) == text
 
-    def test_db_connstr_still_handled(self):
-        """DB schemes are handled by _DB_CONNSTR_RE, not _URL_USERINFO_RE."""
+    def test_known_prefix_inside_url_still_redacted(self):
+        """sk-/ghp_/JWT-shaped values inside a URL are still caught by
+        _PREFIX_RE / _JWT_RE — the carve-out is for opaque tokens only."""
+        text = "https://evil.com/steal?key=sk-" + "a" * 30
+        result = redact_sensitive_text(text)
+        assert "sk-" + "a" * 30 not in result
+
+    def test_db_connstr_password_still_redacted(self):
+        """DB schemes (postgres/mysql/mongodb/redis/amqp) keep their
+        userinfo redaction via _DB_CONNSTR_RE — connection strings are
+        not web URLs the agent navigates to."""
         text = "postgres://admin:dbpass@db.internal:5432/app"
         result = redact_sensitive_text(text)
         assert "dbpass" not in result
diff --git a/tests/agent/test_shell_hooks.py b/tests/agent/test_shell_hooks.py
index 743c9acb843..ce060f2f3c7 100644
--- a/tests/agent/test_shell_hooks.py
+++ b/tests/agent/test_shell_hooks.py
@@ -9,10 +9,7 @@ covered in ``test_shell_hooks_consent.py``.
 from __future__ import annotations
 
 import json
-import os
-import stat
 from pathlib import Path
-from typing import Any, Dict
 
 import pytest
 
diff --git a/tests/agent/test_shell_hooks_consent.py b/tests/agent/test_shell_hooks_consent.py
index 2154dc84b2c..b64e79df4c7 100644
--- a/tests/agent/test_shell_hooks_consent.py
+++ b/tests/agent/test_shell_hooks_consent.py
@@ -7,7 +7,6 @@ hooks_auto_accept: config key).
 
 from __future__ import annotations
 
-import json
 from pathlib import Path
 from unittest.mock import patch
 
diff --git a/tests/agent/test_skill_bundles.py b/tests/agent/test_skill_bundles.py
index fa9e42d43ec..96fe0a057f9 100644
--- a/tests/agent/test_skill_bundles.py
+++ b/tests/agent/test_skill_bundles.py
@@ -2,7 +2,6 @@
 
 import os
 from pathlib import Path
-from unittest.mock import patch
 
 import pytest
 
diff --git a/tests/agent/test_subagent_progress.py b/tests/agent/test_subagent_progress.py
index 953f26a69e8..761edf6ea73 100644
--- a/tests/agent/test_subagent_progress.py
+++ b/tests/agent/test_subagent_progress.py
@@ -10,10 +10,8 @@ Verifies that:
 
 import io
 import sys
-import time
-import threading
 import pytest
-from unittest.mock import MagicMock, patch
+from unittest.mock import MagicMock
 
 from agent.display import KawaiiSpinner
 from tools.delegate_tool import _build_child_progress_callback
diff --git a/tests/agent/test_subdirectory_hints.py b/tests/agent/test_subdirectory_hints.py
index cf445797cee..bc1f7f17f24 100644
--- a/tests/agent/test_subdirectory_hints.py
+++ b/tests/agent/test_subdirectory_hints.py
@@ -1,6 +1,5 @@
 """Tests for progressive subdirectory hint discovery."""
 
-import os
 import pytest
 from pathlib import Path
 from unittest.mock import patch
diff --git a/tests/agent/test_title_generator.py b/tests/agent/test_title_generator.py
index c498a71ab50..56286f6ecc9 100644
--- a/tests/agent/test_title_generator.py
+++ b/tests/agent/test_title_generator.py
@@ -1,9 +1,7 @@
 """Tests for agent.title_generator — auto-generated session titles."""
 
-import threading
 from unittest.mock import MagicMock, patch
 
-import pytest
 
 from agent.title_generator import (
     generate_title,
diff --git a/tests/agent/test_unsupported_temperature_retry.py b/tests/agent/test_unsupported_temperature_retry.py
index 82d8d3208d2..4d2ebb980e3 100644
--- a/tests/agent/test_unsupported_temperature_retry.py
+++ b/tests/agent/test_unsupported_temperature_retry.py
@@ -112,8 +112,13 @@ class TestCallLlmUnsupportedTemperatureRetry:
         retry_kwargs = client.chat.completions.create.call_args_list[1].kwargs
         assert first_kwargs["temperature"] == 0.3
         assert "temperature" not in retry_kwargs
-        # other kwargs preserved
-        assert retry_kwargs["max_tokens"] == 500
+        # max_tokens is intentionally omitted on OpenAI-compatible endpoints
+        # (#34530) — auxiliary calls let the model max out its own output — so
+        # it must be absent in BOTH the first and retry kwargs. Use a kwarg that
+        # actually survives (model) to prove the retry preserves the rest.
+        assert "max_tokens" not in first_kwargs
+        assert "max_tokens" not in retry_kwargs
+        assert retry_kwargs["model"] == first_kwargs["model"]
 
     def test_non_temperature_400_does_not_retry_as_temperature(self):
         """Unrelated 400s (e.g. bad tool role) must not silently drop temp."""
@@ -207,7 +212,11 @@ class TestAsyncCallLlmUnsupportedTemperatureRetry:
         retry_kwargs = client.chat.completions.create.call_args_list[1].kwargs
         assert first_kwargs["temperature"] == 0.3
         assert "temperature" not in retry_kwargs
-        assert retry_kwargs["max_tokens"] == 500
+        # max_tokens is intentionally omitted on OpenAI-compatible endpoints
+        # (#34530); assert it's absent and that model survives the retry.
+        assert "max_tokens" not in first_kwargs
+        assert "max_tokens" not in retry_kwargs
+        assert retry_kwargs["model"] == first_kwargs["model"]
 
     @pytest.mark.asyncio
     async def test_async_non_temperature_400_does_not_retry(self):
diff --git a/tests/agent/transports/test_bedrock_transport.py b/tests/agent/transports/test_bedrock_transport.py
index 7a5301d84fc..2f43daf988d 100644
--- a/tests/agent/transports/test_bedrock_transport.py
+++ b/tests/agent/transports/test_bedrock_transport.py
@@ -1,11 +1,10 @@
 """Tests for the BedrockTransport."""
 
-import json
 import pytest
 from types import SimpleNamespace
 
 from agent.transports import get_transport
-from agent.transports.types import NormalizedResponse, ToolCall
+from agent.transports.types import NormalizedResponse
 
 
 @pytest.fixture
diff --git a/tests/agent/transports/test_codex_app_server_session.py b/tests/agent/transports/test_codex_app_server_session.py
index d43a92a1eb9..a0ee59d616d 100644
--- a/tests/agent/transports/test_codex_app_server_session.py
+++ b/tests/agent/transports/test_codex_app_server_session.py
@@ -7,7 +7,6 @@ deadline timeouts. These tests pin all of that without spawning real codex.
 
 from __future__ import annotations
 
-import threading
 import time
 from unittest.mock import patch
 from typing import Any, Optional
@@ -17,7 +16,6 @@ import pytest
 import agent.transports.codex_app_server_session as session_mod
 from agent.transports.codex_app_server_session import (
     CodexAppServerSession,
-    TurnResult,
     _ServerRequestRouting,
     _approval_choice_to_codex_decision,
     _coerce_turn_input_text,
@@ -275,8 +273,9 @@ class TestRunTurn:
     def test_turn_start_failure_attaches_redacted_stderr_tail(self):
         """When codex stderr has content (non-OAuth), the tail gets attached
         to the user-facing error so config/provider problems are debuggable
-        instead of just 'Internal error'. Secrets in stderr are redacted
-        via agent.redact(force=True)."""
+        instead of just 'Internal error'. Credential-shaped values in stderr
+        are redacted via agent.redact(force=True); web-URL query params pass
+        through (see fix(redact): pass web URLs through unchanged)."""
         client = FakeClient()
         client.set_stderr_tail([
             "ERROR: provider auth failed",
@@ -299,9 +298,8 @@ class TestRunTurn:
         # Stderr tail attached
         assert "codex stderr" in r.error
         assert "provider auth failed" in r.error
-        # Secrets redacted
+        # Credential-shaped values still redacted (sk- prefix + Bearer header)
         assert "sk-live-deadbeefdeadbeef" not in r.error
-        assert "querysecret12345" not in r.error
         # Non-OAuth → should NOT retire (subprocess JSON-RPC is still healthy).
         assert r.should_retire is False
 
diff --git a/tests/agent/transports/test_codex_event_projector.py b/tests/agent/transports/test_codex_event_projector.py
index 04980f35c61..8da4d8e911f 100644
--- a/tests/agent/transports/test_codex_event_projector.py
+++ b/tests/agent/transports/test_codex_event_projector.py
@@ -11,7 +11,6 @@ import pytest
 
 from agent.transports.codex_event_projector import (
     CodexEventProjector,
-    ProjectionResult,
     _deterministic_call_id,
     _format_tool_args,
 )
diff --git a/tests/agent/transports/test_codex_transport.py b/tests/agent/transports/test_codex_transport.py
index 1309c979218..5d8aa6ba12b 100644
--- a/tests/agent/transports/test_codex_transport.py
+++ b/tests/agent/transports/test_codex_transport.py
@@ -5,7 +5,7 @@ import pytest
 from types import SimpleNamespace
 
 from agent.transports import get_transport
-from agent.transports.types import NormalizedResponse, ToolCall
+from agent.transports.types import NormalizedResponse
 
 
 @pytest.fixture
diff --git a/tests/agent/transports/test_hermes_tools_mcp_server.py b/tests/agent/transports/test_hermes_tools_mcp_server.py
index 3c11cb3f81d..c61e6c684ea 100644
--- a/tests/agent/transports/test_hermes_tools_mcp_server.py
+++ b/tests/agent/transports/test_hermes_tools_mcp_server.py
@@ -8,9 +8,7 @@ build helper assembles a server when the SDK is present.
 
 from __future__ import annotations
 
-from unittest.mock import patch
 
-import pytest
 
 
 class TestModuleSurface:
diff --git a/tests/agent/transports/test_transport.py b/tests/agent/transports/test_transport.py
index 67fb486fc9a..18b210b7c31 100644
--- a/tests/agent/transports/test_transport.py
+++ b/tests/agent/transports/test_transport.py
@@ -2,10 +2,9 @@
 
 import pytest
 from types import SimpleNamespace
-from unittest.mock import MagicMock
 
 from agent.transports.base import ProviderTransport
-from agent.transports.types import NormalizedResponse, ToolCall, Usage
+from agent.transports.types import NormalizedResponse
 from agent.transports import get_transport, register_transport, _REGISTRY
 
 
diff --git a/tests/agent/transports/test_types.py b/tests/agent/transports/test_types.py
index 2d576a8f83c..c52e77e330f 100644
--- a/tests/agent/transports/test_types.py
+++ b/tests/agent/transports/test_types.py
@@ -1,7 +1,6 @@
 """Tests for agent/transports/types.py — dataclass construction + helpers."""
 
 import json
-import pytest
 
 from agent.transports.types import (
     NormalizedResponse,
diff --git a/tests/cli/test_branch_command.py b/tests/cli/test_branch_command.py
index cf48384403f..8f8a70749b8 100644
--- a/tests/cli/test_branch_command.py
+++ b/tests/cli/test_branch_command.py
@@ -10,10 +10,8 @@ Verifies that:
 """
 
 import os
-import uuid
 from datetime import datetime
-from pathlib import Path
-from unittest.mock import MagicMock, patch, PropertyMock
+from unittest.mock import MagicMock
 
 import pytest
 
diff --git a/tests/cli/test_cli_background_tui_refresh.py b/tests/cli/test_cli_background_tui_refresh.py
index 924df1026ad..7f86568cc32 100644
--- a/tests/cli/test_cli_background_tui_refresh.py
+++ b/tests/cli/test_cli_background_tui_refresh.py
@@ -4,11 +4,8 @@ Ensures the TUI is properly refreshed before printing background task output
 to prevent spinner/status bar overlap (#2718).
 """
 
-import threading
-from types import SimpleNamespace
 from unittest.mock import MagicMock, patch
 
-import pytest
 
 from cli import HermesCLI
 
diff --git a/tests/cli/test_cli_file_drop.py b/tests/cli/test_cli_file_drop.py
index a7a8c42e2da..4109ade9f3d 100644
--- a/tests/cli/test_cli_file_drop.py
+++ b/tests/cli/test_cli_file_drop.py
@@ -1,9 +1,6 @@
 """Tests for _detect_file_drop — file path detection that prevents
 dragged/pasted absolute paths from being mistaken for slash commands."""
 
-import os
-import tempfile
-from pathlib import Path
 
 import pytest
 
diff --git a/tests/cli/test_cli_goal_interrupt.py b/tests/cli/test_cli_goal_interrupt.py
index 851b87e856b..0ef04149038 100644
--- a/tests/cli/test_cli_goal_interrupt.py
+++ b/tests/cli/test_cli_goal_interrupt.py
@@ -12,7 +12,6 @@ minimal ``HermesCLI`` stub (pattern used elsewhere in tests/cli).
 from __future__ import annotations
 
 import queue
-import sys
 import uuid
 from pathlib import Path
 from unittest.mock import MagicMock, patch
diff --git a/tests/cli/test_cli_init.py b/tests/cli/test_cli_init.py
index 67004384ae7..105ec31f5b6 100644
--- a/tests/cli/test_cli_init.py
+++ b/tests/cli/test_cli_init.py
@@ -180,9 +180,9 @@ class TestPromptToolkitTerminalCompatibility:
     def test_lf_enter_binds_to_submit_handler_posix(self):
         """Some thin PTYs deliver Enter as LF/c-j instead of CR/enter.
 
-        On a bare local POSIX TTY (no SSH/WSL/WT) we keep c-j → submit so
+        On a bare local POSIX TTY (no SSH/WSL/WT/Ghostty) we keep c-j → submit so
         Enter works on thin PTYs (docker exec, certain ssh configurations).
-        On Windows, WSL, SSH sessions, and Windows Terminal we leave c-j
+        On Windows, WSL, SSH sessions, Windows Terminal, and Ghostty we leave c-j
         unbound here so it can be used as the Ctrl+Enter newline keystroke
         without conflicting with submit. See issue #22379.
         """
@@ -217,6 +217,17 @@ class TestPromptToolkitTerminalCompatibility:
             assert bindings[("c-m",)] is submit_handler
             assert ("c-j",) not in bindings
 
+        # Ghostty through tmux: TERM_PROGRAM is tmux, but Ghostty exports a
+        # stable env marker. Keep c-j free so Ctrl+J inserts a newline.
+        with _patch.object(_sys, "platform", "linux"), \
+             _patch.dict(_os.environ, {"TERM": "tmux-256color", "TERM_PROGRAM": "tmux", "GHOSTTY_RESOURCES_DIR": "/usr/share/ghostty"}, clear=True), \
+             _patch("builtins.open", side_effect=OSError("no /proc")):
+            kb = KeyBindings()
+            _bind_prompt_submit_keys(kb, submit_handler)
+            bindings = {tuple(key.value for key in binding.keys): binding.handler for binding in kb.bindings}
+            assert bindings[("c-m",)] is submit_handler
+            assert ("c-j",) not in bindings
+
         # Windows: only enter submits; c-j is free for the newline binding
         # added separately in the prompt setup.
         with _patch.object(_sys, "platform", "win32"):
diff --git a/tests/cli/test_cli_interrupt_subagent.py b/tests/cli/test_cli_interrupt_subagent.py
index 6821a6725d4..5b732425c8a 100644
--- a/tests/cli/test_cli_interrupt_subagent.py
+++ b/tests/cli/test_cli_interrupt_subagent.py
@@ -10,15 +10,12 @@ This tests the COMPLETE path including _run_single_child, _active_children
 registration, interrupt propagation, and child detection.
 """
 
-import json
-import os
-import queue
 import threading
 import time
 import unittest
-from unittest.mock import MagicMock, patch, PropertyMock
+from unittest.mock import MagicMock, patch
 
-from tools.interrupt import set_interrupt, is_interrupted
+from tools.interrupt import set_interrupt
 
 
 class TestCLISubagentInterrupt(unittest.TestCase):
diff --git a/tests/cli/test_cli_light_mode.py b/tests/cli/test_cli_light_mode.py
index bc5ca5128e0..c1df160e6b1 100644
--- a/tests/cli/test_cli_light_mode.py
+++ b/tests/cli/test_cli_light_mode.py
@@ -8,7 +8,6 @@ before the terminal query, which is the path most users hit.
 
 from __future__ import annotations
 
-import importlib
 
 import pytest
 
diff --git a/tests/cli/test_cli_provider_resolution.py b/tests/cli/test_cli_provider_resolution.py
index e71226da53f..a25d903f687 100644
--- a/tests/cli/test_cli_provider_resolution.py
+++ b/tests/cli/test_cli_provider_resolution.py
@@ -271,7 +271,10 @@ def test_codex_provider_replaces_incompatible_default_model(monkeypatch):
 
 
 def test_model_flow_nous_prints_subscription_guidance_without_mutating_explicit_tts(monkeypatch, capsys):
-    monkeypatch.setattr("hermes_cli.nous_subscription.managed_nous_tools_enabled", lambda: True)
+    monkeypatch.setattr(
+        "hermes_cli.nous_subscription.managed_nous_tools_enabled",
+        lambda *args, **kwargs: True,
+    )
     config = {
         "model": {"provider": "nous", "default": "claude-opus-4-6"},
         "tts": {"provider": "elevenlabs"},
@@ -306,7 +309,10 @@ def test_model_flow_nous_prints_subscription_guidance_without_mutating_explicit_
 
 
 def test_model_flow_nous_offers_tool_gateway_prompt_when_unconfigured(monkeypatch, capsys):
-    monkeypatch.setattr("hermes_cli.nous_subscription.managed_nous_tools_enabled", lambda: True)
+    monkeypatch.setattr(
+        "hermes_cli.nous_subscription.managed_nous_tools_enabled",
+        lambda *args, **kwargs: True,
+    )
     config = {
         "model": {"provider": "nous", "default": "claude-opus-4-6"},
         "tts": {"provider": "edge"},
diff --git a/tests/cli/test_cli_resume_command.py b/tests/cli/test_cli_resume_command.py
index 6368d973c88..eb691ab006a 100644
--- a/tests/cli/test_cli_resume_command.py
+++ b/tests/cli/test_cli_resume_command.py
@@ -11,6 +11,7 @@ def _make_cli():
     cli_obj.conversation_history = []
     cli_obj.agent = None
     cli_obj._session_db = MagicMock()
+    cli_obj._pending_resume_sessions = None
     # _handle_resume_command now triggers _display_resumed_history (#31695),
     # which reads self.resume_display. "minimal" short-circuits the recap so
     # the test only exercises session-switch behavior.
@@ -116,3 +117,107 @@ class TestCliResumeCommand:
 
         printed = " ".join(str(call) for call in mock_cprint.call_args_list)
         assert "<half" in printed
+
+
+class TestPendingResumeNumberedSelection:
+    """Bare `/resume` arms a one-shot prompt so the next bare number resumes.
+
+    Regression coverage for #34584: previously, running `/resume` (no args)
+    printed the recent-sessions list but left no selection state armed, so
+    typing just `3` on the next line was sent to the agent as chat instead of
+    resuming session #3.
+    """
+
+    def test_bare_resume_arms_pending_selection(self):
+        cli_obj = _make_cli()
+        sessions = [
+            {"id": "sess_002", "title": "Coding"},
+            {"id": "sess_001", "title": "Research"},
+        ]
+        cli_obj._list_recent_sessions = MagicMock(return_value=sessions)
+        cli_obj._show_recent_sessions = MagicMock(return_value=True)
+
+        with patch("cli._cprint"):
+            cli_obj._handle_resume_command("/resume")
+
+        assert cli_obj._pending_resume_sessions == sessions
+
+    def test_bare_resume_no_sessions_does_not_arm(self):
+        cli_obj = _make_cli()
+        cli_obj._show_recent_sessions = MagicMock(return_value=False)
+        cli_obj._list_recent_sessions = MagicMock(return_value=[])
+
+        with patch("cli._cprint"):
+            cli_obj._handle_resume_command("/resume")
+
+        assert cli_obj._pending_resume_sessions is None
+
+    def test_pending_number_resumes_selected_session(self):
+        cli_obj = _make_cli()
+        sessions = [
+            {"id": "sess_002", "title": "Coding"},
+            {"id": "sess_001", "title": "Research"},
+        ]
+        cli_obj._pending_resume_sessions = sessions
+        # _handle_resume_command("/resume 2") re-resolves the index via
+        # _list_recent_sessions, so it must return the same list.
+        cli_obj._list_recent_sessions = MagicMock(return_value=sessions)
+        cli_obj._session_db.get_session.return_value = {"id": "sess_001", "title": "Research"}
+        cli_obj._session_db.get_messages_as_conversation.return_value = [
+            {"role": "user", "content": "hello"},
+        ]
+        cli_obj._session_db.resolve_resume_session_id.return_value = "sess_001"
+
+        with (
+            patch("hermes_cli.main._resolve_session_by_name_or_id", return_value=None),
+            patch("cli._cprint"),
+        ):
+            consumed = cli_obj._consume_pending_resume_selection("2")
+
+        assert consumed is True
+        assert cli_obj.session_id == "sess_001"
+        # One-shot: prompt is disarmed after consuming.
+        assert cli_obj._pending_resume_sessions is None
+
+    def test_pending_out_of_range_consumed_with_message(self):
+        cli_obj = _make_cli()
+        cli_obj._pending_resume_sessions = [{"id": "sess_002", "title": "Coding"}]
+
+        with patch("cli._cprint") as mock_cprint:
+            consumed = cli_obj._consume_pending_resume_selection("9")
+
+        printed = " ".join(str(call) for call in mock_cprint.call_args_list)
+        # An out-of-range number is still consumed (not sent to the agent),
+        # and the prompt is disarmed.
+        assert consumed is True
+        assert "out of range" in printed.lower()
+        assert cli_obj.session_id == "current_session"
+        assert cli_obj._pending_resume_sessions is None
+
+    def test_pending_non_numeric_falls_through_and_disarms(self):
+        cli_obj = _make_cli()
+        cli_obj._pending_resume_sessions = [{"id": "sess_002", "title": "Coding"}]
+
+        with patch("cli._cprint"):
+            consumed = cli_obj._consume_pending_resume_selection("hello there")
+
+        # Free text is NOT consumed (caller treats it as chat), but the
+        # one-shot prompt is disarmed so a later number isn't hijacked.
+        assert consumed is False
+        assert cli_obj._pending_resume_sessions is None
+
+    def test_no_pending_returns_false(self):
+        cli_obj = _make_cli()
+        assert cli_obj._pending_resume_sessions is None
+        assert cli_obj._consume_pending_resume_selection("3") is False
+
+    def test_pending_disarmed_by_other_command(self):
+        cli_obj = _make_cli()
+        cli_obj._pending_resume_sessions = [{"id": "sess_002", "title": "Coding"}]
+        # Stub out the help handler so process_command("/help") is cheap.
+        cli_obj.show_help = MagicMock()
+
+        cli_obj.process_command("/help")
+
+        # A non-resume command disarms the one-shot prompt (#34584).
+        assert cli_obj._pending_resume_sessions is None
diff --git a/tests/cli/test_cli_status_bar.py b/tests/cli/test_cli_status_bar.py
index 47bd68aa25d..f62287f622b 100644
--- a/tests/cli/test_cli_status_bar.py
+++ b/tests/cli/test_cli_status_bar.py
@@ -568,7 +568,6 @@ class TestStatusBarWidthSource:
     """Ensure status bar fragments don't overflow the terminal width."""
 
     def _make_wide_cli(self):
-        from datetime import datetime, timedelta
         cli_obj = _attach_agent(
             _make_cli(),
             prompt_tokens=100_000,
diff --git a/tests/cli/test_cli_terminal_shortcuts.py b/tests/cli/test_cli_terminal_shortcuts.py
new file mode 100644
index 00000000000..3b91ce61073
--- /dev/null
+++ b/tests/cli/test_cli_terminal_shortcuts.py
@@ -0,0 +1,49 @@
+"""Regression tests for terminal navigation/focus escape sequences.
+
+Ghostty/macOS window and tab navigation can deliver terminal focus reports
+(CSI I / CSI O) to the running TUI. These must be consumed by the input parser,
+not inserted into the prompt buffer and cleaned up later.
+"""
+
+from prompt_toolkit.input.vt100_parser import Vt100Parser
+from prompt_toolkit.keys import Keys
+
+from hermes_cli.pt_input_extras import install_ignored_terminal_sequences
+
+
+def _parse_keys(data: str):
+    events = []
+    parser = Vt100Parser(events.append)
+    parser.feed_and_flush(data)
+    return [(event.key, event.data) for event in events]
+
+
+def test_focus_events_are_parser_level_ignored_before_prompt_buffer():
+    install_ignored_terminal_sequences()
+
+    assert _parse_keys("\x1b[O\x1b[Ihello") == [
+        (Keys.Ignore, "\x1b[O"),
+        (Keys.Ignore, "\x1b[I"),
+        ("h", "h"),
+        ("e", "e"),
+        ("l", "l"),
+        ("l", "l"),
+        ("o", "o"),
+    ]
+
+
+def test_regular_escape_shortcuts_still_parse_normally():
+    install_ignored_terminal_sequences()
+
+    assert _parse_keys("\x1bg") == [(Keys.Escape, "\x1b"), ("g", "g")]
+
+
+def test_install_is_idempotent_and_setdefault_safe():
+    """Second call should return 0 (no new mappings); existing user
+    registrations must not be overwritten."""
+    first = install_ignored_terminal_sequences()
+    second = install_ignored_terminal_sequences()
+    # At most first should be 2 (both CSI I + CSI O), second always 0
+    # since the entries are now present.
+    assert second == 0
+    assert first in (0, 1, 2)  # 0 if a prior test in same process already installed
diff --git a/tests/cli/test_cli_tools_command.py b/tests/cli/test_cli_tools_command.py
index 2f0b096d2e6..15cfce105b6 100644
--- a/tests/cli/test_cli_tools_command.py
+++ b/tests/cli/test_cli_tools_command.py
@@ -1,6 +1,6 @@
 """Tests for /tools slash command handler in the interactive CLI."""
 
-from unittest.mock import MagicMock, patch, call
+from unittest.mock import MagicMock, patch
 
 from cli import HermesCLI
 
diff --git a/tests/cli/test_cli_yolo_toggle.py b/tests/cli/test_cli_yolo_toggle.py
new file mode 100644
index 00000000000..55ee4882ee6
--- /dev/null
+++ b/tests/cli/test_cli_yolo_toggle.py
@@ -0,0 +1,244 @@
+"""Regression tests for the CLI ``/yolo`` in-chat toggle.
+
+Pre-fix bug (issue #33925): ``cli.HermesCLI._toggle_yolo`` mutated only
+``os.environ["HERMES_YOLO_MODE"]``. That env var is captured once at
+module-import time into ``tools.approval._YOLO_MODE_FROZEN`` (security
+hardening: stops prompt-injected skills from flipping the bypass mid-run),
+so the post-startup toggle was a silent no-op. ``/yolo`` advertised "YOLO ON"
+in the status bar while every dangerous command still hit the approval
+prompt. Only ``hermes --yolo`` (process-start env), ``HERMES_YOLO_MODE=1``,
+and ``hermes config set approvals.mode off`` actually bypassed.
+
+The fix routes the CLI toggle through ``enable_session_yolo`` /
+``disable_session_yolo`` (matching the gateway and TUI ``/yolo`` paths) and
+binds ``self.session_id`` as the active approval session key around each
+``run_conversation`` call so ``is_current_session_yolo_enabled()`` resolves
+against the same key the toggle writes under.
+
+We test ``_toggle_yolo`` and ``_is_session_yolo_active`` as unbound methods
+against a minimal stand-in object that exposes only the attribute they
+read (``session_id``). This avoids the heavy ``HermesCLI`` construction
+path used in ``test_cli_init.py``, which is incompatible with this test
+file's path layout — ``HermesCLI.__init__`` imports a lot of optional
+state we don't need here.
+"""
+
+import os
+from types import SimpleNamespace
+from unittest.mock import patch
+
+import pytest
+
+import tools.approval as approval_module
+from cli import HermesCLI
+
+
+SESSION_KEY = "test-cli-yolo-session"
+
+
+@pytest.fixture(autouse=True)
+def _clear_approval_state(monkeypatch):
+    """Clear the YOLO bypass + env var around every test so cases are independent."""
+    monkeypatch.delenv("HERMES_YOLO_MODE", raising=False)
+    approval_module.clear_session(SESSION_KEY)
+    approval_module.clear_session("default")
+    yield
+    approval_module.clear_session(SESSION_KEY)
+    approval_module.clear_session("default")
+
+
+def _make_stand_in(session_id: str = SESSION_KEY) -> SimpleNamespace:
+    """Minimal stand-in exposing only ``session_id``.
+
+    ``_toggle_yolo`` and ``_is_session_yolo_active`` are both pure methods
+    that only read ``self.session_id`` — no other CLI state is touched.
+    Calling them as unbound functions against this stand-in is equivalent
+    to invoking them on a fully-constructed ``HermesCLI`` for the
+    behaviour under test, and avoids the brittle prompt_toolkit / config
+    stubbing required to instantiate ``HermesCLI`` from this test file.
+    """
+    return SimpleNamespace(session_id=session_id)
+
+
+class TestToggleYoloIsSessionScoped:
+    """The CLI /yolo handler must mutate the session-yolo set, not the env var.
+
+    The env var path is dead-on-arrival because ``_YOLO_MODE_FROZEN`` is
+    captured once at module import, long before the CLI's ``/yolo`` command
+    can run.
+    """
+
+    def test_toggle_yolo_enables_session_bypass(self):
+        stand_in = _make_stand_in()
+
+        assert approval_module.is_session_yolo_enabled(SESSION_KEY) is False
+
+        with patch("cli._cprint"):
+            HermesCLI._toggle_yolo(stand_in)
+
+        assert approval_module.is_session_yolo_enabled(SESSION_KEY) is True
+
+    def test_toggle_yolo_disables_session_bypass_on_second_call(self):
+        stand_in = _make_stand_in()
+        with patch("cli._cprint"):
+            HermesCLI._toggle_yolo(stand_in)  # ON
+            assert approval_module.is_session_yolo_enabled(SESSION_KEY) is True
+            HermesCLI._toggle_yolo(stand_in)  # OFF
+            assert approval_module.is_session_yolo_enabled(SESSION_KEY) is False
+
+    def test_toggle_yolo_does_not_mutate_env_var(self):
+        """Toggling /yolo must not write ``HERMES_YOLO_MODE`` — that path is
+        frozen at import time and would mislead anyone reading the env later
+        (subprocesses, status bars wired to the env, the relaunch flag list)."""
+        stand_in = _make_stand_in()
+        with patch("cli._cprint"):
+            HermesCLI._toggle_yolo(stand_in)
+
+        assert os.environ.get("HERMES_YOLO_MODE") is None
+
+    def test_toggle_yolo_falls_back_to_default_when_session_id_missing(self):
+        """An edge case during CLI bootstrap: a ``/yolo`` triggered before the
+        session id is set should not blow up, and should land under the
+        ``default`` session key so the bypass still takes effect for any code
+        that resolves against the default key."""
+        stand_in = _make_stand_in(session_id="")
+        with patch("cli._cprint"):
+            HermesCLI._toggle_yolo(stand_in)
+
+        assert approval_module.is_session_yolo_enabled("default") is True
+
+    def test_two_independent_sessions_are_isolated(self):
+        """``/yolo`` toggled in one session must not bypass approvals in
+        another session — mirrors the gateway-side invariant."""
+        cli_a = _make_stand_in(session_id="session-yolo-a")
+        cli_b = _make_stand_in(session_id="session-yolo-b")
+
+        try:
+            with patch("cli._cprint"):
+                HermesCLI._toggle_yolo(cli_a)
+
+            assert approval_module.is_session_yolo_enabled("session-yolo-a") is True
+            assert approval_module.is_session_yolo_enabled("session-yolo-b") is False
+        finally:
+            approval_module.clear_session("session-yolo-a")
+            approval_module.clear_session("session-yolo-b")
+
+
+class TestIsSessionYoloActiveHelper:
+    """The status-bar helper must read the live session-yolo state, not the
+    env var (which is the bug class this PR fixes)."""
+
+    def test_helper_reflects_toggle(self):
+        stand_in = _make_stand_in()
+
+        assert HermesCLI._is_session_yolo_active(stand_in) is False
+
+        with patch("cli._cprint"):
+            HermesCLI._toggle_yolo(stand_in)
+
+        assert HermesCLI._is_session_yolo_active(stand_in) is True
+
+        with patch("cli._cprint"):
+            HermesCLI._toggle_yolo(stand_in)
+
+        assert HermesCLI._is_session_yolo_active(stand_in) is False
+
+    def test_helper_honors_frozen_yolo_mode(self):
+        """``hermes --yolo`` sets ``HERMES_YOLO_MODE`` before tool imports, so
+        ``_YOLO_MODE_FROZEN`` ends up True. The status bar should still
+        reflect YOLO on in that case even when the session toggle is off."""
+        stand_in = _make_stand_in()
+
+        with patch.object(approval_module, "_YOLO_MODE_FROZEN", True):
+            assert HermesCLI._is_session_yolo_active(stand_in) is True
+
+
+class TestToggleYoloEndToEnd:
+    """End-to-end: a dangerous command must auto-approve through the same
+    ``check_all_command_guards`` path the terminal tool uses."""
+
+    def test_toggle_yolo_bypasses_dangerous_command_check(self):
+        stand_in = _make_stand_in()
+
+        token = approval_module.set_current_session_key(SESSION_KEY)
+        try:
+            with patch("cli._cprint"):
+                HermesCLI._toggle_yolo(stand_in)  # YOLO ON
+
+            result = approval_module.check_all_command_guards(
+                "rm -rf /tmp/scratch-xyzzy", "local",
+            )
+            assert result["approved"] is True, (
+                f"YOLO toggle should auto-approve dangerous commands, got: {result}"
+            )
+        finally:
+            approval_module.reset_current_session_key(token)
+
+
+class TestIsSessionYoloActiveAttrSafety:
+    """The status-bar helper runs against partially-constructed CLI fixtures
+    (tests use ``HermesCLI.__new__(HermesCLI)`` to skip ``__init__``). It must
+    not raise ``AttributeError`` when ``session_id`` is absent — the
+    status-bar builders swallow exceptions silently and lose every field
+    after the failure, producing a regression that's hard to track back to
+    the helper."""
+
+    def test_helper_survives_missing_session_id_attr(self):
+        # SimpleNamespace WITHOUT session_id mimics __new__-built fixtures.
+        from types import SimpleNamespace
+        no_attr = SimpleNamespace()
+        # Must return False, not raise.
+        assert HermesCLI._is_session_yolo_active(no_attr) is False
+
+
+class TestSessionRotationTransfersYolo:
+    """When the CLI's ``session_id`` rotates mid-run (``/branch``, auto
+    compression continuation), YOLO state keyed under the old id must move
+    to the new id. Otherwise the user's ``/yolo ON`` silently reverts on
+    the next turn — the same UX failure mode this PR set out to fix.
+    Mirrors ``tui_gateway/server.py`` ~line 1297-1305."""
+
+    def test_transfer_moves_yolo_to_new_session(self):
+        stand_in = _make_stand_in(session_id="old-id")
+        try:
+            approval_module.enable_session_yolo("old-id")
+            assert approval_module.is_session_yolo_enabled("old-id") is True
+
+            HermesCLI._transfer_session_yolo(stand_in, "old-id", "new-id")
+
+            assert approval_module.is_session_yolo_enabled("new-id") is True
+            assert approval_module.is_session_yolo_enabled("old-id") is False
+        finally:
+            approval_module.clear_session("old-id")
+            approval_module.clear_session("new-id")
+
+    def test_transfer_is_noop_when_yolo_was_off(self):
+        stand_in = _make_stand_in(session_id="old-id")
+        try:
+            HermesCLI._transfer_session_yolo(stand_in, "old-id", "new-id")
+            assert approval_module.is_session_yolo_enabled("new-id") is False
+            assert approval_module.is_session_yolo_enabled("old-id") is False
+        finally:
+            approval_module.clear_session("old-id")
+            approval_module.clear_session("new-id")
+
+    def test_transfer_is_noop_when_ids_match(self):
+        stand_in = _make_stand_in(session_id="same-id")
+        try:
+            approval_module.enable_session_yolo("same-id")
+            HermesCLI._transfer_session_yolo(stand_in, "same-id", "same-id")
+            # Must NOT have been disabled — same-id == same-id is a no-op,
+            # not a "disable then re-enable" round-trip.
+            assert approval_module.is_session_yolo_enabled("same-id") is True
+        finally:
+            approval_module.clear_session("same-id")
+
+    def test_transfer_handles_empty_inputs_safely(self):
+        stand_in = _make_stand_in(session_id="x")
+        # Both directions of empty input should be safe no-ops; nothing
+        # to transfer from "" / to "".
+        HermesCLI._transfer_session_yolo(stand_in, "", "new")
+        HermesCLI._transfer_session_yolo(stand_in, "old", "")
+        # Neither key should have been touched.
+        assert approval_module.is_session_yolo_enabled("new") is False
+        assert approval_module.is_session_yolo_enabled("old") is False
diff --git a/tests/cli/test_compress_here.py b/tests/cli/test_compress_here.py
new file mode 100644
index 00000000000..115a12539e5
--- /dev/null
+++ b/tests/cli/test_compress_here.py
@@ -0,0 +1,119 @@
+"""Tests for /compress here [N] — boundary-aware partial compression.
+
+Verifies the CLI handler (_manual_compress) splits the history, compresses
+only the head, and re-appends the verbatim tail. Inspired by Claude Code's
+Rewind "Summarize up to here" action (v2.1.139, May 2026).
+"""
+
+from unittest.mock import MagicMock, patch
+
+from tests.cli.test_cli_init import _make_cli
+
+
+def _make_history() -> list[dict[str, str]]:
+    # 8 messages = 4 exchanges.
+    h: list[dict[str, str]] = []
+    for i in range(4):
+        h.append({"role": "user", "content": f"u{i}"})
+        h.append({"role": "assistant", "content": f"a{i}"})
+    return h
+
+
+def _wire_agent(shell, compressed_head):
+    shell.agent = MagicMock()
+    shell.agent.compression_enabled = True
+    shell.agent._cached_system_prompt = ""
+    shell.agent.session_id = None
+    shell.agent.tools = None
+    shell.agent._compress_context.return_value = (compressed_head, "")
+
+
+def test_compress_here_compresses_head_only(capsys):
+    """/compress here 2 passes only the head to _compress_context."""
+    shell = _make_cli()
+    history = _make_history()
+    shell.conversation_history = history
+    # Pretend compression collapses the head into a single summary message.
+    summary = [{"role": "user", "content": "[summary of earlier turns]"}]
+    _wire_agent(shell, summary)
+
+    with patch("agent.model_metadata.estimate_request_tokens_rough", return_value=100):
+        shell._manual_compress("/compress here 2")
+
+    # _compress_context should have been called with the HEAD only
+    # (everything before the last 2 user-starts = first 4 messages).
+    shell.agent._compress_context.assert_called_once()
+    call = shell.agent._compress_context.call_args
+    passed_head = call.args[0]
+    assert passed_head == history[:4]
+    # focus_topic must be None in partial mode (modes are exclusive).
+    assert call.kwargs.get("focus_topic") is None
+
+
+def test_compress_here_reappends_verbatim_tail(capsys):
+    """The most recent exchanges are preserved verbatim after the summary."""
+    shell = _make_cli()
+    history = _make_history()
+    shell.conversation_history = history
+    # Head compresses to an assistant-role summary so the seam
+    # (assistant -> user tail) is already valid — tail rides along whole.
+    summary = [{"role": "assistant", "content": "[summary]"}]
+    _wire_agent(shell, summary)
+
+    with patch("agent.model_metadata.estimate_request_tokens_rough", return_value=100):
+        shell._manual_compress("/compress here 2")
+
+    # Result = compressed head + verbatim tail (last 2 exchanges).
+    assert shell.conversation_history == summary + history[4:]
+    # Tail boundary keeps role alternation valid (tail starts on user).
+    assert history[4]["role"] == "user"
+    # No consecutive same-role user/assistant messages anywhere.
+    roles = [m["role"] for m in shell.conversation_history
+             if m["role"] in ("user", "assistant")]
+    assert all(roles[i] != roles[i + 1] for i in range(len(roles) - 1))
+
+
+def test_compress_here_banner_mentions_summarizing_up_to_here(capsys):
+    shell = _make_cli()
+    history = _make_history()
+    shell.conversation_history = history
+    _wire_agent(shell, [{"role": "user", "content": "[summary]"}])
+
+    with patch("agent.model_metadata.estimate_request_tokens_rough", return_value=100):
+        shell._manual_compress("/compress here")
+
+    out = capsys.readouterr().out
+    assert "Summarizing up to here" in out
+    assert "verbatim" in out
+
+
+def test_bare_compress_still_full(capsys):
+    """/compress with no args compresses the whole history (full mode)."""
+    shell = _make_cli()
+    history = _make_history()
+    shell.conversation_history = history
+    _wire_agent(shell, list(history))
+
+    with patch("agent.model_metadata.estimate_request_tokens_rough", return_value=100):
+        shell._manual_compress("/compress")
+
+    call = shell.agent._compress_context.call_args
+    # Full mode passes the entire history as the head.
+    assert call.args[0] == history
+    out = capsys.readouterr().out
+    assert "Summarizing up to here" not in out
+
+
+def test_focus_still_works(capsys):
+    """/compress <focus> keeps the existing focus behavior."""
+    shell = _make_cli()
+    history = _make_history()
+    shell.conversation_history = history
+    _wire_agent(shell, list(history))
+
+    with patch("agent.model_metadata.estimate_request_tokens_rough", return_value=100):
+        shell._manual_compress("/compress database schema")
+
+    call = shell.agent._compress_context.call_args
+    assert call.args[0] == history
+    assert call.kwargs.get("focus_topic") == "database schema"
diff --git a/tests/cli/test_ctrl_enter_newline.py b/tests/cli/test_ctrl_enter_newline.py
index 57056ab0e18..58cdd7c26eb 100644
--- a/tests/cli/test_ctrl_enter_newline.py
+++ b/tests/cli/test_ctrl_enter_newline.py
@@ -51,8 +51,20 @@ def test_windows_terminal_session_preserves_newline():
             assert cli_mod._preserve_ctrl_enter_newline() is True
 
 
+def test_ghostty_tmux_session_preserves_ctrl_j_newline():
+    """Ghostty-inherited env survives tmux even when TERM_PROGRAM becomes tmux."""
+    import cli as cli_mod
+    with patch.object(sys, "platform", "linux"):
+        with patch.dict(
+            os.environ,
+            {"TERM": "tmux-256color", "TERM_PROGRAM": "tmux", "GHOSTTY_RESOURCES_DIR": "/usr/share/ghostty"},
+            clear=True,
+        ):
+            assert cli_mod._preserve_ctrl_enter_newline() is True
+
+
 def test_pure_local_linux_does_not_preserve():
-    """A bare local Linux TTY (no SSH/WSL/WT) keeps c-j → submit so docker exec
+    """A bare local Linux TTY (no SSH/WSL/WT/Ghostty) keeps c-j → submit so docker exec
     style Enter-as-LF stays usable."""
     import cli as cli_mod
     # Stub out /proc reads — those are the WSL fallback signal.
diff --git a/tests/cli/test_cwd_env_respect.py b/tests/cli/test_cwd_env_respect.py
index 04e62cc12f8..49f0e603964 100644
--- a/tests/cli/test_cwd_env_respect.py
+++ b/tests/cli/test_cwd_env_respect.py
@@ -6,8 +6,6 @@ Rules:
 - Non-local with explicit path: keep as-is.
 """
 
-import os
-import pytest
 
 _CWD_PLACEHOLDERS = (".", "auto", "cwd")
 
diff --git a/tests/cli/test_partial_compress.py b/tests/cli/test_partial_compress.py
new file mode 100644
index 00000000000..a6cc30ff367
--- /dev/null
+++ b/tests/cli/test_partial_compress.py
@@ -0,0 +1,198 @@
+"""Tests for hermes_cli.partial_compress — the pure split/parse helpers
+behind ``/compress here [N]`` (boundary-aware "summarize up to here").
+
+Inspired by Claude Code's Rewind "Summarize up to here" action.
+"""
+
+from hermes_cli.partial_compress import (
+    DEFAULT_KEEP_LAST,
+    MAX_KEEP_LAST,
+    parse_partial_compress_args,
+    rejoin_compressed_head_and_tail,
+    split_history_for_partial_compress,
+)
+
+
+def _history(n_pairs: int) -> list[dict[str, str]]:
+    """Build n_pairs of (user, assistant) exchanges."""
+    h: list[dict[str, str]] = []
+    for i in range(n_pairs):
+        h.append({"role": "user", "content": f"u{i}"})
+        h.append({"role": "assistant", "content": f"a{i}"})
+    return h
+
+
+# ── parse_partial_compress_args ──────────────────────────────────────
+
+
+def test_empty_args_is_full_compress():
+    partial, keep, focus = parse_partial_compress_args("")
+    assert partial is False
+    assert keep == DEFAULT_KEEP_LAST
+    assert focus is None
+
+
+def test_here_defaults_keep_last():
+    partial, keep, focus = parse_partial_compress_args("here")
+    assert partial is True
+    assert keep == DEFAULT_KEEP_LAST
+    assert focus is None
+
+
+def test_here_with_count():
+    partial, keep, focus = parse_partial_compress_args("here 4")
+    assert partial is True
+    assert keep == 4
+    assert focus is None
+
+
+def test_up_to_here_alias():
+    partial, keep, focus = parse_partial_compress_args("up to here 3")
+    assert partial is True
+    assert keep == 3
+    assert focus is None
+
+
+def test_keep_flag_forms():
+    for arg in ("--keep 5", "-k 5", "--keep=5"):
+        partial, keep, focus = parse_partial_compress_args(arg)
+        assert partial is True, arg
+        assert keep == 5, arg
+        assert focus is None, arg
+
+
+def test_focus_topic_when_not_boundary_form():
+    partial, keep, focus = parse_partial_compress_args("database schema")
+    assert partial is False
+    assert focus == "database schema"
+
+
+def test_here_count_clamped_low_and_high():
+    _, keep_low, _ = parse_partial_compress_args("here 0")
+    assert keep_low == 1
+    _, keep_high, _ = parse_partial_compress_args(f"here {MAX_KEEP_LAST + 50}")
+    assert keep_high == MAX_KEEP_LAST
+
+
+def test_here_garbage_count_falls_back_to_default():
+    partial, keep, focus = parse_partial_compress_args("here lots")
+    assert partial is True
+    assert keep == DEFAULT_KEEP_LAST
+
+
+# ── split_history_for_partial_compress ───────────────────────────────
+
+
+def test_split_keeps_last_n_exchanges():
+    h = _history(5)  # 10 messages: u0 a0 u1 a1 u2 a2 u3 a3 u4 a4
+    head, tail = split_history_for_partial_compress(h, keep_last=2)
+    # Keep last 2 user-starts → tail begins at u3 (index 6).
+    assert tail == h[6:]
+    assert head == h[:6]
+    # Tail must begin on a user turn (role-alternation safety).
+    assert tail[0]["role"] == "user"
+
+
+def test_split_default_keep():
+    h = _history(4)  # 8 messages
+    head, tail = split_history_for_partial_compress(h, keep_last=DEFAULT_KEEP_LAST)
+    assert tail[0]["role"] == "user"
+    assert head + tail == h
+    assert len(head) > 0
+
+
+def test_split_tail_always_starts_on_user():
+    # Tool messages interleaved — tail must still snap to a user turn.
+    h = [
+        {"role": "user", "content": "u0"},
+        {"role": "assistant", "content": "a0"},
+        {"role": "user", "content": "u1"},
+        {"role": "assistant", "content": "a1"},
+        {"role": "tool", "content": "t1"},
+        {"role": "assistant", "content": "a1b"},
+        {"role": "user", "content": "u2"},
+        {"role": "assistant", "content": "a2"},
+    ]
+    head, tail = split_history_for_partial_compress(h, keep_last=1)
+    assert tail[0]["role"] == "user"
+    assert tail[0]["content"] == "u2"
+    assert head + tail == h
+
+
+def test_split_degenerate_returns_no_tail():
+    # keep_last larger than the number of exchanges → nothing to compress.
+    h = _history(2)  # 4 messages, 2 user turns
+    head, tail = split_history_for_partial_compress(h, keep_last=5)
+    # Boundary lands at the first user turn → head empty → signal full.
+    assert tail == []
+    assert head == h
+
+
+def test_split_empty_history():
+    head, tail = split_history_for_partial_compress([], keep_last=2)
+    assert head == []
+    assert tail == []
+
+
+def test_split_rejoin_preserves_all_messages():
+    h = _history(6)
+    head, tail = split_history_for_partial_compress(h, keep_last=3)
+    assert head + tail == h
+
+
+# ── rejoin_compressed_head_and_tail (seam-alternation guard) ─────────
+
+
+def _roles(msgs):
+    return [m["role"] for m in msgs if m["role"] in ("user", "assistant")]
+
+
+def _no_consecutive_dupes(msgs):
+    r = _roles(msgs)
+    return all(r[i] != r[i + 1] for i in range(len(r) - 1))
+
+
+def test_rejoin_valid_seam_assistant_then_user():
+    # Normal case: head ends on assistant, tail starts on user → valid.
+    head = [{"role": "user", "content": "[summary]"},
+            {"role": "assistant", "content": "ack"}]
+    tail = [{"role": "user", "content": "next"},
+            {"role": "assistant", "content": "reply"}]
+    out = rejoin_compressed_head_and_tail(head, tail)
+    assert out == head + tail
+    assert _no_consecutive_dupes(out)
+
+
+def test_rejoin_user_user_seam_merges():
+    # Degenerate head ending on a user summary; tail starts on user.
+    head = [{"role": "user", "content": "[summary of head]"}]
+    tail = [{"role": "user", "content": "latest question"},
+            {"role": "assistant", "content": "answer"}]
+    out = rejoin_compressed_head_and_tail(head, tail)
+    assert _no_consecutive_dupes(out), out
+    # The two user messages were merged into one.
+    assert out[0]["content"] == "[summary of head]\n\nlatest question"
+    assert out[1] == {"role": "assistant", "content": "answer"}
+
+
+def test_rejoin_assistant_assistant_seam_merges():
+    head = [{"role": "user", "content": "q"},
+            {"role": "assistant", "content": "head end"}]
+    tail = [{"role": "assistant", "content": "tail start"},
+            {"role": "user", "content": "u"}]
+    out = rejoin_compressed_head_and_tail(head, tail)
+    assert _no_consecutive_dupes(out), out
+    assert out[-2]["content"] == "head end\n\ntail start"
+
+
+def test_rejoin_empty_tail_returns_head():
+    head = [{"role": "user", "content": "x"}]
+    assert rejoin_compressed_head_and_tail(head, []) == head
+
+
+def test_rejoin_tool_seam_left_alone():
+    # tool->tool is the one legal repetition; don't merge.
+    head = [{"role": "user", "content": "q"}, {"role": "tool", "content": "t1"}]
+    tail = [{"role": "user", "content": "u"}]
+    out = rejoin_compressed_head_and_tail(head, tail)
+    assert out == head + tail
diff --git a/tests/cli/test_personality_none.py b/tests/cli/test_personality_none.py
index ad5e87e880a..3fa1ab2a693 100644
--- a/tests/cli/test_personality_none.py
+++ b/tests/cli/test_personality_none.py
@@ -1,6 +1,6 @@
 """Tests for /personality none — clearing personality overlay."""
 import pytest
-from unittest.mock import MagicMock, patch, mock_open
+from unittest.mock import MagicMock, patch
 import yaml
 
 
diff --git a/tests/cli/test_quick_commands.py b/tests/cli/test_quick_commands.py
index 57a39e8c53d..5f4ce2d32ac 100644
--- a/tests/cli/test_quick_commands.py
+++ b/tests/cli/test_quick_commands.py
@@ -1,7 +1,7 @@
 """Tests for user-defined quick commands that bypass the agent loop."""
 import os
 import subprocess
-from unittest.mock import MagicMock, patch, AsyncMock
+from unittest.mock import MagicMock, patch
 from rich.text import Text
 import pytest
 
diff --git a/tests/cli/test_resume_display.py b/tests/cli/test_resume_display.py
index be9282f8595..5ccac59ba6e 100644
--- a/tests/cli/test_resume_display.py
+++ b/tests/cli/test_resume_display.py
@@ -10,7 +10,6 @@ import sys
 from io import StringIO
 from unittest.mock import MagicMock, patch
 
-import pytest
 import cli as cli_mod
 
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
@@ -719,7 +718,6 @@ class TestResumeDisplayConfig:
 
     def test_cli_defaults_have_resume_display(self):
         """cli.py load_cli_config defaults include resume_display."""
-        import cli as _cli_mod
         from cli import load_cli_config
 
         with (
diff --git a/tests/cli/test_resume_quiet_stderr.py b/tests/cli/test_resume_quiet_stderr.py
index c3421a105ec..df82ce4dda9 100644
--- a/tests/cli/test_resume_quiet_stderr.py
+++ b/tests/cli/test_resume_quiet_stderr.py
@@ -13,7 +13,6 @@ Interactive mode (tool_progress_mode == "full") still uses ChatConsole.
 from datetime import datetime
 from unittest.mock import MagicMock, patch
 
-import pytest
 
 from cli import HermesCLI
 
diff --git a/tests/cli/test_save_conversation_location.py b/tests/cli/test_save_conversation_location.py
index 972c8fcb159..5e460fe81bc 100644
--- a/tests/cli/test_save_conversation_location.py
+++ b/tests/cli/test_save_conversation_location.py
@@ -11,7 +11,6 @@ the absolute path plus the resume hint for the live session.
 from __future__ import annotations
 
 import json
-import os
 import sys
 from datetime import datetime
 from pathlib import Path
diff --git a/tests/cli/test_session_boundary_hooks.py b/tests/cli/test_session_boundary_hooks.py
index 19de4cd97a3..3fcab991e95 100644
--- a/tests/cli/test_session_boundary_hooks.py
+++ b/tests/cli/test_session_boundary_hooks.py
@@ -1,9 +1,5 @@
-import pytest
 from unittest.mock import MagicMock, patch
 from hermes_cli.plugins import VALID_HOOKS, PluginManager
-import os
-import shutil
-import tempfile
 from cli import HermesCLI
 
 
diff --git a/tests/cli/test_slash_command_interrupt.py b/tests/cli/test_slash_command_interrupt.py
index 37e38c8c5f2..70f9799693c 100644
--- a/tests/cli/test_slash_command_interrupt.py
+++ b/tests/cli/test_slash_command_interrupt.py
@@ -11,7 +11,7 @@ prompt_toolkit input loop. We exercise the same try/except by calling
 through a thin wrapper that mirrors the real dispatch shape.
 """
 
-from unittest.mock import MagicMock, patch
+from unittest.mock import patch
 
 from cli import HermesCLI
 
diff --git a/tests/cli/test_slash_confirm_windows.py b/tests/cli/test_slash_confirm_windows.py
index 980bae32d26..c067bd791ca 100644
--- a/tests/cli/test_slash_confirm_windows.py
+++ b/tests/cli/test_slash_confirm_windows.py
@@ -20,7 +20,6 @@ import threading
 import time
 from unittest.mock import MagicMock, patch
 
-import pytest
 
 
 def _make_cli():
diff --git a/tests/cli/test_stream_delta_think_tag.py b/tests/cli/test_stream_delta_think_tag.py
index e7c406b37ba..93c738b7304 100644
--- a/tests/cli/test_stream_delta_think_tag.py
+++ b/tests/cli/test_stream_delta_think_tag.py
@@ -3,7 +3,6 @@ import sys
 import os
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
 
-import pytest
 
 
 def _make_cli_stub():
diff --git a/tests/cli/test_surrogate_sanitization.py b/tests/cli/test_surrogate_sanitization.py
index 9d677352c99..2a04a5c246f 100644
--- a/tests/cli/test_surrogate_sanitization.py
+++ b/tests/cli/test_surrogate_sanitization.py
@@ -13,7 +13,6 @@ from run_agent import (
     _sanitize_surrogates,
     _sanitize_messages_surrogates,
     _sanitize_structure_surrogates,
-    _SURROGATE_RE,
 )
 
 
diff --git a/tests/cli/test_worktree.py b/tests/cli/test_worktree.py
index b139acf7d2f..221903e0e96 100644
--- a/tests/cli/test_worktree.py
+++ b/tests/cli/test_worktree.py
@@ -9,7 +9,6 @@ import shutil
 import subprocess
 import pytest
 from pathlib import Path
-from unittest.mock import patch, MagicMock
 
 
 @pytest.fixture
@@ -397,7 +396,6 @@ class TestWorktreeInclude:
         assert info is not None
 
         # Manually copy .worktreeinclude entries (mirrors cli.py logic)
-        import shutil
         include_file = git_repo / ".worktreeinclude"
         wt_path = Path(info["path"])
         for line in include_file.read_text().splitlines():
diff --git a/tests/conftest.py b/tests/conftest.py
index 81067be6f3e..17bc68d8031 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -21,10 +21,8 @@ test runner at ``scripts/run_tests.sh``.
 
 import asyncio
 import os
-import re
 import sys
 from pathlib import Path
-from unittest.mock import patch
 
 import pytest
 
@@ -227,6 +225,8 @@ _HERMES_BEHAVIORAL_VARS = frozenset({
     "TERMINAL_CONTAINER_DISK",
     "TERMINAL_CONTAINER_MEMORY",
     "TERMINAL_CONTAINER_PERSISTENT",
+    "TERMINAL_DOCKER_PERSIST_ACROSS_PROCESSES",
+    "TERMINAL_DOCKER_ORPHAN_REAPER",
     "TERMINAL_DOCKER_RUN_AS_HOST_USER",
     "BROWSER_CDP_URL",
     "CAMOFOX_URL",
diff --git a/tests/cron/test_cron_context_from.py b/tests/cron/test_cron_context_from.py
index f0277d25e1c..5dabaa37782 100644
--- a/tests/cron/test_cron_context_from.py
+++ b/tests/cron/test_cron_context_from.py
@@ -45,7 +45,7 @@ class TestJobContextFromField:
         assert loaded["context_from"] == [job_a["id"]]
 
     def test_create_job_with_context_from_list(self, cron_env):
-        from cron.jobs import create_job, get_job
+        from cron.jobs import create_job
 
         job_a = create_job(prompt="Find news", schedule="every 1h")
         job_b = create_job(prompt="Find weather", schedule="every 1h")
diff --git a/tests/cron/test_cron_inactivity_timeout.py b/tests/cron/test_cron_inactivity_timeout.py
index 67e932089f7..5394a50f368 100644
--- a/tests/cron/test_cron_inactivity_timeout.py
+++ b/tests/cron/test_cron_inactivity_timeout.py
@@ -12,11 +12,8 @@ import concurrent.futures
 import os
 import sys
 import time
-import threading
 from pathlib import Path
-from unittest.mock import MagicMock, patch
 
-import pytest
 
 # Ensure project root is importable
 sys.path.insert(0, str(Path(__file__).parent.parent.parent))
diff --git a/tests/cron/test_cron_no_agent.py b/tests/cron/test_cron_no_agent.py
index 583cd34099e..af94713868b 100644
--- a/tests/cron/test_cron_no_agent.py
+++ b/tests/cron/test_cron_no_agent.py
@@ -12,7 +12,6 @@ Covers:
 from __future__ import annotations
 
 import json
-from pathlib import Path
 from unittest.mock import patch
 
 import pytest
diff --git a/tests/cron/test_cron_profile.py b/tests/cron/test_cron_profile.py
index 887849e635f..7ed28ba381e 100644
--- a/tests/cron/test_cron_profile.py
+++ b/tests/cron/test_cron_profile.py
@@ -8,7 +8,6 @@ from __future__ import annotations
 
 import json
 import os
-from pathlib import Path
 
 import pytest
 
diff --git a/tests/cron/test_cron_script.py b/tests/cron/test_cron_script.py
index 2905339bece..7a6a06d5348 100644
--- a/tests/cron/test_cron_script.py
+++ b/tests/cron/test_cron_script.py
@@ -9,11 +9,9 @@ Tests cover:
 
 import json
 import os
-import stat
 import sys
 import textwrap
 from pathlib import Path
-from unittest.mock import patch
 
 import pytest
 
diff --git a/tests/cron/test_cron_workdir.py b/tests/cron/test_cron_workdir.py
index 5f317c4f4c2..678038cb59b 100644
--- a/tests/cron/test_cron_workdir.py
+++ b/tests/cron/test_cron_workdir.py
@@ -13,7 +13,6 @@ Covers:
 from __future__ import annotations
 
 import json
-from pathlib import Path
 
 import pytest
 
diff --git a/tests/cron/test_file_permissions.py b/tests/cron/test_file_permissions.py
index cc816f6fa85..3f146829d80 100644
--- a/tests/cron/test_file_permissions.py
+++ b/tests/cron/test_file_permissions.py
@@ -1,6 +1,5 @@
 """Tests for file permissions hardening on sensitive files."""
 
-import json
 import os
 import stat
 import tempfile
diff --git a/tests/cron/test_jobs.py b/tests/cron/test_jobs.py
index d1e5df48be8..d044f051ff1 100644
--- a/tests/cron/test_jobs.py
+++ b/tests/cron/test_jobs.py
@@ -1,11 +1,8 @@
 """Tests for cron/jobs.py — schedule parsing, job CRUD, and due-job detection."""
 
-import json
 import threading
 import pytest
 from datetime import datetime, timedelta, timezone
-from pathlib import Path
-from unittest.mock import patch
 
 from cron.jobs import (
     parse_duration,
diff --git a/tests/cron/test_scheduler.py b/tests/cron/test_scheduler.py
index 94587fccedd..38da3fe4087 100644
--- a/tests/cron/test_scheduler.py
+++ b/tests/cron/test_scheduler.py
@@ -1275,7 +1275,6 @@ class TestRunJobSessionPersistence:
         (issue #8585)
         """
         from cron.scheduler import tick
-        from cron.jobs import load_jobs, save_jobs
 
         job = {
             "id": "empty-job",
@@ -1450,9 +1449,19 @@ class TestRunJobConfigLogging:
             "prompt": "hello",
         }
 
+        # Mock heavy post-yaml work so the test only exercises the warning
+        # path. Without these mocks, _run_job_impl continues into provider
+        # resolution and MCP discovery, both of which can spawn subprocesses
+        # / hit the network and have caused this test to time out on CI
+        # (>30s wall clock) under load. See PR #33661 follow-up.
         with patch("cron.scheduler._hermes_home", tmp_path), \
              patch("cron.scheduler._resolve_origin", return_value=None), \
              patch("dotenv.load_dotenv"), \
+             patch("hermes_cli.runtime_provider.resolve_runtime_provider",
+                   return_value={"provider": "openrouter", "api_key": "x",
+                                 "base_url": "https://example.invalid",
+                                 "api_mode": "chat_completions"}), \
+             patch("tools.mcp_tool.discover_mcp_tools", return_value=[]), \
              patch("run_agent.AIAgent") as mock_agent_cls:
             mock_agent = MagicMock()
             mock_agent.run_conversation.return_value = {"final_response": "ok"}
@@ -1482,6 +1491,11 @@ class TestRunJobConfigLogging:
         with patch("cron.scheduler._hermes_home", tmp_path), \
              patch("cron.scheduler._resolve_origin", return_value=None), \
              patch("dotenv.load_dotenv"), \
+             patch("hermes_cli.runtime_provider.resolve_runtime_provider",
+                   return_value={"provider": "openrouter", "api_key": "x",
+                                 "base_url": "https://example.invalid",
+                                 "api_mode": "chat_completions"}), \
+             patch("tools.mcp_tool.discover_mcp_tools", return_value=[]), \
              patch("run_agent.AIAgent") as mock_agent_cls:
             mock_agent = MagicMock()
             mock_agent.run_conversation.return_value = {"final_response": "ok"}
@@ -2285,7 +2299,6 @@ class TestParallelTick:
     def test_parallel_jobs_run_concurrently(self):
         """Two jobs launched in the same tick should overlap in time."""
         import threading
-        import time
 
         barrier = threading.Barrier(2, timeout=5)
         call_order = []
diff --git a/tests/cron/test_scheduler_mcp_init.py b/tests/cron/test_scheduler_mcp_init.py
index b751f0f00b2..a951fe4a7cf 100644
--- a/tests/cron/test_scheduler_mcp_init.py
+++ b/tests/cron/test_scheduler_mcp_init.py
@@ -15,9 +15,8 @@ short-circuit on already-connected servers.
 
 from __future__ import annotations
 
-from unittest.mock import patch, MagicMock
+from unittest.mock import patch
 
-import pytest
 
 
 
diff --git a/tests/docker/test_dashboard.py b/tests/docker/test_dashboard.py
index 56d4fa41c8a..d615ef94dd0 100644
--- a/tests/docker/test_dashboard.py
+++ b/tests/docker/test_dashboard.py
@@ -12,6 +12,7 @@ the realistic runtime context. See the conftest module docstring.
 """
 from __future__ import annotations
 
+import json
 import subprocess
 import time
 
@@ -87,7 +88,15 @@ def test_dashboard_slot_reports_up_when_enabled(
     """Symmetry: with HERMES_DASHBOARD=1, s6-svstat reports the slot as up."""
     subprocess.run(
         ["docker", "run", "-d", "--name", container_name,
-         "-e", "HERMES_DASHBOARD=1", built_image, "sleep", "120"],
+         "-e", "HERMES_DASHBOARD=1",
+         # The default dashboard host is 0.0.0.0, which now engages the
+         # OAuth auth gate. Without a provider registered (no
+         # HERMES_DASHBOARD_OAUTH_CLIENT_ID in this test env), start_server
+         # would fail closed and the slot would never come up. Pin the
+         # explicit insecure opt-in to keep this test focused on the s6
+         # supervision contract, not the auth gate.
+         "-e", "HERMES_DASHBOARD_INSECURE=1",
+         built_image, "sleep", "120"],
         check=True, capture_output=True, timeout=30,
     )
     # uvicorn takes a moment to bind; poll svstat.
@@ -112,7 +121,12 @@ def test_dashboard_opt_in_starts(
     """With HERMES_DASHBOARD=1, a dashboard process should be visible."""
     subprocess.run(
         ["docker", "run", "-d", "--name", container_name,
-         "-e", "HERMES_DASHBOARD=1", built_image, "sleep", "120"],
+         "-e", "HERMES_DASHBOARD=1",
+         # Default bind is 0.0.0.0; pin insecure opt-in so the auth gate
+         # doesn't fail-closed before the process can come up. See
+         # test_dashboard_slot_reports_up_when_enabled for the full rationale.
+         "-e", "HERMES_DASHBOARD_INSECURE=1",
+         built_image, "sleep", "120"],
         check=True, capture_output=True, timeout=30,
     )
     # Poll for the dashboard subprocess to appear — the entrypoint
@@ -131,6 +145,10 @@ def test_dashboard_port_override(
     subprocess.run(
         ["docker", "run", "-d", "--name", container_name,
          "-e", "HERMES_DASHBOARD=1", "-e", "HERMES_DASHBOARD_PORT=9120",
+         # Default bind is 0.0.0.0; pin insecure opt-in so the auth gate
+         # doesn't fail-closed before the port is bound. See
+         # test_dashboard_slot_reports_up_when_enabled for the full rationale.
+         "-e", "HERMES_DASHBOARD_INSECURE=1",
          built_image, "sleep", "120"],
         check=True, capture_output=True, timeout=30,
     )
@@ -160,7 +178,13 @@ def test_dashboard_restarts_after_crash(
     """
     subprocess.run(
         ["docker", "run", "-d", "--name", container_name,
-         "-e", "HERMES_DASHBOARD=1", built_image, "sleep", "120"],
+         "-e", "HERMES_DASHBOARD=1",
+         # Default bind is 0.0.0.0; pin insecure opt-in so the auth gate
+         # doesn't fail-closed before the supervised dashboard can come up.
+         # See test_dashboard_slot_reports_up_when_enabled for the full
+         # rationale.
+         "-e", "HERMES_DASHBOARD_INSECURE=1",
+         built_image, "sleep", "120"],
         check=True, capture_output=True, timeout=30,
     )
     # Wait for the first dashboard to come up.
@@ -201,3 +225,191 @@ def test_dashboard_restarts_after_crash(
     raise AssertionError(
         f"Dashboard not restarted after kill (first_pid={first_pid})"
     )
+
+
+# ---------------------------------------------------------------------------
+# OAuth auth-gate behaviour — regression guard for the dashboard-insecure
+# auto-injection bug. Pre-fix, the s6 run script appended `--insecure`
+# whenever `HERMES_DASHBOARD_HOST` was non-loopback, silently disabling
+# the OAuth gate on every container-deployed dashboard. The matching
+# static-text guard lives in tests/test_docker_home_override_scripts.py;
+# this is the behavioural end-to-end check.
+# ---------------------------------------------------------------------------
+
+
+def _http_probe(
+    container: str,
+    path: str,
+    *,
+    deadline_s: float = 60.0,
+) -> tuple[int, str]:
+    """Poll ``http://127.0.0.1:9119<path>`` from inside the container.
+
+    Returns ``(status_code, body)`` as soon as the dashboard answers any
+    HTTP response — 200, 401, 503, anything. The image doesn't ship
+    ``curl`` but the venv's stdlib ``urllib`` is good enough; we use a
+    proper ``try``/``except`` to intercept ``HTTPError`` because
+    ``urlopen`` raises on 4xx/5xx, and we treat those as legitimate
+    responses (the OAuth gate's 401 IS the success signal for the
+    gate-engaged test).
+
+    Connection errors (uvicorn still starting, fail-closed exited) keep
+    the poll loop running until ``deadline_s`` elapses.
+
+    The probe Python program is fed over stdin (``python -``) rather
+    than ``python -c`` so we can use proper multi-line syntax with
+    ``try``/``except`` blocks without escaping hell.
+
+    Raises ``AssertionError`` on timeout.
+    """
+    py_program = f"""\
+import urllib.request, urllib.error
+req = urllib.request.Request("http://127.0.0.1:9119{path}")
+try:
+    r = urllib.request.urlopen(req, timeout=5)
+    print(r.status)
+    print(r.read().decode(), end="")
+except urllib.error.HTTPError as h:
+    print(h.code)
+    print(h.read().decode(), end="")
+"""
+    # Feed the program over stdin via a heredoc so docker_exec_sh's
+    # single bash string stays clean. The 'PY' delimiter is quoted to
+    # disable shell expansion inside the heredoc body.
+    probe = (
+        "/opt/hermes/.venv/bin/python - <<'PY'\n"
+        f"{py_program}"
+        "PY"
+    )
+    end = time.monotonic() + deadline_s
+    last_err = ""
+    while time.monotonic() < end:
+        r = docker_exec_sh(container, probe, timeout=10)
+        if r.returncode == 0 and r.stdout.strip():
+            lines = r.stdout.split("\n", 1)
+            try:
+                status = int(lines[0].strip())
+                body = lines[1] if len(lines) > 1 else ""
+                return status, body
+            except (ValueError, IndexError) as exc:
+                last_err = f"parse: {exc!r} / stdout={r.stdout!r}"
+        else:
+            last_err = f"rc={r.returncode} stderr={r.stderr!r}"
+        time.sleep(0.5)
+    raise AssertionError(
+        f"Probe of {path} never returned HTTP within {deadline_s}s; "
+        f"last error: {last_err}"
+    )
+
+
+def test_dashboard_oauth_gate_engages_on_non_loopback_bind(
+    built_image: str, container_name: str,
+) -> None:
+    """The s6 dashboard run script must NOT auto-add ``--insecure`` when the
+    dashboard binds to ``0.0.0.0``. The OAuth auth gate engages on its own
+    when a ``DashboardAuthProvider`` is registered (the bundled nous
+    provider activates whenever ``HERMES_DASHBOARD_OAUTH_CLIENT_ID`` is
+    set).
+
+    Regression guard for the wildcard-subdomain rollout where every
+    portal-provisioned agent binds ``0.0.0.0`` and relies on the OAuth
+    gate to authenticate browser callers. Before this fix, the run script
+    flipped ``--insecure`` on for any non-loopback bind, which routed
+    ``start_server`` straight back into the legacy ``allow_public=True``
+    branch and disabled the gate every time.
+
+    We verify two independent observable consequences of the gate being
+    on:
+
+    1. ``/api/auth/providers`` (publicly reachable through the gate so
+       the login page can bootstrap) returns 200 with ``nous`` in the
+       provider list — proves the bundled provider registered.
+    2. ``/api/sessions`` (a gated route under both the legacy
+       ``_SESSION_TOKEN`` middleware and the OAuth gate) returns 401
+       to an unauthenticated caller — proves the OAuth gate is actively
+       intercepting browser traffic. We deliberately probe a gated route
+       here rather than ``/api/status``: status sits in the shared
+       ``PUBLIC_API_PATHS`` allowlist (portal liveness probe target) and
+       responds 200 without a cookie under both gates, so it cannot
+       distinguish "gate on" from "gate off".
+    """
+    subprocess.run(
+        ["docker", "run", "-d", "--name", container_name,
+         "-e", "HERMES_DASHBOARD=1",
+         "-e", "HERMES_DASHBOARD_HOST=0.0.0.0",
+         "-e", "HERMES_DASHBOARD_OAUTH_CLIENT_ID=agent:test-instance",
+         built_image, "sleep", "120"],
+        check=True, capture_output=True, timeout=30,
+    )
+
+    # (1) Provider registry visible via the public bootstrap endpoint.
+    status_code, body = _http_probe(container_name, "/api/auth/providers")
+    assert status_code == 200, (
+        f"/api/auth/providers should return 200 when a provider is "
+        f"registered; got {status_code} body={body!r}"
+    )
+    payload = json.loads(body)
+    provider_names = [p.get("name") for p in payload.get("providers", [])]
+    assert "nous" in provider_names, (
+        "Bundled dashboard_auth/nous provider should register when "
+        f"HERMES_DASHBOARD_OAUTH_CLIENT_ID is set. Got: {payload!r}"
+    )
+
+    # (2) A gated route (``/api/sessions``) returns 401 to an
+    #     unauthenticated caller — the OAuth gate is intercepting.
+    status_code, body = _http_probe(container_name, "/api/sessions")
+    assert status_code == 401, (
+        "OAuth gate must intercept gated /api/* routes on 0.0.0.0 bind "
+        "when a provider is registered and HERMES_DASHBOARD_INSECURE "
+        f"is unset. Got: status={status_code} body={body!r}"
+    )
+
+    # (3) ``/api/status`` remains 200 under the gate — it's in the shared
+    #     ``PUBLIC_API_PATHS`` allowlist so NAS's wildcard-subdomain
+    #     liveness probe (``fly-provider.ts`` ``getInstanceRuntimeStatus``)
+    #     can reach it without a cookie. Regression guard: this allowlist
+    #     drifted once already and surfaced every healthy agent as
+    #     STARTING/down in the portal UI.
+    status_code, body = _http_probe(container_name, "/api/status")
+    assert status_code == 200, (
+        "/api/status must remain publicly reachable under the OAuth gate "
+        "— the portal uses it as the wildcard-subdomain liveness probe. "
+        f"Got: status={status_code} body={body!r}"
+    )
+    status = json.loads(body)
+    assert status.get("auth_required") is True, (
+        "/api/status must report auth_required=True when the OAuth gate "
+        f"is engaged so the SPA/portal can distinguish modes. Got: {status!r}"
+    )
+
+
+def test_dashboard_insecure_env_var_opts_out_of_gate(
+    built_image: str, container_name: str,
+) -> None:
+    """``HERMES_DASHBOARD_INSECURE=1`` re-enables the legacy no-gate mode
+    for operators running on trusted LANs behind a reverse proxy without
+    the OAuth contract. Same opt-out shape as the rest of the s6 boolean
+    envs (``HERMES_DASHBOARD``, ``HERMES_DASHBOARD_TUI``).
+
+    With the gate off, ``/api/status`` (a public endpoint under the
+    legacy ``_SESSION_TOKEN`` middleware) returns 200 with the
+    ``auth_required: false`` body — proves the gate is bypassed.
+    """
+    subprocess.run(
+        ["docker", "run", "-d", "--name", container_name,
+         "-e", "HERMES_DASHBOARD=1",
+         "-e", "HERMES_DASHBOARD_HOST=0.0.0.0",
+         "-e", "HERMES_DASHBOARD_INSECURE=1",
+         built_image, "sleep", "120"],
+        check=True, capture_output=True, timeout=30,
+    )
+    status_code, body = _http_probe(container_name, "/api/status")
+    assert status_code == 200, (
+        f"/api/status should return 200 with the auth gate disabled; "
+        f"got {status_code} body={body!r}"
+    )
+    status = json.loads(body)
+    assert status.get("auth_required") is False, (
+        "HERMES_DASHBOARD_INSECURE=1 must disable the auth gate (explicit "
+        f"opt-in for trusted-LAN deployments). Got: {status!r}"
+    )
diff --git a/tests/docker/test_docker_exec_privilege_drop.py b/tests/docker/test_docker_exec_privilege_drop.py
new file mode 100644
index 00000000000..745848938a3
--- /dev/null
+++ b/tests/docker/test_docker_exec_privilege_drop.py
@@ -0,0 +1,290 @@
+"""Regression tests for the docker-exec privilege-drop shim.
+
+The shim (docker/hermes-exec-shim.sh, installed at /opt/hermes/bin/hermes)
+exists to prevent the auth.json ownership-mismatch bug where
+`docker exec <c> hermes login` would write /opt/data/auth.json as
+root:root mode 0600, leaving the supervised gateway (UID 10000) unable
+to read its own credentials and returning "Provider authentication
+failed: Hermes is not logged into Nous Portal" on every message.
+
+These tests verify:
+
+1. ``docker exec <c> hermes …`` (defaulting to root) gets dropped to the
+   hermes user before the real binary runs.
+2. ``docker exec --user hermes <c> hermes …`` (already non-root) short-
+   circuits and doesn't try to drop again.
+3. Files written under $HERMES_HOME from a ``docker exec`` session land
+   as hermes:hermes — the actual user-visible invariant.
+4. The HERMES_DOCKER_EXEC_AS_ROOT opt-out lets diagnostic sessions keep
+   running as root deliberately.
+5. The main CMD path (``docker run <image> …``) is unaffected by the
+   PATH-shim ordering — no recursion, no behavior change.
+"""
+
+from __future__ import annotations
+
+import subprocess
+import time
+from collections.abc import Iterator
+
+import pytest
+
+
+# How long to give a `docker run -d` container before declaring it not ready.
+_RUN_READY_TIMEOUT_S = 20
+
+
+def _wait_for_init(container: str) -> None:
+    """Block until /init is up enough that `docker exec` is responsive."""
+    deadline = time.time() + _RUN_READY_TIMEOUT_S
+    while time.time() < deadline:
+        r = subprocess.run(
+            ["docker", "exec", container, "true"],
+            capture_output=True, timeout=5,
+        )
+        if r.returncode == 0:
+            return
+        time.sleep(0.2)
+    pytest.fail(f"container {container} not responsive to docker exec within {_RUN_READY_TIMEOUT_S}s")
+
+
+@pytest.fixture
+def sleep_container(built_image: str, container_name: str) -> Iterator[str]:
+    """Long-lived container running `sleep infinity` so we can docker exec into it."""
+    subprocess.run(
+        ["docker", "rm", "-f", container_name],
+        capture_output=True, check=False,
+    )
+    r = subprocess.run(
+        ["docker", "run", "-d", "--name", container_name, built_image,
+         "sleep", "infinity"],
+        capture_output=True, text=True, timeout=30,
+    )
+    assert r.returncode == 0, f"docker run failed: {r.stderr}"
+    try:
+        _wait_for_init(container_name)
+        yield container_name
+    finally:
+        subprocess.run(
+            ["docker", "rm", "-f", container_name],
+            capture_output=True, check=False,
+        )
+
+
+def test_shim_drops_root_to_hermes_uid(sleep_container: str) -> None:
+    """docker exec defaults to root; the shim should drop to uid 10000.
+
+    We invoke `hermes` with a Python-style `-c` shim equivalent — there's no
+    pure-hermes "print my uid" command, so we use the venv's python directly
+    via the shim's PATH lookup: `python -c 'print(os.getuid())'` is resolved
+    through the venv. But that bypasses the shim. Instead, we exploit the
+    fact that the venv's `hermes` is a console_scripts entry — under the
+    hood it's a tiny Python wrapper. We can't easily inject "print my uid"
+    into it without forking subcommands. Simplest approach: have `hermes`
+    do anything that writes to disk, then check the file's owner.
+
+    Use `hermes config set` which writes config.yaml under HERMES_HOME.
+    The resulting file ownership tells us what UID the shim ended up at.
+    """
+    # Wipe any prior state.
+    subprocess.run(
+        ["docker", "exec", "--user", "root", sleep_container,
+         "rm", "-f", "/opt/data/config.yaml"],
+        capture_output=True, check=False,
+    )
+
+    # Default docker exec (root) — should be dropped by the shim.
+    r = subprocess.run(
+        ["docker", "exec", sleep_container,
+         "hermes", "config", "set", "_test.shim_marker", "1"],
+        capture_output=True, text=True, timeout=30,
+    )
+    assert r.returncode == 0, f"config set failed: stdout={r.stdout!r} stderr={r.stderr!r}"
+
+    # The written file must be owned by hermes, not root.
+    r = subprocess.run(
+        ["docker", "exec", sleep_container,
+         "stat", "-c", "%U:%G", "/opt/data/config.yaml"],
+        capture_output=True, text=True, timeout=10,
+    )
+    assert r.returncode == 0, f"stat failed: {r.stderr}"
+    assert r.stdout.strip() == "hermes:hermes", (
+        f"config.yaml owned by {r.stdout.strip()!r}, expected hermes:hermes. "
+        "The shim did not drop privileges before invoking hermes."
+    )
+
+
+def test_shim_short_circuits_for_non_root_exec(sleep_container: str) -> None:
+    """docker exec --user hermes already runs as 10000; shim should be a no-op.
+
+    Verified indirectly: the command must still succeed end-to-end. If the
+    shim incorrectly tried to drop privileges a second time (e.g. by
+    invoking s6-setuidgid which requires root), it would fail with
+    EPERM. A clean success proves the short-circuit fired.
+    """
+    subprocess.run(
+        ["docker", "exec", "--user", "root", sleep_container,
+         "rm", "-f", "/opt/data/config.yaml"],
+        capture_output=True, check=False,
+    )
+
+    r = subprocess.run(
+        ["docker", "exec", "--user", "hermes", sleep_container,
+         "hermes", "config", "set", "_test.shim_short_circuit", "1"],
+        capture_output=True, text=True, timeout=30,
+    )
+    assert r.returncode == 0, (
+        f"docker exec --user hermes failed: {r.stderr!r} stdout={r.stdout!r}. "
+        "If the shim mis-handled the non-root path, this would fail with EPERM."
+    )
+
+    # File still ends up hermes:hermes — orthogonally confirms uid.
+    r = subprocess.run(
+        ["docker", "exec", sleep_container,
+         "stat", "-c", "%U:%G", "/opt/data/config.yaml"],
+        capture_output=True, text=True, timeout=10,
+    )
+    assert r.stdout.strip() == "hermes:hermes"
+
+
+def test_shim_opt_out_keeps_root(sleep_container: str) -> None:
+    """HERMES_DOCKER_EXEC_AS_ROOT=1 should suppress the privilege drop.
+
+    Reserved for diagnostic sessions where the operator deliberately
+    wants root semantics. Verified by writing a file and checking its
+    owner.
+    """
+    subprocess.run(
+        ["docker", "exec", "--user", "root", sleep_container,
+         "rm", "-f", "/opt/data/config.yaml"],
+        capture_output=True, check=False,
+    )
+
+    r = subprocess.run(
+        ["docker", "exec",
+         "-e", "HERMES_DOCKER_EXEC_AS_ROOT=1",
+         sleep_container,
+         "hermes", "config", "set", "_test.opt_out", "1"],
+        capture_output=True, text=True, timeout=30,
+    )
+    assert r.returncode == 0, f"opt-out invocation failed: {r.stderr}"
+
+    r = subprocess.run(
+        ["docker", "exec", sleep_container,
+         "stat", "-c", "%U:%G", "/opt/data/config.yaml"],
+        capture_output=True, text=True, timeout=10,
+    )
+    assert r.stdout.strip() == "root:root", (
+        f"With HERMES_DOCKER_EXEC_AS_ROOT=1, expected root:root, "
+        f"got {r.stdout.strip()!r}"
+    )
+
+
+@pytest.mark.parametrize("falsy_value", ["0", "false", "no", "", "garbage", "2"])
+def test_shim_opt_out_strict_truthiness(
+    sleep_container: str, falsy_value: str,
+) -> None:
+    """Anything other than 1/true/yes (case-insensitive) does NOT opt out.
+
+    Strict truthiness so a typo (``HERMES_DOCKER_EXEC_AS_ROOT=0``) doesn't
+    silently keep the user as root. Mirrors the policy used by
+    ``HERMES_GATEWAY_NO_SUPERVISE`` in #33583.
+    """
+    subprocess.run(
+        ["docker", "exec", "--user", "root", sleep_container,
+         "rm", "-f", "/opt/data/config.yaml"],
+        capture_output=True, check=False,
+    )
+
+    r = subprocess.run(
+        ["docker", "exec",
+         "-e", f"HERMES_DOCKER_EXEC_AS_ROOT={falsy_value}",
+         sleep_container,
+         "hermes", "config", "set", "_test.falsy", "1"],
+        capture_output=True, text=True, timeout=30,
+    )
+    assert r.returncode == 0, f"falsy value {falsy_value!r} caused failure: {r.stderr}"
+
+    r = subprocess.run(
+        ["docker", "exec", sleep_container,
+         "stat", "-c", "%U:%G", "/opt/data/config.yaml"],
+        capture_output=True, text=True, timeout=10,
+    )
+    assert r.stdout.strip() == "hermes:hermes", (
+        f"falsy opt-out value {falsy_value!r} unexpectedly suppressed the drop; "
+        f"file owner is {r.stdout.strip()!r}, expected hermes:hermes"
+    )
+
+
+def test_main_cmd_path_unaffected(built_image: str) -> None:
+    """The CMD path (docker run <image> <args>) must still work.
+
+    The shim sits at /opt/hermes/bin earliest on PATH; main-wrapper.sh
+    invokes `s6-setuidgid hermes hermes <args>` which resolves `hermes`
+    through PATH. With the shim in the way, this could regress if the
+    shim recurses or interferes with TTY/exit-code propagation.
+
+    `chat --help` is cheap and exercises the full subcommand
+    passthrough path. The duplicate of test_main_invocation's
+    pre-existing test is intentional — that one would have passed
+    pre-shim too; this one specifically guards against shim regressions
+    in the CMD-as-main-program codepath.
+    """
+    r = subprocess.run(
+        ["docker", "run", "--rm", built_image, "chat", "--help"],
+        capture_output=True, text=True, timeout=60,
+    )
+    assert r.returncode == 0, f"CMD path broken by shim: stderr={r.stderr!r}"
+    assert "Traceback" not in r.stderr
+
+
+def test_e2e_login_then_supervised_gateway_can_read_auth(
+    sleep_container: str,
+) -> None:
+    """End-to-end regression for the original bug.
+
+    Pre-shim: ``docker exec <c> hermes login`` (root) wrote
+    /opt/data/auth.json as root:root 0600. The supervised gateway (UID
+    10000) couldn't read it, _load_auth_store swallowed PermissionError
+    as a parse failure, and resolve_nous_runtime_credentials raised
+    "Hermes is not logged into Nous Portal" on every message.
+
+    We can't do a real OAuth login in a unit test, but we can stand in
+    for it by writing the same file shape via `hermes config set`-style
+    writes — what matters is the *file ownership invariant* downstream
+    of `_save_auth_store`. If the shim works, every file the
+    `docker exec` path produces is hermes-readable.
+
+    Specifically: pretend the operator ran `hermes login` (writes
+    auth.json) and verify (a) the file exists and (b) it's readable by
+    the hermes UID. We use `hermes auth list` since that touches the
+    auth store on the read side and would fail with the same
+    'not logged in' shape if the file was unreadable to uid 10000.
+    """
+    # Have the shim-protected `docker exec` write the auth store.
+    # `hermes auth list` is read-only but still exercises _load_auth_store
+    # under the shim's UID. We invoke `hermes config set` first to
+    # provoke a write into HERMES_HOME so we have something concrete to
+    # owner-check.
+    r = subprocess.run(
+        ["docker", "exec", sleep_container,
+         "hermes", "config", "set", "_test.e2e_marker", "1"],
+        capture_output=True, text=True, timeout=30,
+    )
+    assert r.returncode == 0, f"config set failed: {r.stderr}"
+
+    # The supervised UID (10000) must be able to read everything under
+    # HERMES_HOME that docker exec just wrote.
+    r = subprocess.run(
+        ["docker", "exec", "--user", "hermes", sleep_container,
+         "find", "/opt/data", "-maxdepth", "2", "-type", "f",
+         "!", "-readable", "-print"],
+        capture_output=True, text=True, timeout=15,
+    )
+    assert r.returncode == 0, f"find failed: {r.stderr}"
+    unreadable = [ln for ln in r.stdout.splitlines() if ln.strip()]
+    assert not unreadable, (
+        "Files written by `docker exec` are unreadable to the hermes user "
+        f"(supervised gateway UID): {unreadable}. The shim failed to drop "
+        "privileges before the write."
+    )
diff --git a/tests/docker/test_dump_build_sha.py b/tests/docker/test_dump_build_sha.py
new file mode 100644
index 00000000000..c84a372e823
--- /dev/null
+++ b/tests/docker/test_dump_build_sha.py
@@ -0,0 +1,104 @@
+"""Regression test: ``hermes dump`` reports a real git SHA inside the container.
+
+Background: ``.dockerignore`` excludes ``.git``, so ``git rev-parse HEAD``
+fails inside the published image and ``hermes dump`` used to report
+``version: ... [(unknown)]``.  The Dockerfile now writes the build-time
+``$HERMES_GIT_SHA`` build-arg to ``/opt/hermes/.hermes_build_sha`` and
+``hermes_cli/build_info.py`` reads it as a fallback.
+
+CI (``.github/workflows/docker-publish.yml``) always sets the build-arg
+to ``${{ github.sha }}``.  Local ``docker build`` (the ``built_image``
+fixture in ``tests/docker/conftest.py``) does NOT — so locally the file
+is absent and ``hermes dump`` correctly falls back to ``(unknown)``.
+
+This test handles both cases:
+
+* If ``/opt/hermes/.hermes_build_sha`` exists in the image, assert that
+  ``hermes dump`` surfaces its content as the version SHA (not
+  ``(unknown)``).
+* If the file is absent, assert the legacy behaviour (``(unknown)``)
+  still holds — defensive guard against the helper accidentally
+  reporting bogus data from somewhere else.
+"""
+from __future__ import annotations
+
+import re
+import subprocess
+
+
+_VERSION_LINE = re.compile(r"^version:\s+(?P<rest>.+)$", re.MULTILINE)
+_SHA_BRACKET = re.compile(r"\[(?P<sha>[^\]]+)\]\s*$")
+
+
+def _run_dump(image: str) -> str:
+    """Return the stdout of ``docker run <image> dump``.
+
+    Relies on Docker's anonymous VOLUME for ``/opt/data`` (declared by the
+    Dockerfile) so the container's hermes user (UID 10000) can bootstrap
+    its config.  Anonymous volumes are auto-cleaned by ``--rm``, so unlike
+    a host bind-mount we don't have to chown anything to UID 10000 (which
+    would break cleanup on non-root hosts).
+    """
+    r = subprocess.run(
+        ["docker", "run", "--rm", image, "dump"],
+        capture_output=True, text=True, timeout=120,
+    )
+    assert r.returncode == 0, (
+        f"hermes dump exited {r.returncode}: "
+        f"stderr={r.stderr[-1000:]!r}\nstdout={r.stdout[-1000:]!r}"
+    )
+    return r.stdout
+
+
+def _read_baked_sha_from_image(image: str) -> str | None:
+    """Return the ``/opt/hermes/.hermes_build_sha`` content, or None if absent."""
+    r = subprocess.run(
+        [
+            "docker", "run", "--rm", "--entrypoint", "cat", image,
+            "/opt/hermes/.hermes_build_sha",
+        ],
+        capture_output=True, text=True, timeout=30,
+    )
+    if r.returncode != 0:
+        return None
+    return r.stdout.strip() or None
+
+
+def test_dump_reports_baked_sha_when_present(built_image: str) -> None:
+    """When the image was built with ``HERMES_GIT_SHA``, dump must surface it.
+
+    Together with the smoke-test action (which exercises ``--help``), this
+    closes the regression loop for the missing-sha bug: any future change
+    that breaks the baked-file -> dump pipeline will fail CI here.
+    """
+    baked = _read_baked_sha_from_image(built_image)
+    stdout = _run_dump(built_image)
+
+    match = _VERSION_LINE.search(stdout)
+    assert match, f"no `version:` line in dump output:\n{stdout[:2000]}"
+    sha_match = _SHA_BRACKET.search(match.group("rest"))
+    assert sha_match, (
+        f"`version:` line missing [<sha>] bracket: {match.group('rest')!r}"
+    )
+    reported = sha_match.group("sha")
+
+    if baked is None:
+        # Local-build path: no build-arg was passed.  Verify the legacy
+        # fallback ``(unknown)`` is intact — guards against the helper
+        # ever inventing a SHA from thin air.
+        assert reported == "(unknown)", (
+            f"expected '(unknown)' when no SHA baked, got {reported!r}"
+        )
+        return
+
+    # CI path: build-arg was set, baked file exists.  ``hermes dump``
+    # truncates to 8 chars via ``git rev-parse --short=8`` semantics.
+    assert reported != "(unknown)", (
+        "baked SHA file present in image but dump still reported "
+        f"'(unknown)' — the build-info fallback is broken.  "
+        f"Baked file content: {baked!r}"
+    )
+    assert reported == baked[:8], (
+        f"dump reported {reported!r} but baked file contained {baked!r} "
+        f"(expected first 8 chars: {baked[:8]!r})"
+    )
diff --git a/tests/docker/test_gateway_run_supervised.py b/tests/docker/test_gateway_run_supervised.py
index aec2257b0e2..91314d5b2f1 100644
--- a/tests/docker/test_gateway_run_supervised.py
+++ b/tests/docker/test_gateway_run_supervised.py
@@ -327,3 +327,69 @@ def test_dashboard_supervised_when_env_set(
     assert _svstat_wants_up(container_name, "dashboard"), (
         f"dashboard slot not up: {_svstat(container_name, 'dashboard')!r}"
     )
+
+
+def test_supervised_gateway_stdout_reaches_docker_logs(
+    built_image: str, container_name: str,
+) -> None:
+    """The supervised gateway's stdout — including the rich-console
+    startup banner — must reach ``docker logs``, not just the rotated
+    log file under ``${HERMES_HOME}/logs/gateways/<profile>/current``.
+
+    Without the ``1`` action directive in ``_render_log_run``, s6-log
+    swallows the gateway's stdout into the file and ``docker logs``
+    only sees stderr (Python ``logging`` defaults to stderr). That's
+    a poor user experience: the iconic "Hermes Gateway Starting…"
+    banner with the ⚕ symbol is the most visible "yes, your gateway
+    started" signal, and forcing users to ``docker exec`` + ``tail``
+    the log file just to see it is friction users don't expect.
+
+    With the ``1`` directive, s6-log forwards every line to its own
+    stdout (which propagates up through the s6-supervise pipeline to
+    /init's stdout = container stdout = ``docker logs``) AND also
+    writes a timestamped copy to the rotated file. Best of both.
+
+    We assert by looking for the literal banner glyph (``⚕``) — a
+    distinctive character that won't appear in stderr-routed
+    Python-logging output, so its presence in ``docker logs`` proves
+    the stdout-tee is working.
+    """
+    subprocess.run(
+        ["docker", "run", "-d", "--name", container_name, built_image,
+         "gateway", "run"],
+        check=True, capture_output=True, timeout=30,
+    )
+    # Banner is printed during gateway startup — give it time to
+    # initialize past the imports + config-load phase.
+    time.sleep(8)
+
+    logs = subprocess.run(
+        ["docker", "logs", container_name],
+        capture_output=True, text=True, timeout=10,
+    )
+    combined = logs.stdout + logs.stderr
+
+    # The banner ⚕ symbol is the load-bearing assertion — it's unique
+    # to gateway startup stdout output and won't appear in stderr
+    # (Python logging) or s6 boot messages.
+    assert "⚕" in combined or "Hermes Gateway Starting" in combined, (
+        "Supervised gateway's stdout banner did not reach docker logs. "
+        "This means the `1` action directive in _render_log_run isn't "
+        "forwarding stdout to /init. "
+        f"docker logs (last 2000 chars):\n{combined[-2000:]}\n"
+        f"file contents:\n{_sh(container_name, 'cat /opt/data/logs/gateways/default/current').stdout}"
+    )
+
+    # Cross-check: the same banner must also be in the rotated log
+    # file (we kept the file destination, just added stdout). The
+    # file version has s6-log's ISO 8601 timestamp prefix; the
+    # docker logs version is raw.
+    file_contents = _sh(
+        container_name, "cat /opt/data/logs/gateways/default/current",
+    ).stdout
+    assert "⚕" in file_contents or "Hermes Gateway Starting" in file_contents, (
+        "Banner also missing from rotated log file — the file "
+        "destination may have been dropped by the new s6-log script. "
+        f"File contents:\n{file_contents}"
+    )
+
diff --git a/tests/e2e/matrix_xsign_bootstrap/test_bootstrap.py b/tests/e2e/matrix_xsign_bootstrap/test_bootstrap.py
index 09147ba55e7..c24c7292073 100644
--- a/tests/e2e/matrix_xsign_bootstrap/test_bootstrap.py
+++ b/tests/e2e/matrix_xsign_bootstrap/test_bootstrap.py
@@ -27,7 +27,6 @@ isn't reachable.
 """
 from __future__ import annotations
 
-import asyncio
 import json
 import logging
 import os
@@ -37,7 +36,6 @@ import shutil
 import subprocess
 import sys
 import tempfile
-import time
 import unittest
 import urllib.error
 import urllib.request
diff --git a/tests/gateway/conftest.py b/tests/gateway/conftest.py
index 258ee15656c..2d56c7c11f4 100644
--- a/tests/gateway/conftest.py
+++ b/tests/gateway/conftest.py
@@ -419,7 +419,6 @@ def pytest_configure(config):
         lock = FileLock(str(lock_file), timeout=120)
     except ImportError:
         # Fallback: no locking (still correct, just slower under contention).
-        import contextlib
 
         class _NoLock:
             def __enter__(self):
diff --git a/tests/gateway/restart_test_helpers.py b/tests/gateway/restart_test_helpers.py
index 213c46cbad8..a91816c4e20 100644
--- a/tests/gateway/restart_test_helpers.py
+++ b/tests/gateway/restart_test_helpers.py
@@ -3,7 +3,7 @@ from collections import OrderedDict
 from unittest.mock import AsyncMock, MagicMock
 
 from gateway.config import GatewayConfig, Platform, PlatformConfig
-from gateway.platforms.base import BasePlatformAdapter, MessageEvent, SendResult
+from gateway.platforms.base import BasePlatformAdapter, SendResult
 from gateway.restart import DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT
 from gateway.run import GatewayRunner
 from gateway.session import SessionSource
diff --git a/tests/gateway/test_7100_transient_failure_transcript.py b/tests/gateway/test_7100_transient_failure_transcript.py
index 3340dc28d51..cb416e6b9ad 100644
--- a/tests/gateway/test_7100_transient_failure_transcript.py
+++ b/tests/gateway/test_7100_transient_failure_transcript.py
@@ -15,7 +15,6 @@ The gateway classifier must distinguish:
 * everything else that fails → transient → persist the user message
 """
 
-import pytest
 
 
 def _classify(agent_result: dict, history_len: int) -> tuple[bool, bool]:
diff --git a/tests/gateway/test_agent_cache.py b/tests/gateway/test_agent_cache.py
index 6ef601e0dc5..0c6e2df3bd9 100644
--- a/tests/gateway/test_agent_cache.py
+++ b/tests/gateway/test_agent_cache.py
@@ -9,12 +9,9 @@ Verifies that the agent cache correctly:
 - Preserves frozen system prompt across turns
 """
 
-import hashlib
-import json
 import threading
 from unittest.mock import MagicMock, patch
 
-import pytest
 
 
 def _make_runner():
diff --git a/tests/gateway/test_allowed_channels_widening.py b/tests/gateway/test_allowed_channels_widening.py
index 6d4c8d1ead0..0d214713a1c 100644
--- a/tests/gateway/test_allowed_channels_widening.py
+++ b/tests/gateway/test_allowed_channels_widening.py
@@ -243,7 +243,6 @@ class TestMattermostAllowedChannels:
     @staticmethod
     def _would_process(channel_id, channel_type="O", allowed_cfg=None, allowed_env=""):
         """Replicate the whitelist gate from gateway/platforms/mattermost.py."""
-        import os as _os
         if channel_type == "D":
             return True
         # config-first, env-var fallback (matching the adapter)
diff --git a/tests/gateway/test_api_server.py b/tests/gateway/test_api_server.py
index 3b0a9b24b6f..c042fd556c6 100644
--- a/tests/gateway/test_api_server.py
+++ b/tests/gateway/test_api_server.py
@@ -22,14 +22,13 @@ from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
 from aiohttp import web
-from aiohttp.test_utils import AioHTTPTestCase, TestClient, TestServer
+from aiohttp.test_utils import TestClient, TestServer
 
 from gateway.config import GatewayConfig, Platform, PlatformConfig
 from gateway.platforms.api_server import (
     APIServerAdapter,
     ResponseStore,
     _IdempotencyCache,
-    _CORS_HEADERS,
     _derive_chat_session_id,
     check_api_server_requirements,
     cors_middleware,
diff --git a/tests/gateway/test_api_server_bind_guard.py b/tests/gateway/test_api_server_bind_guard.py
index 13a09c9ec49..edab34eb382 100644
--- a/tests/gateway/test_api_server_bind_guard.py
+++ b/tests/gateway/test_api_server_bind_guard.py
@@ -1,11 +1,11 @@
 """Tests for the API server bind-address startup guard.
 
 Validates that is_network_accessible() correctly classifies addresses and
-that connect() refuses to start on non-loopback without API_SERVER_KEY.
+that connect() refuses to start without API_SERVER_KEY.
 """
 
 import socket
-from unittest.mock import AsyncMock, patch
+from unittest.mock import patch
 
 import pytest
 
@@ -111,13 +111,14 @@ class TestConnectBindGuard:
         result = await adapter.connect()
         assert result is False
 
-    def test_allows_loopback_without_key(self):
-        """Loopback with no key should pass the guard."""
+    @pytest.mark.asyncio
+    async def test_refuses_loopback_without_key(self):
+        """Loopback binds are still an auth boundary and require API_SERVER_KEY."""
         adapter = APIServerAdapter(PlatformConfig(enabled=True, extra={"host": "127.0.0.1"}))
         assert adapter._api_key == ""
-        # The guard condition: is_network_accessible(host) AND NOT api_key
-        # For loopback, is_network_accessible is False so the guard does not block.
         assert is_network_accessible(adapter._host) is False
+        result = await adapter.connect()
+        assert result is False
 
     @pytest.mark.asyncio
     async def test_allows_wildcard_with_key(self):
diff --git a/tests/gateway/test_api_server_jobs.py b/tests/gateway/test_api_server_jobs.py
index 087bfc5b404..7e1f4cc4103 100644
--- a/tests/gateway/test_api_server_jobs.py
+++ b/tests/gateway/test_api_server_jobs.py
@@ -10,7 +10,6 @@ Covers:
 - Cron module unavailability (501 when _CRON_AVAILABLE is False)
 """
 
-import json
 import logging
 from unittest.mock import MagicMock, patch
 
diff --git a/tests/gateway/test_api_server_runs.py b/tests/gateway/test_api_server_runs.py
index dd25ea97160..d6e1e588506 100644
--- a/tests/gateway/test_api_server_runs.py
+++ b/tests/gateway/test_api_server_runs.py
@@ -9,10 +9,8 @@ Covers:
 """
 
 import asyncio
-import json
 import threading
-import time as _time
-from unittest.mock import AsyncMock, MagicMock, patch
+from unittest.mock import MagicMock, patch
 
 import pytest
 from aiohttp import web
diff --git a/tests/gateway/test_api_server_toolset.py b/tests/gateway/test_api_server_toolset.py
index 943d867e613..add2ce27345 100644
--- a/tests/gateway/test_api_server_toolset.py
+++ b/tests/gateway/test_api_server_toolset.py
@@ -1,9 +1,6 @@
 """Tests for hermes-api-server toolset and API server tool availability."""
-import os
-import json
 from unittest.mock import patch, MagicMock
 
-import pytest
 
 from toolsets import resolve_toolset, get_toolset, validate_toolset
 
diff --git a/tests/gateway/test_approve_deny_commands.py b/tests/gateway/test_approve_deny_commands.py
index 02834fce8e4..1c996b2baee 100644
--- a/tests/gateway/test_approve_deny_commands.py
+++ b/tests/gateway/test_approve_deny_commands.py
@@ -8,7 +8,6 @@ Supports multiple concurrent approvals (parallel subagents, execute_code)
 via a per-session queue.
 """
 
-import asyncio
 import os
 import threading
 import time
@@ -19,7 +18,7 @@ import pytest
 
 from gateway.config import GatewayConfig, Platform, PlatformConfig
 from gateway.platforms.base import MessageEvent
-from gateway.session import SessionEntry, SessionSource, build_session_key
+from gateway.session import SessionSource
 
 
 def _make_source() -> SessionSource:
@@ -635,7 +634,7 @@ class TestFallbackNoCallback:
         to ``pending_approval`` to make the state distinguishable from a
         failed tool call.
         """
-        from tools.approval import check_all_command_guards, _pending
+        from tools.approval import check_all_command_guards
 
         os.environ["HERMES_EXEC_ASK"] = "1"
         os.environ["HERMES_SESSION_KEY"] = "no-callback-test"
diff --git a/tests/gateway/test_auth_fallback.py b/tests/gateway/test_auth_fallback.py
index 5976962e651..58701160e6c 100644
--- a/tests/gateway/test_auth_fallback.py
+++ b/tests/gateway/test_auth_fallback.py
@@ -1,7 +1,6 @@
 """Test that AuthError triggers fallback provider resolution (#7230)."""
 
-import os
-from unittest.mock import patch, MagicMock
+from unittest.mock import patch
 
 import pytest
 
diff --git a/tests/gateway/test_auto_continue.py b/tests/gateway/test_auto_continue.py
index 1f44fa6ab1d..eb20abf55df 100644
--- a/tests/gateway/test_auto_continue.py
+++ b/tests/gateway/test_auto_continue.py
@@ -6,7 +6,6 @@ this and prepends a system note to the next user message so the model
 finishes the interrupted work before addressing the new input.
 """
 
-import pytest
 
 
 def _simulate_auto_continue(agent_history: list, user_message: str) -> str:
diff --git a/tests/gateway/test_background_command.py b/tests/gateway/test_background_command.py
index 9e0d71921cd..b729ab6563f 100644
--- a/tests/gateway/test_background_command.py
+++ b/tests/gateway/test_background_command.py
@@ -5,7 +5,6 @@ background session) across gateway messenger platforms.
 """
 
 import asyncio
-import os
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
diff --git a/tests/gateway/test_background_process_notifications.py b/tests/gateway/test_background_process_notifications.py
index 412b780bb6f..fe3a6588b1b 100644
--- a/tests/gateway/test_background_process_notifications.py
+++ b/tests/gateway/test_background_process_notifications.py
@@ -9,7 +9,7 @@ Contributed by @PeterFile (PR #593), reimplemented on current main.
 
 import asyncio
 from types import SimpleNamespace
-from unittest.mock import AsyncMock, patch
+from unittest.mock import AsyncMock
 
 import pytest
 
diff --git a/tests/gateway/test_bluebubbles.py b/tests/gateway/test_bluebubbles.py
index dea806fe66b..e42d050c817 100644
--- a/tests/gateway/test_bluebubbles.py
+++ b/tests/gateway/test_bluebubbles.py
@@ -302,7 +302,6 @@ class TestBlueBubblesAttachmentDownload:
         """Image MIME routes to cache_image_from_bytes."""
         adapter = _make_adapter(monkeypatch)
         import asyncio
-        import httpx
 
         # Mock the HTTP client response
         class MockResponse:
diff --git a/tests/gateway/test_busy_session_ack.py b/tests/gateway/test_busy_session_ack.py
index 798dba8462f..7fb3d3210c0 100644
--- a/tests/gateway/test_busy_session_ack.py
+++ b/tests/gateway/test_busy_session_ack.py
@@ -3,7 +3,6 @@
 Verifies that users get an immediate status response instead of total silence
 when the agent is working on a task. See PR fix for the @Lonely__MH report.
 """
-import asyncio
 import time
 from unittest.mock import AsyncMock, MagicMock, patch
 
@@ -26,7 +25,6 @@ sys.modules.setdefault("telegram.constants", _tg.constants)
 sys.modules.setdefault("telegram.ext", types.ModuleType("telegram.ext"))
 
 from gateway.platforms.base import (
-    BasePlatformAdapter,
     MessageEvent,
     MessageType,
     SessionSource,
diff --git a/tests/gateway/test_busy_session_auth_bypass.py b/tests/gateway/test_busy_session_auth_bypass.py
index 9d7146c848e..b1c25a12d87 100644
--- a/tests/gateway/test_busy_session_auth_bypass.py
+++ b/tests/gateway/test_busy_session_auth_bypass.py
@@ -5,9 +5,8 @@ messages from non-allowlisted users must be silently dropped — matching the co
 behavior in _handle_message. Previously, the busy path skipped the auth check entirely,
 allowing unauthorized users to inject text into another user's running session.
 """
-import asyncio
 import time
-from unittest.mock import AsyncMock, MagicMock, patch
+from unittest.mock import AsyncMock, MagicMock
 
 import pytest
 
@@ -27,12 +26,10 @@ sys.modules.setdefault("telegram.constants", _tg.constants)
 sys.modules.setdefault("telegram.ext", types.ModuleType("telegram.ext"))
 
 from gateway.platforms.base import (
-    BasePlatformAdapter,
     MessageEvent,
     MessageType,
     SessionSource,
     build_session_key,
-    merge_pending_message_event,
 )
 
 
diff --git a/tests/gateway/test_channel_directory.py b/tests/gateway/test_channel_directory.py
index cdaf2c540c3..18e8ae2fb09 100644
--- a/tests/gateway/test_channel_directory.py
+++ b/tests/gateway/test_channel_directory.py
@@ -3,7 +3,6 @@
 import asyncio
 import json
 import os
-from pathlib import Path
 from types import SimpleNamespace
 from unittest.mock import AsyncMock, MagicMock, patch
 
@@ -15,7 +14,6 @@ from gateway.channel_directory import (
     load_directory,
     _build_from_sessions,
     _build_slack,
-    DIRECTORY_PATH,
 )
 
 
diff --git a/tests/gateway/test_clean_shutdown_marker.py b/tests/gateway/test_clean_shutdown_marker.py
index c6d3cab5c13..45e56171b8b 100644
--- a/tests/gateway/test_clean_shutdown_marker.py
+++ b/tests/gateway/test_clean_shutdown_marker.py
@@ -7,15 +7,12 @@ suspend_recently_active() is skipped so users don't lose their sessions.
 After a crash (no marker), suspension still fires as a safety net for stuck sessions.
 """
 
-import os
 from datetime import datetime, timedelta
-from pathlib import Path
 from unittest.mock import AsyncMock, MagicMock, patch
 
-import pytest
 
-from gateway.config import GatewayConfig, Platform, PlatformConfig, SessionResetPolicy
-from gateway.session import SessionEntry, SessionSource, SessionStore
+from gateway.config import GatewayConfig, Platform
+from gateway.session import SessionSource, SessionStore
 
 
 # ---------------------------------------------------------------------------
diff --git a/tests/gateway/test_command_bypass_active_session.py b/tests/gateway/test_command_bypass_active_session.py
index 2c0a593dc55..e5e8a4fa469 100644
--- a/tests/gateway/test_command_bypass_active_session.py
+++ b/tests/gateway/test_command_bypass_active_session.py
@@ -13,7 +13,6 @@ the safety net in _run_agent discards leaked command text.
 """
 
 import asyncio
-from unittest.mock import AsyncMock, MagicMock
 
 import pytest
 
diff --git a/tests/gateway/test_config_cwd_bridge.py b/tests/gateway/test_config_cwd_bridge.py
index 6aaf9721cf2..05ffee9b8d2 100644
--- a/tests/gateway/test_config_cwd_bridge.py
+++ b/tests/gateway/test_config_cwd_bridge.py
@@ -11,7 +11,6 @@ asserting the expected env var outcomes.
 
 import os
 import json
-import pytest
 
 
 def _simulate_config_bridge(cfg: dict, initial_env: dict | None = None):
diff --git a/tests/gateway/test_config_driven_access_policy.py b/tests/gateway/test_config_driven_access_policy.py
new file mode 100644
index 00000000000..8659fb884e9
--- /dev/null
+++ b/tests/gateway/test_config_driven_access_policy.py
@@ -0,0 +1,234 @@
+"""Tests for config-driven platform access policies at the gateway layer.
+
+Background (#34515): WeCom, Weixin, Yuanbao, and QQBot expose a documented
+config-driven access surface (``dm_policy`` / ``group_policy`` / ``allow_from``
+/ ``group_allow_from`` in ``PlatformConfig.extra``) and enforce it at intake —
+a message is dropped inside the adapter and never reaches the gateway unless it
+already passed that policy.
+
+The gateway's env-based allowlist check (``_is_user_authorized``) runs *after*
+the adapter. Before the fix it fell through to an env-only default-deny when no
+``PLATFORM_ALLOWED_USERS`` env var was set, silently rejecting ``dm_policy:
+open`` and config-only allowlists even though the adapter had already
+authorized the sender.
+
+The fix is a single drift-proof contract: adapters that own their access policy
+declare ``enforces_own_access_policy`` (a ``BasePlatformAdapter`` property,
+default ``False``). The gateway trusts that flag and skips the env-only
+default-deny for those platforms, rather than re-implementing each adapter's
+policy logic a second time.
+"""
+
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from gateway.config import GatewayConfig, Platform, PlatformConfig
+from gateway.session import SessionSource
+
+
+# Platforms whose adapters own their access policy at intake.
+_OWN_POLICY_PLATFORMS = [
+    Platform.WECOM,
+    Platform.WEIXIN,
+    Platform.YUANBAO,
+    Platform.QQBOT,
+]
+
+
+def _clear_auth_env(monkeypatch) -> None:
+    for key in (
+        "WECOM_ALLOWED_USERS",
+        "WEIXIN_ALLOWED_USERS",
+        "YUANBAO_ALLOWED_USERS",
+        "QQ_ALLOWED_USERS",
+        "QQ_GROUP_ALLOWED_USERS",
+        "TELEGRAM_ALLOWED_USERS",
+        "GATEWAY_ALLOWED_USERS",
+        "GATEWAY_ALLOW_ALL_USERS",
+        "WECOM_ALLOW_ALL_USERS",
+        "WEIXIN_ALLOW_ALL_USERS",
+        "YUANBAO_ALLOW_ALL_USERS",
+        "QQ_ALLOW_ALL_USERS",
+    ):
+        monkeypatch.delenv(key, raising=False)
+
+
+def _make_runner(platform: Platform, config: GatewayConfig, *, enforces: bool):
+    """Build a bare GatewayRunner with one adapter for *platform*.
+
+    ``enforces`` controls whether the adapter declares
+    ``enforces_own_access_policy`` — i.e. whether it owns its access gate.
+    """
+    from gateway.run import GatewayRunner
+
+    runner = object.__new__(GatewayRunner)
+    runner.config = config
+    adapter = SimpleNamespace(send=AsyncMock(), enforces_own_access_policy=enforces)
+    runner.adapters = {platform: adapter}
+    runner.pairing_store = MagicMock()
+    runner.pairing_store.is_approved.return_value = False
+    runner.pairing_store._is_rate_limited.return_value = False
+    return runner, adapter
+
+
+def _source(platform: Platform, *, chat_type: str = "dm") -> SessionSource:
+    return SessionSource(
+        platform=platform,
+        user_id="some-user",
+        chat_id="some-chat",
+        user_name="tester",
+        chat_type=chat_type,
+    )
+
+
+# ---------------------------------------------------------------------------
+# Layer 1: the base-class contract and per-adapter overrides
+# ---------------------------------------------------------------------------
+
+
+def test_base_adapter_defaults_to_not_owning_access_policy():
+    """Adapters that don't override the property delegate to the gateway."""
+    from gateway.platforms.base import BasePlatformAdapter
+
+    # The default lives on the base property descriptor.
+    assert BasePlatformAdapter.enforces_own_access_policy.fget(object()) is False
+
+
+@pytest.mark.parametrize(
+    "module_path, class_name",
+    [
+        ("gateway.platforms.wecom", "WeComAdapter"),
+        ("gateway.platforms.weixin", "WeixinAdapter"),
+        ("gateway.platforms.yuanbao", "YuanbaoAdapter"),
+        ("gateway.platforms.qqbot.adapter", "QQAdapter"),
+    ],
+)
+def test_own_policy_adapters_declare_the_flag(module_path, class_name):
+    """The four config-policy adapters override the flag to True."""
+    import importlib
+
+    module = importlib.import_module(module_path)
+    adapter_cls = getattr(module, class_name)
+    # Property is overridden on the subclass and returns True regardless of
+    # instance state (it reflects a static capability, not runtime config).
+    value = adapter_cls.enforces_own_access_policy.fget(object.__new__(adapter_cls))
+    assert value is True
+
+
+# ---------------------------------------------------------------------------
+# Layer 2: gateway trusts the adapter-enforced flag
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize("platform", _OWN_POLICY_PLATFORMS)
+def test_own_policy_platform_authorized_without_env_allowlist(monkeypatch, platform):
+    """A message reaching the gateway from an own-policy adapter is trusted.
+
+    With no env allowlist set, the gateway must NOT default-deny — the adapter
+    already authorized the sender at intake (e.g. ``dm_policy: open``).
+    """
+    _clear_auth_env(monkeypatch)
+    config = GatewayConfig(
+        platforms={platform: PlatformConfig(enabled=True, extra={"dm_policy": "open"})}
+    )
+    runner, _adapter = _make_runner(platform, config, enforces=True)
+
+    assert runner._is_user_authorized(_source(platform)) is True
+
+
+@pytest.mark.parametrize("platform", _OWN_POLICY_PLATFORMS)
+def test_own_policy_platform_authorized_for_group_chat(monkeypatch, platform):
+    """Group traffic from an own-policy adapter is trusted the same way."""
+    _clear_auth_env(monkeypatch)
+    config = GatewayConfig(
+        platforms={platform: PlatformConfig(enabled=True, extra={"group_policy": "open"})}
+    )
+    runner, _adapter = _make_runner(platform, config, enforces=True)
+
+    assert runner._is_user_authorized(_source(platform, chat_type="group")) is True
+
+
+def test_non_owning_platform_still_default_denies(monkeypatch):
+    """Adapters that don't own their policy keep the env-only default-deny."""
+    _clear_auth_env(monkeypatch)
+    config = GatewayConfig(
+        platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="t")}
+    )
+    runner, _adapter = _make_runner(Platform.TELEGRAM, config, enforces=False)
+
+    assert runner._is_user_authorized(_source(Platform.TELEGRAM)) is False
+
+
+def test_env_allowlist_still_takes_precedence_for_own_policy_platform(monkeypatch):
+    """When an env allowlist IS set, it governs — adapter trust is a fallback.
+
+    The adapter-trust branch only fires when no env allowlist exists, so an
+    operator who sets ``WECOM_ALLOWED_USERS`` still gets env-based gating and
+    a non-listed user is denied.
+    """
+    _clear_auth_env(monkeypatch)
+    monkeypatch.setenv("WECOM_ALLOWED_USERS", "allowed-user")
+    config = GatewayConfig(
+        platforms={Platform.WECOM: PlatformConfig(enabled=True, extra={"dm_policy": "open"})}
+    )
+    runner, _adapter = _make_runner(Platform.WECOM, config, enforces=True)
+
+    listed = SessionSource(
+        platform=Platform.WECOM, user_id="allowed-user", chat_id="c",
+        user_name="t", chat_type="dm",
+    )
+    stranger = SessionSource(
+        platform=Platform.WECOM, user_id="stranger", chat_id="c",
+        user_name="t", chat_type="dm",
+    )
+    assert runner._is_user_authorized(listed) is True
+    assert runner._is_user_authorized(stranger) is False
+
+
+def test_unknown_adapter_does_not_crash_trust_check(monkeypatch):
+    """No adapter registered for the platform → safe default-deny."""
+    _clear_auth_env(monkeypatch)
+    config = GatewayConfig(platforms={Platform.WECOM: PlatformConfig(enabled=True)})
+    runner, _adapter = _make_runner(Platform.WECOM, config, enforces=True)
+    runner.adapters = {}  # nothing registered
+
+    assert runner._adapter_enforces_own_access_policy(Platform.WECOM) is False
+    assert runner._is_user_authorized(_source(Platform.WECOM)) is False
+
+
+# ---------------------------------------------------------------------------
+# Layer 3: unauthorized-DM behavior reads config dm_policy
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize(
+    "dm_policy, expected",
+    [
+        ("allowlist", "ignore"),
+        ("disabled", "ignore"),
+        ("pairing", "pair"),
+    ],
+)
+def test_unauthorized_dm_behavior_follows_config_dm_policy(monkeypatch, dm_policy, expected):
+    """A restrictive dm_policy drops unauthorized DMs; pairing opts back in."""
+    _clear_auth_env(monkeypatch)
+    config = GatewayConfig(
+        platforms={Platform.WECOM: PlatformConfig(enabled=True, extra={"dm_policy": dm_policy})}
+    )
+    runner, _adapter = _make_runner(Platform.WECOM, config, enforces=True)
+
+    assert runner._get_unauthorized_dm_behavior(Platform.WECOM) == expected
+
+
+def test_unauthorized_dm_behavior_open_policy_keeps_default(monkeypatch):
+    """``dm_policy: open`` is not restrictive → falls through to the default."""
+    _clear_auth_env(monkeypatch)
+    config = GatewayConfig(
+        platforms={Platform.WECOM: PlatformConfig(enabled=True, extra={"dm_policy": "open"})}
+    )
+    runner, _adapter = _make_runner(Platform.WECOM, config, enforces=True)
+
+    # No allowlist + no restrictive policy → open-gateway pairing default.
+    assert runner._get_unauthorized_dm_behavior(Platform.WECOM) == "pair"
diff --git a/tests/gateway/test_dingtalk.py b/tests/gateway/test_dingtalk.py
index 2da55a00979..d73b687d7ac 100644
--- a/tests/gateway/test_dingtalk.py
+++ b/tests/gateway/test_dingtalk.py
@@ -1,9 +1,8 @@
 """Tests for DingTalk platform adapter."""
 import asyncio
-import json
 from datetime import datetime, timezone
 from types import SimpleNamespace
-from unittest.mock import AsyncMock, MagicMock, patch, PropertyMock
+from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
 
diff --git a/tests/gateway/test_discord_bot_auth_bypass.py b/tests/gateway/test_discord_bot_auth_bypass.py
index 7d86e034eb3..8e10dfbcb94 100644
--- a/tests/gateway/test_discord_bot_auth_bypass.py
+++ b/tests/gateway/test_discord_bot_auth_bypass.py
@@ -13,9 +13,7 @@ These tests assert both gates now pass a bot message through when
 DISCORD_ALLOW_BOTS permits it AND no user allowlist entry exists.
 """
 
-import os
 from types import SimpleNamespace
-from unittest.mock import patch
 
 import pytest
 
diff --git a/tests/gateway/test_discord_bot_filter.py b/tests/gateway/test_discord_bot_filter.py
index 09a78ae6308..90dc9f8de00 100644
--- a/tests/gateway/test_discord_bot_filter.py
+++ b/tests/gateway/test_discord_bot_filter.py
@@ -1,9 +1,8 @@
 """Tests for Discord bot message filtering (DISCORD_ALLOW_BOTS)."""
 
-import asyncio
 import os
 import unittest
-from unittest.mock import AsyncMock, MagicMock, patch
+from unittest.mock import MagicMock
 
 
 def _make_author(*, bot: bool = False, is_self: bool = False):
diff --git a/tests/gateway/test_discord_channel_skills.py b/tests/gateway/test_discord_channel_skills.py
index 33c469df60d..a1b958d06b0 100644
--- a/tests/gateway/test_discord_channel_skills.py
+++ b/tests/gateway/test_discord_channel_skills.py
@@ -1,6 +1,5 @@
 """Tests for Discord channel_skill_bindings auto-skill resolution."""
 from unittest.mock import MagicMock
-import pytest
 
 
 def _make_adapter():
diff --git a/tests/gateway/test_discord_clarify_buttons.py b/tests/gateway/test_discord_clarify_buttons.py
index 04f20195f46..d2157f2eb9b 100644
--- a/tests/gateway/test_discord_clarify_buttons.py
+++ b/tests/gateway/test_discord_clarify_buttons.py
@@ -11,7 +11,6 @@ dispatcher like Telegram — the auth + resolution path is the same:
   · already-resolved or unauthorized → ephemeral "this prompt..." reply
 """
 
-import asyncio
 import sys
 from pathlib import Path
 from types import SimpleNamespace
@@ -170,7 +169,6 @@ class TestClarifyChoiceResolve:
     async def test_choice_falls_back_to_label_text_when_entry_missing(self):
         """If the gateway entry vanished (race / stale view), the button's
         own choice text is used as the response."""
-        from tools import clarify_gateway as cm
         # Note: no cm.register() — entry intentionally absent
 
         view = ClarifyChoiceView(
diff --git a/tests/gateway/test_discord_free_response.py b/tests/gateway/test_discord_free_response.py
index 554288812b7..e2133d56c35 100644
--- a/tests/gateway/test_discord_free_response.py
+++ b/tests/gateway/test_discord_free_response.py
@@ -851,6 +851,27 @@ async def test_discord_per_user_channel_backfills_too(adapter, monkeypatch):
     assert event.channel_context == "[Recent channel messages]\n[Alice] context"
 
 
+@pytest.mark.asyncio
+async def test_discord_participated_thread_backfills_without_mention(adapter, monkeypatch):
+    """Known threads still need recent thread context when mention gating is bypassed."""
+    monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "true")
+    monkeypatch.delenv("DISCORD_FREE_RESPONSE_CHANNELS", raising=False)
+    monkeypatch.delenv("DISCORD_THREAD_REQUIRE_MENTION", raising=False)
+    adapter.config.extra["history_backfill"] = True
+    adapter._fetch_channel_context = AsyncMock(return_value="[Recent channel messages]\n[Alice] thread context")
+
+    thread = FakeThread(channel_id=456, name="follow-up")
+    adapter._threads.mark("456")
+
+    message = make_message(channel=thread, content="follow-up without mention")
+    await adapter._handle_message(message)
+
+    adapter._fetch_channel_context.assert_awaited_once()
+    event = adapter.handle_message.await_args.args[0]
+    assert event.text == "follow-up without mention"
+    assert event.channel_context == "[Recent channel messages]\n[Alice] thread context"
+
+
 @pytest.mark.asyncio
 async def test_discord_dm_does_not_backfill(adapter, monkeypatch):
     """DMs skip backfill — every DM triggers the bot, so there's no mention gap."""
@@ -884,3 +905,25 @@ async def test_discord_dm_does_not_backfill(adapter, monkeypatch):
         assert event.channel_context is None
 
 
+@pytest.mark.asyncio
+async def test_discord_auto_thread_skips_backfill(adapter, monkeypatch):
+    """Auto-created threads skip backfill — the thread is brand new with no prior context."""
+    monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "true")
+    monkeypatch.setenv("DISCORD_AUTO_THREAD", "true")
+    monkeypatch.delenv("DISCORD_NO_THREAD_CHANNELS", raising=False)
+    monkeypatch.delenv("DISCORD_FREE_RESPONSE_CHANNELS", raising=False)
+    adapter.config.extra["history_backfill"] = True
+
+    fake_thread = FakeThread(channel_id=777, name="auto-thread")
+    adapter._auto_create_thread = AsyncMock(return_value=fake_thread)
+    adapter._fetch_channel_context = AsyncMock(return_value="[Recent channel messages]\n[Alice] noise")
+
+    bot_user = adapter._client.user
+    parent = FakeTextChannel(channel_id=200, name="general")
+    message = make_message(channel=parent, content="hello", mentions=[bot_user])
+    await adapter._handle_message(message)
+
+    adapter._auto_create_thread.assert_awaited_once()
+    adapter._fetch_channel_context.assert_not_awaited()
+
+
diff --git a/tests/gateway/test_discord_lazy_install_views.py b/tests/gateway/test_discord_lazy_install_views.py
index 2ed926e0f8f..7ca100ef81b 100644
--- a/tests/gateway/test_discord_lazy_install_views.py
+++ b/tests/gateway/test_discord_lazy_install_views.py
@@ -15,10 +15,8 @@ Fixes: lazy-install path NameError for ExecApprovalView, SlashConfirmView,
 UpdatePromptView, ModelPickerView, ClarifyChoiceView.
 """
 import importlib
-import sys
 from unittest.mock import patch
 
-import pytest
 
 _VIEW_NAMES = [
     "ExecApprovalView",
diff --git a/tests/gateway/test_discord_roles_dm_scope.py b/tests/gateway/test_discord_roles_dm_scope.py
index ee2939aae3b..19d65a5998c 100644
--- a/tests/gateway/test_discord_roles_dm_scope.py
+++ b/tests/gateway/test_discord_roles_dm_scope.py
@@ -18,7 +18,6 @@ opts into a single trusted guild.
 from types import SimpleNamespace
 from unittest.mock import MagicMock
 
-import pytest
 
 from plugins.platforms.discord.adapter import DiscordAdapter
 
diff --git a/tests/gateway/test_discord_slash_commands.py b/tests/gateway/test_discord_slash_commands.py
index d5ed297faad..8d44f77302e 100644
--- a/tests/gateway/test_discord_slash_commands.py
+++ b/tests/gateway/test_discord_slash_commands.py
@@ -624,6 +624,13 @@ class _FakeTextChannel:
         self.guild = SimpleNamespace(name=guild_name, id=1)
         self.topic = None
 
+    def history(self, *args, **kwargs):
+        async def _empty():
+            return
+            yield  # pragma: no cover — make this an async generator
+
+        return _empty()
+
 
 class _FakeThreadChannel(_discord_mod.Thread):
     """isinstance(ch, discord.Thread) → True."""
@@ -636,6 +643,13 @@ class _FakeThreadChannel(_discord_mod.Thread):
         self.topic = None
         self.parent = SimpleNamespace(id=parent_id, name="general", guild=SimpleNamespace(name=guild_name, id=1))
 
+    def history(self, *args, **kwargs):
+        async def _empty():
+            return
+            yield  # pragma: no cover — make this an async generator
+
+        return _empty()
+
 
 def _fake_message(channel, *, content="Hello", author_id=42, display_name="Jezza"):
     return SimpleNamespace(
diff --git a/tests/gateway/test_discord_thread_persistence.py b/tests/gateway/test_discord_thread_persistence.py
index 75237f6403f..41ffcb2b5bb 100644
--- a/tests/gateway/test_discord_thread_persistence.py
+++ b/tests/gateway/test_discord_thread_persistence.py
@@ -8,7 +8,6 @@ import json
 import os
 from unittest.mock import patch
 
-import pytest
 
 
 class TestDiscordThreadPersistence:
diff --git a/tests/gateway/test_display_config.py b/tests/gateway/test_display_config.py
index 5f23edbd4f5..75230e5b9cd 100644
--- a/tests/gateway/test_display_config.py
+++ b/tests/gateway/test_display_config.py
@@ -1,5 +1,4 @@
 """Tests for gateway.display_config — per-platform display/verbosity resolver."""
-import pytest
 
 
 # ---------------------------------------------------------------------------
diff --git a/tests/gateway/test_dm_topics.py b/tests/gateway/test_dm_topics.py
index 332375229c5..3f6b0942803 100644
--- a/tests/gateway/test_dm_topics.py
+++ b/tests/gateway/test_dm_topics.py
@@ -9,12 +9,11 @@ Covers:
 - _build_message_event: DM topic resolution in message events
 """
 
-import asyncio
 import os
 import sys
 from pathlib import Path
 from types import SimpleNamespace
-from unittest.mock import AsyncMock, MagicMock, patch, mock_open
+from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
 
diff --git a/tests/gateway/test_duplicate_reply_suppression.py b/tests/gateway/test_duplicate_reply_suppression.py
index 7e54515d6a6..c7c047fdb65 100644
--- a/tests/gateway/test_duplicate_reply_suppression.py
+++ b/tests/gateway/test_duplicate_reply_suppression.py
@@ -14,7 +14,6 @@ Covers four fix paths:
 
 import asyncio
 from types import SimpleNamespace
-from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
 
@@ -22,8 +21,6 @@ from gateway.config import Platform, PlatformConfig
 from gateway.platforms.base import (
     BasePlatformAdapter,
     MessageEvent,
-    MessageType,
-    ProcessingOutcome,
     SendResult,
 )
 from gateway.session import SessionSource, build_session_key
diff --git a/tests/gateway/test_email.py b/tests/gateway/test_email.py
index 78034fe8075..2354f9ec201 100644
--- a/tests/gateway/test_email.py
+++ b/tests/gateway/test_email.py
@@ -18,8 +18,6 @@ from email.mime.text import MIMEText
 from email.mime.multipart import MIMEMultipart
 from email.mime.base import MIMEBase
 from email import encoders
-from pathlib import Path
-from types import SimpleNamespace
 from unittest.mock import patch, MagicMock, AsyncMock
 
 from gateway.platforms.base import SendResult
@@ -660,7 +658,6 @@ class TestSendMethods(unittest.TestCase):
     def test_send_image_includes_url(self):
         """send_image should include image URL in email body."""
         import asyncio
-        from unittest.mock import AsyncMock
         adapter = self._make_adapter()
 
         adapter.send = AsyncMock(return_value=SendResult(success=True))
diff --git a/tests/gateway/test_extract_local_files.py b/tests/gateway/test_extract_local_files.py
index 568b311cb9b..1a1111f948c 100644
--- a/tests/gateway/test_extract_local_files.py
+++ b/tests/gateway/test_extract_local_files.py
@@ -8,7 +8,6 @@ deduplication, text cleanup, and extension routing.
 Based on PR #1636 by sudoingX (salvaged + hardened).
 """
 
-import os
 from unittest.mock import patch
 
 import pytest
diff --git a/tests/gateway/test_fallback_eviction.py b/tests/gateway/test_fallback_eviction.py
index ae3ed07aa54..677172e8e0b 100644
--- a/tests/gateway/test_fallback_eviction.py
+++ b/tests/gateway/test_fallback_eviction.py
@@ -7,9 +7,7 @@ Eviction should only happen on successful runs where fallback activated.
 
 import sys
 from pathlib import Path
-from unittest.mock import MagicMock, patch
 
-import pytest
 
 sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent))
 
diff --git a/tests/gateway/test_feishu.py b/tests/gateway/test_feishu.py
index 75f61923956..0f65fd052be 100644
--- a/tests/gateway/test_feishu.py
+++ b/tests/gateway/test_feishu.py
@@ -3126,8 +3126,6 @@ class TestWebhookSecurity(unittest.TestCase):
 
     def test_signature_valid_passes(self):
         import hashlib
-        from gateway.platforms.feishu import FeishuAdapter
-        from gateway.config import PlatformConfig
 
         encrypt_key = "test_secret"
         adapter = self._make_adapter(encrypt_key)
@@ -4885,3 +4883,62 @@ class TestFeishuMentionEndToEnd(unittest.TestCase):
         # Body: leading @Hermes stripped, Alice preserved, trailing text intact.
         self.assertIn("@Alice review the spec with Alice", event.text)
         self.assertNotIn("@Hermes @Alice", event.text)
+
+
+class TestChatLockEviction(unittest.TestCase):
+    """_get_chat_lock is LRU-bounded so _chat_locks cannot grow unbounded."""
+
+    def _make_adapter(self, max_size=5):
+        import collections as _collections
+
+        from gateway.platforms.feishu import FeishuAdapter
+
+        adapter = object.__new__(FeishuAdapter)
+        adapter._chat_locks = _collections.OrderedDict()
+        adapter.CHAT_LOCK_MAX_SIZE = max_size
+        return adapter
+
+    def test_chat_locks_is_ordered_dict(self):
+        import collections as _collections
+
+        adapter = self._make_adapter()
+        self.assertIsInstance(adapter._chat_locks, _collections.OrderedDict)
+
+    def test_same_id_returns_same_lock_and_stays_bounded(self):
+        adapter = self._make_adapter(max_size=5)
+        locks = [adapter._get_chat_lock(f"c{i}") for i in range(5)]
+        self.assertEqual(len(adapter._chat_locks), 5)
+        # Re-requesting an existing id returns the identical lock, no growth.
+        self.assertIs(adapter._get_chat_lock("c2"), locks[2])
+        self.assertEqual(len(adapter._chat_locks), 5)
+
+    def test_lru_eviction_respects_recent_access(self):
+        adapter = self._make_adapter(max_size=5)
+        for i in range(5):
+            adapter._get_chat_lock(f"c{i}")
+        # Touch c0 so it is no longer the LRU entry, then add a new chat.
+        adapter._get_chat_lock("c0")
+        adapter._get_chat_lock("c_new")
+        self.assertEqual(len(adapter._chat_locks), 5)
+        self.assertNotIn("c1", adapter._chat_locks)  # c1 was the true LRU
+        self.assertIn("c0", adapter._chat_locks)
+        self.assertIn("c_new", adapter._chat_locks)
+
+    def test_eviction_skips_held_locks(self):
+        adapter = self._make_adapter(max_size=3)
+
+        async def _run():
+            held = adapter._get_chat_lock("held")
+            await held.acquire()
+            try:
+                adapter._get_chat_lock("x")
+                adapter._get_chat_lock("y")
+                # At capacity; "held" is LRU but locked, so "x" should go instead.
+                adapter._get_chat_lock("z")
+                self.assertIn("held", adapter._chat_locks)
+                self.assertNotIn("x", adapter._chat_locks)
+                self.assertEqual(len(adapter._chat_locks), 3)
+            finally:
+                held.release()
+
+        asyncio.run(_run())
diff --git a/tests/gateway/test_feishu_bot_admission.py b/tests/gateway/test_feishu_bot_admission.py
index 5ccc386d83e..2d71ad06de1 100644
--- a/tests/gateway/test_feishu_bot_admission.py
+++ b/tests/gateway/test_feishu_bot_admission.py
@@ -3,7 +3,6 @@
 from __future__ import annotations
 
 from types import SimpleNamespace
-from typing import Any
 
 import pytest
 
diff --git a/tests/gateway/test_feishu_comment.py b/tests/gateway/test_feishu_comment.py
index 0a09481ac8c..6241de6f86e 100644
--- a/tests/gateway/test_feishu_comment.py
+++ b/tests/gateway/test_feishu_comment.py
@@ -1,7 +1,6 @@
 """Tests for feishu_comment — event filtering, access control integration, wiki reverse lookup."""
 
 import asyncio
-import json
 import unittest
 from types import SimpleNamespace
 from unittest.mock import AsyncMock, Mock, patch
diff --git a/tests/gateway/test_fresh_reset_skill_injection.py b/tests/gateway/test_fresh_reset_skill_injection.py
index 885dd0f15d6..1f7b4f1a32f 100644
--- a/tests/gateway/test_fresh_reset_skill_injection.py
+++ b/tests/gateway/test_fresh_reset_skill_injection.py
@@ -25,7 +25,6 @@ a "Session automatically reset due to inactivity" user-facing notice and
 a context-note prepend into the agent's prompt — both wrong for an explicit
 /new or /reset.
 """
-import pytest
 
 from gateway.config import GatewayConfig, Platform
 from gateway.session import SessionEntry, SessionSource, SessionStore
diff --git a/tests/gateway/test_gateway_inactivity_timeout.py b/tests/gateway/test_gateway_inactivity_timeout.py
index 28e22b05797..2c57bf6ed32 100644
--- a/tests/gateway/test_gateway_inactivity_timeout.py
+++ b/tests/gateway/test_gateway_inactivity_timeout.py
@@ -14,9 +14,7 @@ import os
 import sys
 import time
 from pathlib import Path
-from unittest.mock import MagicMock, patch
 
-import pytest
 
 sys.path.insert(0, str(Path(__file__).parent.parent.parent))
 
diff --git a/tests/gateway/test_hooks.py b/tests/gateway/test_hooks.py
index ac9e51919c4..a614f9cbe0e 100644
--- a/tests/gateway/test_hooks.py
+++ b/tests/gateway/test_hooks.py
@@ -1,7 +1,5 @@
 """Tests for gateway/hooks.py — event hook system."""
 
-import asyncio
-from pathlib import Path
 from unittest.mock import patch
 
 import pytest
diff --git a/tests/gateway/test_internal_event_bypass_pairing.py b/tests/gateway/test_internal_event_bypass_pairing.py
index 88788425387..f0348a759da 100644
--- a/tests/gateway/test_internal_event_bypass_pairing.py
+++ b/tests/gateway/test_internal_event_bypass_pairing.py
@@ -9,7 +9,7 @@ pairing code to the chat.
 
 import asyncio
 from types import SimpleNamespace
-from unittest.mock import AsyncMock, patch
+from unittest.mock import AsyncMock
 
 import pytest
 
diff --git a/tests/gateway/test_irc_adapter.py b/tests/gateway/test_irc_adapter.py
index 246dbfdf0ec..1320152c637 100644
--- a/tests/gateway/test_irc_adapter.py
+++ b/tests/gateway/test_irc_adapter.py
@@ -1,11 +1,8 @@
 """Tests for the IRC platform adapter plugin."""
 
 import asyncio
-import os
-import sys
 import pytest
-from pathlib import Path
-from unittest.mock import AsyncMock, MagicMock, patch
+from unittest.mock import AsyncMock, MagicMock
 
 from tests.gateway._plugin_adapter_loader import load_plugin_adapter
 
diff --git a/tests/gateway/test_kanban_notifier.py b/tests/gateway/test_kanban_notifier.py
index 8e85f045037..9dd5aa3749b 100644
--- a/tests/gateway/test_kanban_notifier.py
+++ b/tests/gateway/test_kanban_notifier.py
@@ -1,7 +1,6 @@
 import asyncio
 from pathlib import Path
 
-import pytest
 
 from gateway.config import Platform
 from gateway.run import GatewayRunner
diff --git a/tests/gateway/test_line_plugin.py b/tests/gateway/test_line_plugin.py
index e7fd2cf9946..4f42c0f08da 100644
--- a/tests/gateway/test_line_plugin.py
+++ b/tests/gateway/test_line_plugin.py
@@ -19,8 +19,7 @@ import hashlib
 import hmac
 import base64
 import json
-import os
-from unittest.mock import AsyncMock, MagicMock, patch
+from unittest.mock import AsyncMock, MagicMock
 
 import pytest
 
diff --git a/tests/gateway/test_load_transcript_db_only.py b/tests/gateway/test_load_transcript_db_only.py
index 2425e495a6b..6d9025a4edc 100644
--- a/tests/gateway/test_load_transcript_db_only.py
+++ b/tests/gateway/test_load_transcript_db_only.py
@@ -1,7 +1,5 @@
 """Verify load_transcript returns SQLite messages without any JSONL file."""
-from pathlib import Path
 
-import pytest
 
 from gateway.session import SessionStore
 from gateway.config import GatewayConfig
diff --git a/tests/gateway/test_matrix.py b/tests/gateway/test_matrix.py
index c7c03b1a8b1..c0294b41ec9 100644
--- a/tests/gateway/test_matrix.py
+++ b/tests/gateway/test_matrix.py
@@ -1,9 +1,6 @@
 """Tests for Matrix platform adapter (mautrix-python backend)."""
 import asyncio
-import json
-import re
 import sys
-import time
 import types
 import pytest
 from unittest.mock import MagicMock, patch, AsyncMock
@@ -974,7 +971,6 @@ class TestDeviceKeyReVerification:
         mock_olm.account.identity_keys = {"ed25519": "local_new_key"}
         mock_olm.share_keys = AsyncMock()
 
-        from gateway.platforms.matrix import MatrixAdapter
         result = await adapter._verify_device_keys_on_server(mock_client, mock_olm)
 
         assert result is False
@@ -986,7 +982,7 @@ class TestMatrixE2EEHardFail:
 
     @pytest.mark.asyncio
     async def test_connect_fails_when_encryption_true_but_no_e2ee_deps(self):
-        from gateway.platforms.matrix import MatrixAdapter, _check_e2ee_deps
+        from gateway.platforms.matrix import MatrixAdapter
 
         config = PlatformConfig(
             enabled=True,
@@ -1208,7 +1204,6 @@ class TestMatrixPasswordLoginDeviceId:
 
         fake_mautrix_mods["mautrix.client"].Client = MagicMock(return_value=mock_client)
 
-        from gateway.platforms import matrix as matrix_mod
         with patch.dict("sys.modules", fake_mautrix_mods):
             with patch.object(adapter, "_refresh_dm_cache", AsyncMock()):
                 with patch.object(adapter, "_sync_loop", AsyncMock(return_value=None)):
diff --git a/tests/gateway/test_matrix_approval_reaction_fail_closed.py b/tests/gateway/test_matrix_approval_reaction_fail_closed.py
new file mode 100644
index 00000000000..c9b5277ee6a
--- /dev/null
+++ b/tests/gateway/test_matrix_approval_reaction_fail_closed.py
@@ -0,0 +1,130 @@
+"""Tests for Matrix adapter fail-closed approval reaction auth.
+
+When MATRIX_ALLOWED_USERS is not configured, _on_reaction must deny
+approval reactions by default unless GATEWAY_ALLOW_ALL_USERS=true.
+Mirrors the Telegram _is_callback_user_authorized fix (commit 89d32052e,
+PR #28494).
+"""
+
+import asyncio
+import sys
+import types
+from collections import deque
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, patch
+
+import pytest
+
+
+# ---------------------------------------------------------------------------
+# Stub mautrix so gateway.platforms.matrix can be imported without the SDK.
+# ---------------------------------------------------------------------------
+
+def _stub_mautrix():
+    stub = types.ModuleType("mautrix")
+    for sub in ("mautrix.types", "mautrix.client", "mautrix.client.api",
+                "mautrix.errors", "mautrix.crypto", "mautrix.util",
+                "mautrix.util.config"):
+        sys.modules.setdefault(sub, types.ModuleType(sub))
+    sys.modules.setdefault("mautrix", stub)
+    m = sys.modules["mautrix.types"]
+    for attr in (
+        "ContentURI", "EventID", "EventType", "PaginationDirection",
+        "PresenceState", "RoomCreatePreset", "RoomID", "SyncToken",
+        "TrustState", "UserID",
+    ):
+        if not hasattr(m, attr):
+            setattr(m, attr, str)
+
+
+_stub_mautrix()
+
+from gateway.platforms.matrix import MatrixAdapter, _MatrixApprovalPrompt  # noqa: E402
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _make_adapter(allowed_user_ids=None):
+    """Construct a MatrixAdapter with only the state needed by _on_reaction."""
+    adapter = object.__new__(MatrixAdapter)
+    adapter._user_id = "@bot:matrix.org"
+    adapter._allowed_user_ids = set(allowed_user_ids) if allowed_user_ids else set()
+    adapter._approval_reaction_map = {"✅": "once", "❎": "deny"}
+    adapter._approval_prompts_by_event = {}
+    adapter._approval_prompt_by_session = {}
+    adapter._processed_events = deque(maxlen=512)
+    adapter._processed_events_set = set()
+    return adapter
+
+
+def _make_event(sender, reacts_to, key="✅"):
+    """Minimal Matrix reaction event."""
+    return SimpleNamespace(
+        sender=sender,
+        event_id=f"$reaction-{sender.split(':')[0]}",
+        room_id="!testroom:matrix.org",
+        content={"m.relates_to": {"event_id": reacts_to, "key": key}},
+    )
+
+
+def _make_prompt(chat_id="!testroom:matrix.org"):
+    return _MatrixApprovalPrompt(
+        session_key="session-abc",
+        chat_id=chat_id,
+        message_id="$prompt-event-1",
+    )
+
+
+def _run(adapter, event):
+    """Run _on_reaction and return whether the prompt was resolved."""
+    prompt_event_id = "$prompt-event-1"
+    prompt = _make_prompt()
+    adapter._approval_prompts_by_event[prompt_event_id] = prompt
+    adapter._redact_bot_approval_reactions = AsyncMock()
+
+    fake_approval = types.ModuleType("tools.approval")
+    fake_approval.resolve_gateway_approval = lambda session_key, choice: 1
+    with patch.dict(sys.modules, {"tools.approval": fake_approval}):
+        asyncio.run(adapter._on_reaction(event))
+
+    return prompt.resolved
+
+
+# ---------------------------------------------------------------------------
+# Test class
+# ---------------------------------------------------------------------------
+
+class TestApprovalReactionFailClosed:
+    """_on_reaction approval auth must be fail-closed (parity with Telegram)."""
+
+    def test_no_allowlist_no_allow_all_denies(self, monkeypatch):
+        """No MATRIX_ALLOWED_USERS + no GATEWAY_ALLOW_ALL_USERS → deny."""
+        monkeypatch.delenv("MATRIX_ALLOWED_USERS", raising=False)
+        monkeypatch.delenv("GATEWAY_ALLOW_ALL_USERS", raising=False)
+        adapter = _make_adapter(allowed_user_ids=None)
+        event = _make_event("@stranger:matrix.org", "$prompt-event-1")
+        assert _run(adapter, event) is False
+
+    def test_no_allowlist_allow_all_permits(self, monkeypatch):
+        """No MATRIX_ALLOWED_USERS + GATEWAY_ALLOW_ALL_USERS=true → allow."""
+        monkeypatch.delenv("MATRIX_ALLOWED_USERS", raising=False)
+        monkeypatch.setenv("GATEWAY_ALLOW_ALL_USERS", "true")
+        adapter = _make_adapter(allowed_user_ids=None)
+        event = _make_event("@anyone:matrix.org", "$prompt-event-1")
+        assert _run(adapter, event) is True
+
+    def test_listed_sender_permits(self, monkeypatch):
+        """Sender in MATRIX_ALLOWED_USERS → allow."""
+        monkeypatch.delenv("GATEWAY_ALLOW_ALL_USERS", raising=False)
+        adapter = _make_adapter(allowed_user_ids=["@alice:matrix.org"])
+        event = _make_event("@alice:matrix.org", "$prompt-event-1")
+        assert _run(adapter, event) is True
+
+    def test_unlisted_sender_denies(self, monkeypatch):
+        """Sender not in MATRIX_ALLOWED_USERS → deny."""
+        monkeypatch.delenv("GATEWAY_ALLOW_ALL_USERS", raising=False)
+        adapter = _make_adapter(allowed_user_ids=["@alice:matrix.org"])
+        event = _make_event("@mallory:matrix.org", "$prompt-event-1")
+        assert _run(adapter, event) is False
diff --git a/tests/gateway/test_matrix_mention.py b/tests/gateway/test_matrix_mention.py
index 6c34dbce892..634c1c765f9 100644
--- a/tests/gateway/test_matrix_mention.py
+++ b/tests/gateway/test_matrix_mention.py
@@ -1,7 +1,6 @@
 """Tests for Matrix require-mention gating and auto-thread features."""
 
 import json
-import sys
 import time
 from types import SimpleNamespace
 from unittest.mock import AsyncMock, MagicMock, patch
diff --git a/tests/gateway/test_matrix_voice.py b/tests/gateway/test_matrix_voice.py
index 3b3e08d1422..51bf150b29b 100644
--- a/tests/gateway/test_matrix_voice.py
+++ b/tests/gateway/test_matrix_voice.py
@@ -2,7 +2,6 @@
 
 Updated for the mautrix-python SDK (no more matrix-nio / nio imports).
 """
-import io
 import os
 import tempfile
 import types
diff --git a/tests/gateway/test_media_download_retry.py b/tests/gateway/test_media_download_retry.py
index 5991b85e4eb..bb45061f842 100644
--- a/tests/gateway/test_media_download_retry.py
+++ b/tests/gateway/test_media_download_retry.py
@@ -536,7 +536,7 @@ import gateway.platforms.slack as _slack_mod  # noqa: E402
 _slack_mod.SLACK_AVAILABLE = True
 
 from gateway.platforms.slack import SlackAdapter  # noqa: E402
-from gateway.config import Platform, PlatformConfig  # noqa: E402
+from gateway.config import PlatformConfig  # noqa: E402
 
 
 def _make_slack_adapter():
diff --git a/tests/gateway/test_media_extraction.py b/tests/gateway/test_media_extraction.py
index 20f7d73a8fe..f5a5e104f52 100644
--- a/tests/gateway/test_media_extraction.py
+++ b/tests/gateway/test_media_extraction.py
@@ -5,6 +5,10 @@ Verifies that MEDIA tags (e.g., from TTS tool) are only extracted from
 messages in the CURRENT turn, not from the full conversation history.
 This prevents voice messages from accumulating and being sent multiple
 times per reply. (Regression test for #160)
+
+Also covers #34608: a stale MEDIA: path emitted by an execute_code /
+make_image tool several turns earlier must not leak onto a later
+text-only reply, even when the path-based dedup set fails to capture it.
 """
 
 import pytest
@@ -43,6 +47,37 @@ def extract_media_tags_fixed(result_messages, history_len):
     return media_tags, has_voice_directive
 
 
+def extract_media_tags_production(result_messages, history_len, history_media_paths):
+    """Mirror of the production scan in gateway/run.py after the #34608 fix.
+
+    Primary guard: scope the scan to the current turn via ``history_len``
+    slicing (matching how ``agent_history`` is passed as
+    ``conversation_history`` into ``run_conversation``). Secondary guard:
+    path-based dedup against ``history_media_paths`` (the #160 compression-safe
+    fallback, also used when compression shrinks the list below history_len).
+    """
+    media_tags = []
+    has_voice_directive = False
+
+    if len(result_messages) >= history_len and history_len:
+        scan_msgs = result_messages[history_len:]
+    else:
+        scan_msgs = result_messages
+
+    for msg in scan_msgs:
+        if msg.get("role") == "tool" or msg.get("role") == "function":
+            content = msg.get("content", "")
+            if "MEDIA:" in content:
+                for match in re.finditer(r'MEDIA:(\S+)', content):
+                    path = match.group(1).strip().rstrip('",}')
+                    if path and path not in history_media_paths:
+                        media_tags.append(f"MEDIA:{path}")
+                if "[[audio_as_voice]]" in content:
+                    has_voice_directive = True
+
+    return media_tags, has_voice_directive
+
+
 def extract_media_tags_broken(result_messages):
     """
     The BROKEN behavior: extract MEDIA tags from ALL messages including history.
@@ -180,5 +215,104 @@ class TestMediaExtraction:
         assert len(unique) == 2  # After dedup: same.ogg and different.ogg
 
 
+class TestStaleToolMediaLeak:
+    """Regression tests for #34608.
+
+    A MEDIA: path emitted by an execute_code / make_image tool several turns
+    earlier remains in the full conversation message list. A later text-only
+    reply (zero MEDIA directives) must NOT attach that stale image.
+
+    The production code previously relied solely on path-based dedup against
+    paths reconstructed from the replayable transcript. When that
+    reconstruction does not byte-match the in-memory tool content (timestamp
+    stripping, observed-context withholding, compression rewrites), the stale
+    path is absent from the dedup set and leaks. Turn-scoped slicing closes
+    this class of bug deterministically.
+    """
+
+    def test_stale_execute_code_media_not_attached_to_text_only_reply(self):
+        """The exact #34608 scenario: make_image cover from an earlier turn."""
+        # Prior turn generated an image via execute_code stdout.
+        history = [
+            {"role": "user", "content": "Make a cover image"},
+            {"role": "assistant", "content": None,
+             "tool_calls": [{"id": "1", "function": {"name": "execute_code"}}]},
+            {"role": "tool", "tool_call_id": "1",
+             "content": "Generating cover...\nMEDIA:/tmp/seosmi_cover.png\nDone."},
+            {"role": "assistant", "content": "Here is your cover."},
+        ]
+        # Current turn: plain text status update, zero MEDIA directives.
+        new_messages = [
+            {"role": "user", "content": "What skill version am I on?"},
+            {"role": "assistant", "content": "You're on v0.15.1."},
+        ]
+        all_messages = history + new_messages
+        history_len = len(history)
+
+        # Simulate the dedup set FAILING to capture the stale path (the real
+        # #34608 condition: replayable-history reconstruction diverged from
+        # the in-memory tool content, so the path is not in the set).
+        history_media_paths = set()
+
+        tags, voice = extract_media_tags_production(
+            all_messages, history_len, history_media_paths
+        )
+        assert tags == [], (
+            "Stale tool MEDIA from a prior turn must not leak onto a "
+            f"later text-only reply, got {tags}"
+        )
+        assert voice is False
+
+        # The pre-fix production behaviour (scan everything, dedup only) would
+        # have leaked the stale path when the dedup set missed it.
+        broken_tags, _ = extract_media_tags_broken(all_messages)
+        assert any("seosmi_cover.png" in t for t in broken_tags), (
+            "Sanity: the unscoped scan does surface the stale path"
+        )
+
+    def test_current_turn_media_still_attached_when_dedup_set_empty(self):
+        """Turn-scoping must not suppress genuinely new media."""
+        history = [
+            {"role": "user", "content": "hi"},
+            {"role": "assistant", "content": "hello"},
+        ]
+        new_messages = [
+            {"role": "user", "content": "Make me a cover image"},
+            {"role": "assistant", "content": None,
+             "tool_calls": [{"id": "9", "function": {"name": "execute_code"}}]},
+            {"role": "tool", "tool_call_id": "9",
+             "content": "MEDIA:/tmp/fresh_cover.png"},
+            {"role": "assistant", "content": "Here it is."},
+        ]
+        all_messages = history + new_messages
+        tags, _ = extract_media_tags_production(
+            all_messages, len(history), set()
+        )
+        assert len(tags) == 1 and "fresh_cover.png" in tags[0]
+
+    def test_compression_shrink_falls_back_to_path_dedup(self):
+        """When the list is shorter than history_len (mid-run compression),
+        fall back to scanning everything with path-based dedup so the #160
+        compression-safe guarantee is preserved."""
+        # Post-compression list is shorter than the original history length.
+        compressed_messages = [
+            {"role": "user", "content": "summary so far..."},
+            {"role": "tool", "tool_call_id": "7",
+             "content": "MEDIA:/tmp/old_from_history.png"},
+            {"role": "assistant", "content": "ok"},
+        ]
+        original_history_len = 12  # larger than the compressed list
+        # The old path IS captured in the dedup set here (history scan ran
+        # before compression), so it must still be excluded.
+        history_media_paths = {"/tmp/old_from_history.png"}
+        tags, _ = extract_media_tags_production(
+            compressed_messages, original_history_len, history_media_paths
+        )
+        assert tags == [], (
+            "On the compression fallback path, path-dedup must still exclude "
+            f"known-old media, got {tags}"
+        )
+
+
 if __name__ == "__main__":
     pytest.main([__file__, "-v"])
diff --git a/tests/gateway/test_message_deduplicator.py b/tests/gateway/test_message_deduplicator.py
index 4a140f2761b..e6470075284 100644
--- a/tests/gateway/test_message_deduplicator.py
+++ b/tests/gateway/test_message_deduplicator.py
@@ -10,7 +10,6 @@ the past, the entry is treated as expired and the message is allowed through.
 """
 
 import time
-from unittest.mock import patch
 
 from gateway.platforms.helpers import MessageDeduplicator
 
diff --git a/tests/gateway/test_mirror.py b/tests/gateway/test_mirror.py
index 918e0bff6c7..88183d0079b 100644
--- a/tests/gateway/test_mirror.py
+++ b/tests/gateway/test_mirror.py
@@ -1,7 +1,6 @@
 """Tests for gateway/mirror.py — session mirroring."""
 
 import json
-from pathlib import Path
 from unittest.mock import patch, MagicMock
 
 import gateway.mirror as mirror_mod
diff --git a/tests/gateway/test_model_switch_persistence.py b/tests/gateway/test_model_switch_persistence.py
index 07fa5d5f435..29adf19e6f8 100644
--- a/tests/gateway/test_model_switch_persistence.py
+++ b/tests/gateway/test_model_switch_persistence.py
@@ -15,7 +15,6 @@ from datetime import datetime
 from types import SimpleNamespace
 from unittest.mock import AsyncMock, MagicMock
 
-import pytest
 
 from gateway.config import GatewayConfig, Platform, PlatformConfig
 from gateway.session import SessionEntry, SessionSource, build_session_key
diff --git a/tests/gateway/test_msgraph_webhook.py b/tests/gateway/test_msgraph_webhook.py
index bddcf419014..caa141c6a44 100644
--- a/tests/gateway/test_msgraph_webhook.py
+++ b/tests/gateway/test_msgraph_webhook.py
@@ -1,7 +1,6 @@
 """Tests for the Microsoft Graph webhook adapter."""
 
 import asyncio
-import json
 
 import pytest
 
@@ -11,6 +10,7 @@ from gateway.platforms.msgraph_webhook import AIOHTTP_AVAILABLE, MSGraphWebhookA
 
 def _make_adapter(**extra_overrides) -> MSGraphWebhookAdapter:
     extra = {
+        "host": "127.0.0.1",
         "client_state": "expected-client-state",
         "accepted_resources": ["communications/onlineMeetings"],
     }
@@ -80,6 +80,27 @@ class TestMSGraphValidationHandshake:
         # is_connected is a @property on the base adapter, not a method.
         assert adapter.is_connected is False
 
+    @pytest.mark.anyio
+    async def test_connect_requires_source_allowlist_on_public_bind(self):
+        if not AIOHTTP_AVAILABLE:
+            pytest.skip("aiohttp not installed")
+        adapter = _make_adapter(host="0.0.0.0", port=0, allowed_source_cidrs=[])
+        connected = await adapter.connect()
+        assert connected is False
+        assert adapter.is_connected is False
+
+    @pytest.mark.anyio
+    async def test_connect_allows_loopback_without_source_allowlist(self):
+        if not AIOHTTP_AVAILABLE:
+            pytest.skip("aiohttp not installed")
+        adapter = _make_adapter(host="127.0.0.1", port=0, allowed_source_cidrs=[])
+        try:
+            connected = await adapter.connect()
+            assert connected is True
+            assert adapter.is_connected is True
+        finally:
+            await adapter.disconnect()
+
     @pytest.mark.anyio
     async def test_validation_token_echo_on_get(self):
         adapter = _make_adapter()
@@ -381,9 +402,9 @@ class TestMSGraphNotifications:
 
 class TestMSGraphSourceIPAllowlist:
     @pytest.mark.anyio
-    async def test_disabled_by_default_allows_all(self):
-        """Empty allowlist preserves pre-existing behavior (dev tunnels, localhost)."""
-        adapter = _make_adapter()  # no allowed_source_cidrs set
+    async def test_public_bind_without_allowlist_fails_closed(self):
+        """Public binds must not accept requests until a source allowlist is configured."""
+        adapter = _make_adapter(host="0.0.0.0", allowed_source_cidrs=[])
         payload = {
             "value": [
                 {
@@ -396,6 +417,24 @@ class TestMSGraphSourceIPAllowlist:
         resp = await adapter._handle_notification(
             _FakeRequest(json_payload=payload, remote="203.0.113.99")
         )
+        assert resp.status == 403
+
+    @pytest.mark.anyio
+    async def test_loopback_bind_without_allowlist_still_accepts_local_requests(self):
+        """Loopback-only listeners may rely on local proxying/tunnels instead of CIDRs."""
+        adapter = _make_adapter(host="127.0.0.1", allowed_source_cidrs=[])
+        payload = {
+            "value": [
+                {
+                    "id": "notif-ip-local",
+                    "resource": "communications/onlineMeetings/m",
+                    "clientState": "expected-client-state",
+                }
+            ]
+        }
+        resp = await adapter._handle_notification(
+            _FakeRequest(json_payload=payload, remote="127.0.0.1")
+        )
         assert resp.status == 202
 
     @pytest.mark.anyio
@@ -441,6 +480,13 @@ class TestMSGraphSourceIPAllowlist:
         )
         assert resp.status == 403
 
+    @pytest.mark.anyio
+    async def test_health_endpoint_also_respects_allowlist(self):
+        """The readiness endpoint should not leak counters to arbitrary sources."""
+        adapter = _make_adapter(allowed_source_cidrs=["10.0.0.0/8"])
+        resp = await adapter._handle_health(_FakeRequest(remote="203.0.113.99"))
+        assert resp.status == 403
+
     @pytest.mark.anyio
     async def test_invalid_cidr_entries_are_ignored_at_init(self):
         """Malformed CIDR strings should log a warning and be ignored, not crash."""
diff --git a/tests/gateway/test_ntfy_plugin.py b/tests/gateway/test_ntfy_plugin.py
index 40cf148de44..f59ee2d6a2a 100644
--- a/tests/gateway/test_ntfy_plugin.py
+++ b/tests/gateway/test_ntfy_plugin.py
@@ -15,7 +15,6 @@ the ``platform_registry``.
 from __future__ import annotations
 
 import asyncio
-import os
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
@@ -487,6 +486,22 @@ class TestSend:
         call_headers = mock_client.post.call_args[1]["headers"]
         assert "X-Markdown" not in call_headers
 
+    def test_send_emits_echo_tag_header(self):
+        """Outgoing messages carry the echo-prevention tag so the adapter
+        can recognise and skip its own replies when subscribe topic ==
+        publish topic (the default config that causes the loop)."""
+        adapter = self._make_adapter(topic="hermes-in")
+        mock_resp = MagicMock()
+        mock_resp.status_code = 200
+        mock_resp.json.return_value = {"id": "abc123"}
+        mock_client = AsyncMock()
+        mock_client.post = AsyncMock(return_value=mock_resp)
+        adapter._http_client = mock_client
+
+        _run(adapter.send("hermes-in", "Hello!"))
+        call_headers = mock_client.post.call_args[1]["headers"]
+        assert call_headers.get("X-Tags") == _ntfy._ECHO_TAG
+
 
 # ---------------------------------------------------------------------------
 # 8. Inbound message processing (identity invariant — security-critical)
@@ -544,6 +559,47 @@ class TestOnMessage:
         _run(adapter._on_message(event))
         assert len(calls) == 1
 
+    def test_own_tagged_message_skipped(self):
+        """An incoming event carrying the adapter's echo tag is the agent's
+        own reply echoed back by ntfy — it must not be dispatched, otherwise
+        the agent replies to itself forever (issue #34447)."""
+        adapter = self._make_adapter()
+        calls = []
+
+        async def handler(event):
+            calls.append(event)
+
+        adapter.set_message_handler(handler)
+        _run(adapter._on_message({
+            "id": "echo-1",
+            "event": "message",
+            "topic": "hermes-in",
+            "message": "my own reply",
+            "tags": [_ntfy._ECHO_TAG],
+            "time": None,
+        }))
+        assert calls == []
+
+    def test_message_with_other_tags_still_dispatched(self):
+        """Tags unrelated to the echo sentinel must not suppress genuine
+        user messages."""
+        adapter = self._make_adapter()
+        calls = []
+
+        async def handler(event):
+            calls.append(event)
+
+        adapter.set_message_handler(handler)
+        _run(adapter._on_message({
+            "id": "user-1",
+            "event": "message",
+            "topic": "hermes-in",
+            "message": "hello",
+            "tags": ["warning", "skull"],
+            "time": None,
+        }))
+        assert len(calls) == 1
+
     def test_timestamp_parsed_from_event(self):
         from datetime import timezone
         adapter = self._make_adapter()
@@ -743,6 +799,28 @@ class TestStandaloneSend:
         posted_url = mock_client.post.call_args[0][0]
         assert posted_url == "https://ntfy.example.com/hermes-in"
 
+    def test_emits_echo_tag_header(self, monkeypatch):
+        """Out-of-process cron / send_message deliveries also carry the echo
+        tag, so a gateway subscribed to the same topic skips them too."""
+        monkeypatch.setenv("NTFY_TOPIC", "hermes-in")
+        pconfig = MagicMock()
+        pconfig.extra = {"topic": "hermes-in"}
+
+        mock_resp = MagicMock()
+        mock_resp.status_code = 200
+        mock_resp.json.return_value = {"id": "id-99"}
+        mock_client = AsyncMock()
+        mock_client.post = AsyncMock(return_value=mock_resp)
+        mock_client.__aenter__ = AsyncMock(return_value=mock_client)
+        mock_client.__aexit__ = AsyncMock(return_value=None)
+
+        with patch.object(_ntfy, "httpx") as mock_httpx:
+            mock_httpx.AsyncClient.return_value = mock_client
+            _run(_standalone_send(pconfig, "hermes-in", "hi"))
+
+        headers = mock_client.post.call_args[1]["headers"]
+        assert headers.get("X-Tags") == _ntfy._ECHO_TAG
+
     def test_emits_bearer_token_when_configured(self, monkeypatch):
         monkeypatch.setenv("NTFY_TOPIC", "hermes-in")
         pconfig = MagicMock()
diff --git a/tests/gateway/test_pairing.py b/tests/gateway/test_pairing.py
index 0bff131ed1a..74e718f181a 100644
--- a/tests/gateway/test_pairing.py
+++ b/tests/gateway/test_pairing.py
@@ -4,7 +4,6 @@ import json
 import os
 import sys
 import time
-from pathlib import Path
 from unittest.mock import patch
 
 import pytest
@@ -17,7 +16,6 @@ from gateway.pairing import (
     RATE_LIMIT_SECONDS,
     MAX_PENDING_PER_PLATFORM,
     MAX_FAILED_ATTEMPTS,
-    LOCKOUT_SECONDS,
     _secure_write,
 )
 
diff --git a/tests/gateway/test_planned_stop_watcher.py b/tests/gateway/test_planned_stop_watcher.py
new file mode 100644
index 00000000000..451a3d8f8a7
--- /dev/null
+++ b/tests/gateway/test_planned_stop_watcher.py
@@ -0,0 +1,393 @@
+"""Tests for the planned-stop marker watcher thread (gateway/run.py).
+
+The watcher is the Windows-fallback path for the v0.13.0 session-resume
+feature — on Windows ``asyncio.add_signal_handler`` raises
+NotImplementedError, so the SIGTERM signal handler never runs and the
+shutdown drain (which writes ``resume_pending=True``) is skipped. The
+watcher closes this gap by polling for the planned-stop marker file
+and translating its existence into the same shutdown-handler call a
+real SIGTERM would have produced.
+
+See issue #33778 for the original Windows session-loss bug report.
+"""
+
+import asyncio
+import json
+import os
+import threading
+import time
+from unittest.mock import MagicMock
+
+
+from gateway.run import _run_planned_stop_watcher
+from gateway import status as status_mod
+
+
+def _write_self_marker(marker, *, stale: bool = False):
+    """Write a planned-stop marker that targets the CURRENT process.
+
+    The watcher only fires for markers naming our PID + start_time (the
+    fix for issue #34597), so tests that expect a fire must write a
+    self-targeting marker. Pass ``stale=True`` to backdate ``written_at``
+    past the TTL.
+    """
+    written_at = "2000-01-01T00:00:00+00:00" if stale else status_mod._utc_now_iso()
+    record = {
+        "target_pid": os.getpid(),
+        "target_start_time": status_mod._get_process_start_time(os.getpid()),
+        "stopper_pid": os.getpid(),
+        "written_at": written_at,
+    }
+    marker.write_text(json.dumps(record), encoding="utf-8")
+
+
+class _FakeRunner:
+    """Stand-in for GatewayRunner — only exposes the two flags the watcher reads."""
+
+    def __init__(self, *, running: bool = True, draining: bool = False):
+        self._running = running
+        self._draining = draining
+
+
+def _make_loop_capturing_calls():
+    """Build a fake asyncio loop whose call_soon_threadsafe records its args."""
+    loop = MagicMock(spec=asyncio.AbstractEventLoop)
+    loop._captured = []
+
+    def fake_call_soon_threadsafe(fn, *args):
+        loop._captured.append((fn, args))
+
+    loop.call_soon_threadsafe = fake_call_soon_threadsafe
+    return loop
+
+
+def test_watcher_fires_shutdown_when_marker_appears(tmp_path, monkeypatch):
+    """When a marker targeting THIS process exists, fire the shutdown handler."""
+    marker = tmp_path / ".gateway-planned-stop.json"
+
+    # Patch the marker-path resolver so the watcher polls our temp location.
+    monkeypatch.setattr(status_mod, "_get_planned_stop_marker_path", lambda: marker)
+
+    runner = _FakeRunner(running=True, draining=False)
+    loop = _make_loop_capturing_calls()
+    shutdown_handler = MagicMock(name="shutdown_signal_handler")
+    stop_event = threading.Event()
+
+    # Drop a self-targeting marker before the thread starts.
+    _write_self_marker(marker)
+
+    watcher = threading.Thread(
+        target=_run_planned_stop_watcher,
+        args=(stop_event, runner, loop, shutdown_handler),
+        kwargs={"poll_interval": 0.05},
+        daemon=True,
+    )
+    watcher.start()
+    watcher.join(timeout=2.0)
+
+    assert not watcher.is_alive(), "Watcher should exit after firing"
+    assert len(loop._captured) == 1, (
+        f"Expected exactly one shutdown invocation, got {loop._captured}"
+    )
+    fn, args = loop._captured[0]
+    assert fn is shutdown_handler
+    # The handler must be called with signal=None (planned stop sentinel).
+    assert args == (None,)
+
+
+def test_watcher_does_not_fire_when_marker_absent(tmp_path, monkeypatch):
+    """No marker = no shutdown call. Watcher just spins until stop_event."""
+    marker = tmp_path / ".gateway-planned-stop.json"
+    # Deliberately do NOT create the marker.
+
+    from gateway import status as status_mod
+    monkeypatch.setattr(status_mod, "_get_planned_stop_marker_path", lambda: marker)
+
+    runner = _FakeRunner(running=True, draining=False)
+    loop = _make_loop_capturing_calls()
+    shutdown_handler = MagicMock()
+    stop_event = threading.Event()
+
+    watcher = threading.Thread(
+        target=_run_planned_stop_watcher,
+        args=(stop_event, runner, loop, shutdown_handler),
+        kwargs={"poll_interval": 0.05},
+        daemon=True,
+    )
+    watcher.start()
+    time.sleep(0.3)  # let it poll a few times
+    stop_event.set()
+    watcher.join(timeout=2.0)
+
+    assert not watcher.is_alive()
+    assert loop._captured == [], (
+        f"No marker present, but watcher fired shutdown: {loop._captured}"
+    )
+    shutdown_handler.assert_not_called()
+
+
+def test_watcher_skips_when_runner_already_draining(tmp_path, monkeypatch):
+    """If shutdown is already in progress, don't re-fire the handler.
+
+    This prevents a race where the SIGTERM handler is mid-drain and the
+    watcher would double-tap the shutdown path. We check ``_draining``
+    so the watcher backs off once any shutdown is in flight.
+    """
+    marker = tmp_path / ".gateway-planned-stop.json"
+    _write_self_marker(marker)
+
+    monkeypatch.setattr(status_mod, "_get_planned_stop_marker_path", lambda: marker)
+
+    # Already draining — watcher should be a no-op.
+    runner = _FakeRunner(running=False, draining=True)
+    loop = _make_loop_capturing_calls()
+    shutdown_handler = MagicMock()
+    stop_event = threading.Event()
+
+    watcher = threading.Thread(
+        target=_run_planned_stop_watcher,
+        args=(stop_event, runner, loop, shutdown_handler),
+        kwargs={"poll_interval": 0.05},
+        daemon=True,
+    )
+    watcher.start()
+    time.sleep(0.2)
+    stop_event.set()
+    watcher.join(timeout=2.0)
+
+    assert loop._captured == [], "Watcher fired while runner was already draining"
+
+
+def test_watcher_skips_when_runner_not_started(tmp_path, monkeypatch):
+    """If the runner hasn't started, the marker is for a previous instance —
+    we shouldn't shutdown a not-yet-running gateway.
+    """
+    marker = tmp_path / ".gateway-planned-stop.json"
+    marker.write_text('{"target_pid": 9999}', encoding="utf-8")
+
+    from gateway import status as status_mod
+    monkeypatch.setattr(status_mod, "_get_planned_stop_marker_path", lambda: marker)
+
+    runner = _FakeRunner(running=False, draining=False)
+    loop = _make_loop_capturing_calls()
+    shutdown_handler = MagicMock()
+    stop_event = threading.Event()
+
+    watcher = threading.Thread(
+        target=_run_planned_stop_watcher,
+        args=(stop_event, runner, loop, shutdown_handler),
+        kwargs={"poll_interval": 0.05},
+        daemon=True,
+    )
+    watcher.start()
+    time.sleep(0.2)
+    stop_event.set()
+    watcher.join(timeout=2.0)
+
+    assert loop._captured == [], "Watcher fired before runner was running"
+
+
+def test_watcher_responds_to_stop_event_promptly(tmp_path, monkeypatch):
+    """Setting stop_event must exit the watcher within ~poll_interval seconds."""
+    marker = tmp_path / ".gateway-planned-stop.json"
+    from gateway import status as status_mod
+    monkeypatch.setattr(status_mod, "_get_planned_stop_marker_path", lambda: marker)
+
+    runner = _FakeRunner(running=True, draining=False)
+    loop = _make_loop_capturing_calls()
+    stop_event = threading.Event()
+
+    watcher = threading.Thread(
+        target=_run_planned_stop_watcher,
+        args=(stop_event, runner, loop, MagicMock()),
+        kwargs={"poll_interval": 0.1},
+        daemon=True,
+    )
+    watcher.start()
+    time.sleep(0.05)
+    started_stop = time.monotonic()
+    stop_event.set()
+    watcher.join(timeout=2.0)
+    elapsed = time.monotonic() - started_stop
+
+    assert not watcher.is_alive()
+    assert elapsed < 0.5, f"Watcher took {elapsed:.2f}s to honour stop_event"
+
+
+def test_watcher_fires_only_once_when_marker_persists(tmp_path, monkeypatch):
+    """Marker file existing for multiple polls must NOT spam the handler.
+
+    The watcher fires once and exits its loop (the shutdown handler is
+    responsible for consuming the marker on its own thread). If we
+    re-fired on every tick, the handler would be invoked dozens of
+    times before the gateway actually shuts down.
+    """
+    marker = tmp_path / ".gateway-planned-stop.json"
+    _write_self_marker(marker)
+
+    monkeypatch.setattr(status_mod, "_get_planned_stop_marker_path", lambda: marker)
+
+    runner = _FakeRunner(running=True, draining=False)
+    loop = _make_loop_capturing_calls()
+    stop_event = threading.Event()
+
+    watcher = threading.Thread(
+        target=_run_planned_stop_watcher,
+        args=(stop_event, runner, loop, MagicMock()),
+        kwargs={"poll_interval": 0.05},
+        daemon=True,
+    )
+    watcher.start()
+    # Let the watcher tick several times — but it should exit after the first fire.
+    watcher.join(timeout=1.0)
+
+    assert not watcher.is_alive()
+    assert len(loop._captured) == 1, (
+        f"Watcher fired {len(loop._captured)} times; should fire once "
+        f"and exit (events={loop._captured})"
+    )
+
+
+def test_watcher_tolerates_marker_path_resolution_errors(tmp_path, monkeypatch, caplog):
+    """If _get_planned_stop_marker_path() raises, the watcher logs and continues."""
+    from gateway import status as status_mod
+
+    call_count = [0]
+    def explode():
+        call_count[0] += 1
+        # First call (the one outside the loop, at thread start) is fine —
+        # but subsequent .exists() calls on a corrupt Path could explode.
+        if call_count[0] == 1:
+            return tmp_path / "nonexistent"
+        raise OSError("filesystem failed")
+
+    monkeypatch.setattr(status_mod, "_get_planned_stop_marker_path", explode)
+
+    runner = _FakeRunner(running=True, draining=False)
+    loop = _make_loop_capturing_calls()
+    stop_event = threading.Event()
+
+    watcher = threading.Thread(
+        target=_run_planned_stop_watcher,
+        args=(stop_event, runner, loop, MagicMock()),
+        kwargs={"poll_interval": 0.05},
+        daemon=True,
+    )
+    watcher.start()
+    time.sleep(0.2)
+    stop_event.set()
+    watcher.join(timeout=2.0)
+
+    assert not watcher.is_alive(), "Watcher should still honour stop_event after errors"
+    # No shutdown fired because the marker never reported existence.
+    assert loop._captured == []
+
+
+# ---------------------------------------------------------------------------
+# Regression coverage for issue #34597:
+# A marker left behind by a PREVIOUS gateway instance (different PID, or
+# past its TTL) must NOT crash the freshly booted gateway. The watcher
+# only fires when the marker targets the current process, and self-heals
+# by cleaning up stale/malformed markers.
+# ---------------------------------------------------------------------------
+
+
+def test_watcher_does_not_fire_for_foreign_pid_marker(tmp_path, monkeypatch):
+    """A marker naming a DIFFERENT process must not trigger our shutdown.
+
+    This is the core #34597 regression: a stale marker from a prior
+    gateway instance was firing the handler, driving the new gateway into
+    a false "Received UNKNOWN" shutdown and a watchdog crash loop.
+    """
+    marker = tmp_path / ".gateway-planned-stop.json"
+    # Foreign PID + a start_time that cannot match ours, freshly written
+    # so the TTL does NOT remove it — the watcher must still decline.
+    record = {
+        "target_pid": os.getpid() + 1,
+        "target_start_time": -1,
+        "stopper_pid": os.getpid() + 1,
+        "written_at": status_mod._utc_now_iso(),
+    }
+    marker.write_text(json.dumps(record), encoding="utf-8")
+
+    monkeypatch.setattr(status_mod, "_get_planned_stop_marker_path", lambda: marker)
+
+    runner = _FakeRunner(running=True, draining=False)
+    loop = _make_loop_capturing_calls()
+    shutdown_handler = MagicMock(name="shutdown_signal_handler")
+    stop_event = threading.Event()
+
+    watcher = threading.Thread(
+        target=_run_planned_stop_watcher,
+        args=(stop_event, runner, loop, shutdown_handler),
+        kwargs={"poll_interval": 0.05},
+        daemon=True,
+    )
+    watcher.start()
+    time.sleep(0.3)  # several poll cycles
+    stop_event.set()
+    watcher.join(timeout=2.0)
+
+    assert not watcher.is_alive()
+    assert loop._captured == [], (
+        f"Watcher fired on a foreign-PID marker (#34597 regression): {loop._captured}"
+    )
+    shutdown_handler.assert_not_called()
+    # Foreign (but live) marker is left in place — it may still belong to
+    # the process it names.
+    assert marker.exists()
+
+
+def test_watcher_cleans_up_stale_marker_and_keeps_running(tmp_path, monkeypatch):
+    """A marker older than the TTL is unlinked and never fires shutdown."""
+    marker = tmp_path / ".gateway-planned-stop.json"
+    # Self-targeting but backdated past the TTL: must be treated as dead.
+    _write_self_marker(marker, stale=True)
+
+    monkeypatch.setattr(status_mod, "_get_planned_stop_marker_path", lambda: marker)
+
+    runner = _FakeRunner(running=True, draining=False)
+    loop = _make_loop_capturing_calls()
+    shutdown_handler = MagicMock(name="shutdown_signal_handler")
+    stop_event = threading.Event()
+
+    watcher = threading.Thread(
+        target=_run_planned_stop_watcher,
+        args=(stop_event, runner, loop, shutdown_handler),
+        kwargs={"poll_interval": 0.05},
+        daemon=True,
+    )
+    watcher.start()
+    time.sleep(0.3)
+    stop_event.set()
+    watcher.join(timeout=2.0)
+
+    assert not watcher.is_alive()
+    assert loop._captured == [], "Stale marker must not fire shutdown"
+    shutdown_handler.assert_not_called()
+    assert not marker.exists(), "Stale marker should have been cleaned up"
+
+
+def test_planned_stop_marker_targets_self_probe_is_non_destructive(tmp_path, monkeypatch):
+    """The probe returns True for a self-marker WITHOUT unlinking it.
+
+    The shutdown handler performs the authoritative consume on its own
+    thread, so the watcher's probe must leave a matching marker intact.
+    """
+    marker = tmp_path / ".gateway-planned-stop.json"
+    _write_self_marker(marker)
+    monkeypatch.setattr(status_mod, "_get_planned_stop_marker_path", lambda: marker)
+
+    assert status_mod.planned_stop_marker_targets_self() is True
+    assert marker.exists(), "Probe must not consume a matching marker"
+    # Idempotent: still True on a second call.
+    assert status_mod.planned_stop_marker_targets_self() is True
+
+
+def test_planned_stop_marker_targets_self_drops_malformed(tmp_path, monkeypatch):
+    """A malformed marker reports False and is cleaned up."""
+    marker = tmp_path / ".gateway-planned-stop.json"
+    marker.write_text("{not valid json", encoding="utf-8")
+    monkeypatch.setattr(status_mod, "_get_planned_stop_marker_path", lambda: marker)
+
+    assert status_mod.planned_stop_marker_targets_self() is False
diff --git a/tests/gateway/test_platform_base.py b/tests/gateway/test_platform_base.py
index b7d96d4dc3e..e0f2c80cb04 100644
--- a/tests/gateway/test_platform_base.py
+++ b/tests/gateway/test_platform_base.py
@@ -10,9 +10,9 @@ from gateway.platforms.base import (
     BasePlatformAdapter,
     GATEWAY_SECRET_CAPTURE_UNSUPPORTED_MESSAGE,
     MessageEvent,
-    MessageType,
     safe_url_for_log,
     utf16_len,
+    _log_safe_path,
     _prefix_within_utf16_limit,
 )
 
@@ -362,12 +362,65 @@ class TestExtractMedia:
         assert "[[as_document]]" not in cleaned
 
 
+class TestMediaExtensionAllowlistParity:
+    """Regression coverage for issue #34517 — the MEDIA: extension black hole.
+
+    extract_media used to carry a narrow extension allowlist that omitted
+    .md/.json/.yaml/.xml/.html etc., while extract_local_files had a broad one.
+    Combined with an unconditional ``MEDIA:\\s*\\S+`` strip at the dispatch
+    sites, an unmatched MEDIA: tag for one of those extensions was deleted from
+    the body before extract_local_files could pick up the bare path — the file
+    was silently dropped. Both extractors now derive from the single
+    MEDIA_DELIVERY_EXTS source of truth, and the strip is anchored to that set.
+    """
+
+    DROPPED_BEFORE = ["md", "json", "yaml", "yml", "xml", "html", "htm",
+                      "tsv", "svg"]
+
+    def test_previously_dropped_extensions_now_extract(self):
+        for ext in self.DROPPED_BEFORE:
+            path = f"/tmp/report.{ext}"
+            media, _ = BasePlatformAdapter.extract_media(f"Here: MEDIA:{path}")
+            assert media == [(path, False)], f".{ext} should extract via MEDIA:"
+
+    def test_extract_media_and_local_files_share_one_extension_set(self):
+        from gateway.platforms.base import MEDIA_DELIVERY_EXTS
+        # Both functions reference MEDIA_DELIVERY_EXTS; assert the documents
+        # that motivated the bug are present in the shared set.
+        for ext in (".md", ".json", ".yaml", ".yml", ".xml", ".html", ".htm"):
+            assert ext in MEDIA_DELIVERY_EXTS
+
+    def test_unknown_extension_not_black_holed_by_cleanup(self):
+        """A MEDIA: tag with an unknown extension is NOT stripped from the
+        body — it survives so extract_local_files can still see the bare path,
+        rather than vanishing entirely (the core of issue #34517)."""
+        from gateway.platforms.base import MEDIA_TAG_CLEANUP_RE
+        text = "Saved to MEDIA:/tmp/data.weirdext done"
+        media, _ = BasePlatformAdapter.extract_media(text)
+        assert media == []  # unknown extension is not a deliverable MEDIA tag
+        stripped = MEDIA_TAG_CLEANUP_RE.sub("", text)
+        assert "/tmp/data.weirdext" in stripped  # path preserved, not dropped
+
+    def test_known_extension_tag_is_stripped_from_body(self):
+        from gateway.platforms.base import MEDIA_TAG_CLEANUP_RE
+        text = "Here is your report: MEDIA:/tmp/report.md"
+        stripped = MEDIA_TAG_CLEANUP_RE.sub("", text).strip()
+        assert "MEDIA:" not in stripped
+        assert "/tmp/report.md" not in stripped
+        assert "Here is your report:" in stripped
+
+
 class TestMediaDeliveryPathValidation:
     def _patch_roots(self, monkeypatch, *roots):
         monkeypatch.setattr(
             "gateway.platforms.base.MEDIA_DELIVERY_SAFE_ROOTS",
             tuple(roots),
         )
+        # All tests in this class cover strict-mode behavior (allowlist +
+        # recency window + denylist). Force strict on so they keep
+        # exercising the legacy path even though the public default
+        # flipped to off in 2026-05.
+        monkeypatch.setenv("HERMES_MEDIA_DELIVERY_STRICT", "1")
         # Disable recency-based trust by default so the original allowlist
         # tests continue to exercise the strict-allowlist path. Tests that
         # specifically cover recency trust re-enable it themselves.
@@ -536,6 +589,149 @@ class TestMediaDeliveryPathValidation:
         assert out == [str(fresh.resolve())]
 
 
+class TestMediaDeliveryDefaultMode:
+    """Default (non-strict) mode — denylist gates delivery, nothing else.
+
+    Symmetric with inbound delivery: Telegram/Discord/Slack accept any
+    document type the user uploads, and the agent can hand back any file
+    that isn't a credential. Strict mode is opt-in for operators running
+    public-facing gateways.
+    """
+
+    def _patch_roots(self, monkeypatch, *roots):
+        # Empty cache allowlist so the only positive path through
+        # validate_media_delivery_path in these tests is the
+        # default-mode "anything not denied" branch.
+        monkeypatch.setattr(
+            "gateway.platforms.base.MEDIA_DELIVERY_SAFE_ROOTS",
+            tuple(roots),
+        )
+        # Pin strict OFF — the public default. Tests that exercise the
+        # strict path live in TestMediaDeliveryPathValidation.
+        monkeypatch.delenv("HERMES_MEDIA_DELIVERY_STRICT", raising=False)
+        monkeypatch.delenv("HERMES_MEDIA_ALLOW_DIRS", raising=False)
+
+    def test_accepts_stale_file_outside_allowlist(self, tmp_path, monkeypatch):
+        """The motivating case — agent says ``MEDIA:/home/user/notes.md``
+        for an .md it has been working with for hours. Strict mode would
+        reject this (outside allowlist, outside recency window). Default
+        mode delivers it.
+        """
+        self._patch_roots(monkeypatch)
+
+        notes = tmp_path / "notes.md"
+        notes.write_text("# Old notes\n")
+        old_mtime = time.time() - 7200  # 2 hours ago — far outside any window
+        os.utime(notes, (old_mtime, old_mtime))
+
+        assert BasePlatformAdapter.validate_media_delivery_path(str(notes)) == str(notes.resolve())
+
+    def test_accepts_any_extension_not_on_denylist(self, tmp_path, monkeypatch):
+        """No extension allowlist — .md, .txt, .json, .py all deliver."""
+        self._patch_roots(monkeypatch)
+
+        for name in ("report.md", "log.txt", "data.json", "script.py", "blob.bin"):
+            f = tmp_path / name
+            f.write_bytes(b"x")
+            assert BasePlatformAdapter.validate_media_delivery_path(str(f)) == str(f.resolve())
+
+    def test_denylist_still_blocks_credentials(self, tmp_path, monkeypatch):
+        """Default mode is permissive but not naive — credential paths
+        remain blocked. Simulate $HOME so ~/.ssh resolves into tmp_path.
+        """
+        self._patch_roots(monkeypatch)
+
+        fake_home = tmp_path / "home"
+        ssh_dir = fake_home / ".ssh"
+        ssh_dir.mkdir(parents=True)
+        secret = ssh_dir / "id_rsa"
+        secret.write_bytes(b"-----BEGIN ...")
+        monkeypatch.setenv("HOME", str(fake_home))
+
+        assert BasePlatformAdapter.validate_media_delivery_path(str(secret)) is None
+
+    def test_denylist_blocks_system_prefixes(self, tmp_path, monkeypatch):
+        """Files under /etc, /proc, /sys, /root, /boot, /var/{log,lib,run}
+        are denied. We construct the test by patching the denylist root
+        to a tmp dir so we don't need to read /etc.
+        """
+        self._patch_roots(monkeypatch)
+
+        fake_etc = tmp_path / "fake-etc"
+        fake_etc.mkdir()
+        secret = fake_etc / "shadow"
+        secret.write_bytes(b"root:!:0:0::/root:/bin/sh")
+
+        monkeypatch.setattr(
+            "gateway.platforms.base._MEDIA_DELIVERY_DENIED_PREFIXES",
+            (str(fake_etc),),
+        )
+
+        assert BasePlatformAdapter.validate_media_delivery_path(str(secret)) is None
+
+    def test_denylist_blocks_hermes_credentials(self, tmp_path, monkeypatch):
+        """~/.hermes/.env and ~/.hermes/auth.json stay blocked even in
+        default mode. They live under $HOME (not the system prefix list)
+        so this exercises the home-relative denied paths.
+        """
+        self._patch_roots(monkeypatch)
+
+        fake_home = tmp_path / "home"
+        hermes_dir = fake_home / ".hermes"
+        hermes_dir.mkdir(parents=True)
+        env_file = hermes_dir / ".env"
+        env_file.write_text("OPENAI_API_KEY=sk-...")
+        monkeypatch.setenv("HOME", str(fake_home))
+        monkeypatch.setattr(
+            "gateway.platforms.base._HERMES_HOME",
+            hermes_dir,
+        )
+
+        assert BasePlatformAdapter.validate_media_delivery_path(str(env_file)) is None
+
+    def test_strict_mode_envvar_restores_legacy_behavior(self, tmp_path, monkeypatch):
+        """Setting HERMES_MEDIA_DELIVERY_STRICT=1 reactivates the older
+        allowlist+recency logic. A stale file outside the allowlist is
+        rejected.
+        """
+        self._patch_roots(monkeypatch)
+        monkeypatch.setenv("HERMES_MEDIA_DELIVERY_STRICT", "1")
+        monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_FILES", "0")
+
+        stale = tmp_path / "old.pdf"
+        stale.write_bytes(b"%PDF-1.4")
+        old_mtime = time.time() - 7200
+        os.utime(stale, (old_mtime, old_mtime))
+
+        assert BasePlatformAdapter.validate_media_delivery_path(str(stale)) is None
+
+    def test_strict_mode_truthy_aliases(self, monkeypatch, tmp_path):
+        """``HERMES_MEDIA_DELIVERY_STRICT=true|yes|on|1`` all enable strict mode."""
+        self._patch_roots(monkeypatch)
+        from gateway.platforms.base import _media_delivery_strict_mode
+
+        for raw in ("1", "true", "TRUE", "yes", "on"):
+            monkeypatch.setenv("HERMES_MEDIA_DELIVERY_STRICT", raw)
+            assert _media_delivery_strict_mode() is True
+
+        for raw in ("0", "false", "no", "off", ""):
+            monkeypatch.setenv("HERMES_MEDIA_DELIVERY_STRICT", raw)
+            assert _media_delivery_strict_mode() is False
+
+    def test_filter_passes_default_files_through(self, tmp_path, monkeypatch):
+        """End-to-end: filter_local_delivery_paths accepts a stale .md in
+        default mode where strict mode would drop it.
+        """
+        self._patch_roots(monkeypatch)
+
+        notes = tmp_path / "notes.md"
+        notes.write_text("# old\n")
+        os.utime(notes, (time.time() - 86400, time.time() - 86400))
+
+        out = BasePlatformAdapter.filter_local_delivery_paths([str(notes)])
+        assert out == [str(notes.resolve())]
+
+
 # ---------------------------------------------------------------------------
 # should_send_media_as_audio
 # ---------------------------------------------------------------------------
@@ -903,3 +1099,52 @@ class TestProxyKwargsForAiohttp:
             sess_kw, req_kw = proxy_kwargs_for_aiohttp("http://proxy:8080")
             assert sess_kw == {}
             assert req_kw == {"proxy": "http://proxy:8080"}
+
+
+class TestMediaDeliveryDiagnosability:
+    """Diagnosable rejection logging + crafted-path robustness (#33251)."""
+
+    def test_rejected_path_appears_in_log(self, tmp_path, caplog):
+        outside = tmp_path / "outside.ogg"
+        outside.write_bytes(b"OggS")
+        with patch.dict(os.environ, {"HERMES_MEDIA_DELIVERY_STRICT": "1",
+                                     "HERMES_MEDIA_TRUST_RECENT_FILES": "0"}), \
+                patch("gateway.platforms.base.MEDIA_DELIVERY_SAFE_ROOTS", ()):
+            with caplog.at_level("WARNING"):
+                out = BasePlatformAdapter.filter_media_delivery_paths([(str(outside), False)])
+        assert out == []
+        # The dropped path must be in the log so operators can diagnose it.
+        assert str(outside) in caplog.text
+
+    def test_crafted_null_path_does_not_abort_batch(self, tmp_path, monkeypatch):
+        """One crafted ~\\x00 path must not drop every other attachment."""
+        good = tmp_path / "good.png"
+        good.write_bytes(b"\x89PNG")
+        monkeypatch.setenv("HERMES_MEDIA_DELIVERY_STRICT", "0")
+        out = BasePlatformAdapter.filter_media_delivery_paths([
+            ("~\x00evil.png", False),
+            (str(good), False),
+        ])
+        assert out == [(str(good.resolve()), False)]
+
+    def test_extract_media_tolerates_crafted_null_path(self):
+        """extract_media must not raise on a crafted ~\\x00 MEDIA tag."""
+        content = "here\nMEDIA:`~\x00evil.png`\ntrailing"
+        # Must not raise ValueError("embedded null byte").
+        media, cleaned = BasePlatformAdapter.extract_media(content)
+        assert all("\x00" not in p for p, _ in media)
+
+    def test_log_safe_path_neutralises_line_breaks(self):
+        forged = "/tmp/a.png\nWARNING forged second line"
+        assert "\n" not in _log_safe_path(forged)
+        # Unicode separators that split log lines are also neutralised.
+        for sep in ("\u2028", "\u2029", "\x85"):
+            assert sep not in _log_safe_path(f"/tmp/a{sep}b.png")
+
+    def test_canonical_cache_roots_present(self):
+        from gateway.platforms.base import MEDIA_DELIVERY_SAFE_ROOTS
+        roots = {str(r) for r in MEDIA_DELIVERY_SAFE_ROOTS}
+        assert any(r.endswith("cache/images") for r in roots)
+        assert any(r.endswith("cache/documents") for r in roots)
+        # Legacy layout still present.
+        assert any(r.endswith("image_cache") for r in roots)
diff --git a/tests/gateway/test_platform_http_client_limits.py b/tests/gateway/test_platform_http_client_limits.py
index fe613fb1f08..074a6d52ec3 100644
--- a/tests/gateway/test_platform_http_client_limits.py
+++ b/tests/gateway/test_platform_http_client_limits.py
@@ -12,7 +12,6 @@ behaviour is only observable at runtime under load.
 
 from __future__ import annotations
 
-import os
 
 import pytest
 
diff --git a/tests/gateway/test_platform_reconnect.py b/tests/gateway/test_platform_reconnect.py
index e4362a02562..1a5a35a42e7 100644
--- a/tests/gateway/test_platform_reconnect.py
+++ b/tests/gateway/test_platform_reconnect.py
@@ -7,7 +7,7 @@ from unittest.mock import AsyncMock, MagicMock, patch
 import pytest
 
 from gateway.config import GatewayConfig, Platform, PlatformConfig
-from gateway.platforms.base import BasePlatformAdapter, MessageEvent, SendResult
+from gateway.platforms.base import BasePlatformAdapter, SendResult
 from gateway.run import GatewayRunner
 
 
diff --git a/tests/gateway/test_platform_registry.py b/tests/gateway/test_platform_registry.py
index 9ca80fe8a1f..881ec1f3dba 100644
--- a/tests/gateway/test_platform_registry.py
+++ b/tests/gateway/test_platform_registry.py
@@ -2,11 +2,10 @@
 
 import os
 import pytest
-from unittest.mock import MagicMock, patch
-from dataclasses import dataclass
+from unittest.mock import MagicMock
 
-from gateway.platform_registry import PlatformRegistry, PlatformEntry, platform_registry
-from gateway.config import Platform, PlatformConfig, GatewayConfig
+from gateway.platform_registry import PlatformRegistry, PlatformEntry
+from gateway.config import Platform, GatewayConfig
 
 
 # ── Platform enum dynamic members ─────────────────────────────────────────
diff --git a/tests/gateway/test_proxy_mode.py b/tests/gateway/test_proxy_mode.py
index 7ed6a19cb22..0c7fa80a0ba 100644
--- a/tests/gateway/test_proxy_mode.py
+++ b/tests/gateway/test_proxy_mode.py
@@ -1,8 +1,5 @@
 """Tests for gateway proxy mode — forwarding messages to a remote API server."""
 
-import asyncio
-import json
-import os
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
diff --git a/tests/gateway/test_qqbot.py b/tests/gateway/test_qqbot.py
index 8cef85bfafd..e1f41aeccdc 100644
--- a/tests/gateway/test_qqbot.py
+++ b/tests/gateway/test_qqbot.py
@@ -1,15 +1,13 @@
 """Tests for the QQ Bot platform adapter."""
 
 import asyncio
-import json
 import os
-import sys
 from types import SimpleNamespace
 from unittest import mock
 
 import pytest
 
-from gateway.config import Platform, PlatformConfig
+from gateway.config import PlatformConfig
 
 
 # ---------------------------------------------------------------------------
diff --git a/tests/gateway/test_queue_consumption.py b/tests/gateway/test_queue_consumption.py
index 9bb4d0aac36..178d1965af9 100644
--- a/tests/gateway/test_queue_consumption.py
+++ b/tests/gateway/test_queue_consumption.py
@@ -6,9 +6,8 @@ after the agent finishes its current task — not silently dropped.
 """
 
 import asyncio
-from unittest.mock import AsyncMock, MagicMock, patch
+from unittest.mock import MagicMock
 
-import pytest
 
 from gateway.run import _dequeue_pending_event
 from gateway.platforms.base import (
diff --git a/tests/gateway/test_replay_entry_fields.py b/tests/gateway/test_replay_entry_fields.py
index 4858cf62522..c0891d3721f 100644
--- a/tests/gateway/test_replay_entry_fields.py
+++ b/tests/gateway/test_replay_entry_fields.py
@@ -16,7 +16,6 @@ These tests pin the expanded whitelist so it doesn't regress.
 """
 from __future__ import annotations
 
-import pytest
 
 from gateway.run import _ASSISTANT_REPLAY_FIELDS, _build_replay_entry
 
diff --git a/tests/gateway/test_restart_notification.py b/tests/gateway/test_restart_notification.py
index 3d5d5ee9557..e7a931f8f8a 100644
--- a/tests/gateway/test_restart_notification.py
+++ b/tests/gateway/test_restart_notification.py
@@ -1,6 +1,5 @@
 """Tests for /restart notification — the gateway notifies the requester on comeback."""
 
-import asyncio
 import json
 from pathlib import Path
 from unittest.mock import AsyncMock, MagicMock
diff --git a/tests/gateway/test_restart_redelivery_dedup.py b/tests/gateway/test_restart_redelivery_dedup.py
index aa4e4330caf..88cb0223d07 100644
--- a/tests/gateway/test_restart_redelivery_dedup.py
+++ b/tests/gateway/test_restart_redelivery_dedup.py
@@ -5,7 +5,6 @@ with a network error, Telegram re-delivers the `/restart` message to the new
 gateway process.  Without a dedup guard, the new gateway would process
 `/restart` again and immediately restart — a self-perpetuating loop.
 """
-import asyncio
 import json
 import time
 from unittest.mock import MagicMock
diff --git a/tests/gateway/test_restart_resume_pending.py b/tests/gateway/test_restart_resume_pending.py
index 996153239fc..346a6ad1ad4 100644
--- a/tests/gateway/test_restart_resume_pending.py
+++ b/tests/gateway/test_restart_resume_pending.py
@@ -32,7 +32,7 @@ from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
 
-from gateway.config import GatewayConfig, HomeChannel, Platform, PlatformConfig
+from gateway.config import GatewayConfig, HomeChannel, Platform
 from gateway.platforms.base import MessageEvent, MessageType, SendResult
 from gateway.run import (
     _auto_continue_freshness_window,
diff --git a/tests/gateway/test_run_progress_interrupt.py b/tests/gateway/test_run_progress_interrupt.py
index 23969677e06..cc25b8db868 100644
--- a/tests/gateway/test_run_progress_interrupt.py
+++ b/tests/gateway/test_run_progress_interrupt.py
@@ -8,7 +8,6 @@ of tool-progress bubbles for calls that were already parsed from the LLM
 response — making the interrupt feel ignored.
 """
 
-import asyncio
 import importlib
 import sys
 import time
diff --git a/tests/gateway/test_runner_startup_failures.py b/tests/gateway/test_runner_startup_failures.py
index b82062e4090..3fbf3708852 100644
--- a/tests/gateway/test_runner_startup_failures.py
+++ b/tests/gateway/test_runner_startup_failures.py
@@ -248,7 +248,7 @@ async def test_start_gateway_replace_writes_takeover_marker_before_sigterm(
             (tmp_path / ".gateway-takeover.json").exists() is False  # not yet
         )
         # Actually write the marker so we can verify cleanup later
-        from gateway.status import _get_takeover_marker_path, _write_json_file, _get_process_start_time
+        from gateway.status import _get_takeover_marker_path, _write_json_file
         _write_json_file(_get_takeover_marker_path(), {
             "target_pid": target_pid,
             "target_start_time": 0,
diff --git a/tests/gateway/test_send_image_file.py b/tests/gateway/test_send_image_file.py
index b769d2be9fb..9cbf48fd0d7 100644
--- a/tests/gateway/test_send_image_file.py
+++ b/tests/gateway/test_send_image_file.py
@@ -14,7 +14,7 @@ from unittest.mock import AsyncMock, MagicMock, patch
 import pytest
 
 from gateway.config import PlatformConfig
-from gateway.platforms.base import BasePlatformAdapter, SendResult
+from gateway.platforms.base import BasePlatformAdapter
 
 
 def _run(coro):
diff --git a/tests/gateway/test_send_multiple_images.py b/tests/gateway/test_send_multiple_images.py
index 6bff0f09a36..5fab55c4a70 100644
--- a/tests/gateway/test_send_multiple_images.py
+++ b/tests/gateway/test_send_multiple_images.py
@@ -13,7 +13,6 @@ Signal's native implementation is covered by test_signal.py.
 """
 
 import asyncio
-import os
 import sys
 from unittest.mock import AsyncMock, MagicMock, patch
 
diff --git a/tests/gateway/test_session_api.py b/tests/gateway/test_session_api.py
index a2d00d9c8c7..d5262e9aecb 100644
--- a/tests/gateway/test_session_api.py
+++ b/tests/gateway/test_session_api.py
@@ -1,6 +1,5 @@
 """Focused tests for API server session-control endpoints."""
 
-import asyncio
 from unittest.mock import AsyncMock, patch
 
 import pytest
@@ -269,6 +268,75 @@ async def test_session_chat_stream_emits_lifecycle_events_and_keepalive_safe_sha
     assert "event: done" in body
 
 
+@pytest.mark.asyncio
+async def test_session_chat_stream_run_completed_carries_turn_transcript(adapter, session_db):
+    """run.completed must include the full interleaved turn transcript so a
+    client that lost intermediate (pre-tool-call) assistant text from the live
+    delta stream can reconcile without a separate /messages fetch. Refs #34703.
+    """
+    import json as _json
+
+    session_id = session_db.create_session("transcript-session", "api_server")
+
+    async def fake_run(**kwargs):
+        # Stream the intermediate planning text the way a real turn would.
+        kwargs["stream_delta_callback"]("Let me search for that:")
+        kwargs["stream_delta_callback"]("Here is the summary.")
+        result = {
+            "final_response": "Here is the summary.",
+            "session_id": session_id,
+            "messages": [
+                {"role": "user", "content": "search then summarize"},
+                {
+                    "role": "assistant",
+                    "content": "Let me search for that:",
+                    "tool_calls": [
+                        {
+                            "id": "call_1",
+                            "type": "function",
+                            "function": {"name": "web_search", "arguments": "{}"},
+                        }
+                    ],
+                },
+                {"role": "tool", "content": "results", "tool_call_id": "call_1", "tool_name": "web_search"},
+                {"role": "assistant", "content": "Here is the summary."},
+            ],
+        }
+        return result, {"total_tokens": 6}
+
+    app = _create_session_app(adapter)
+    with patch.object(adapter, "_run_agent", side_effect=fake_run):
+        async with TestClient(TestServer(app)) as cli:
+            resp = await cli.post(
+                f"/api/sessions/{session_id}/chat/stream",
+                json={"message": "search then summarize"},
+            )
+            assert resp.status == 200
+            body = await resp.text()
+
+    # Pull the run.completed event payload out of the SSE body.
+    run_completed_payload = None
+    for block in body.split("\n\n"):
+        if "event: run.completed" in block:
+            for line in block.splitlines():
+                if line.startswith("data: "):
+                    run_completed_payload = _json.loads(line[len("data: "):])
+            break
+    assert run_completed_payload is not None, body
+    messages = run_completed_payload.get("messages")
+    assert isinstance(messages, list) and messages, run_completed_payload
+
+    # The colon-ended intermediate text that preceded the tool call must be present.
+    contents = [m.get("content") for m in messages]
+    assert "Let me search for that:" in contents
+    assert "Here is the summary." in contents
+    # No prior-turn user message should leak into the per-turn slice.
+    assert all(m.get("role") in ("assistant", "tool") for m in messages)
+    # The tool call is preserved alongside the intermediate text.
+    assert any(m.get("tool_calls") for m in messages)
+
+
+
 @pytest.mark.asyncio
 async def test_session_endpoints_require_auth_when_key_configured(auth_adapter):
     app = _create_session_app(auth_adapter)
diff --git a/tests/gateway/test_session_dm_thread_seeding.py b/tests/gateway/test_session_dm_thread_seeding.py
index 415e953baa2..bcb1e7fee52 100644
--- a/tests/gateway/test_session_dm_thread_seeding.py
+++ b/tests/gateway/test_session_dm_thread_seeding.py
@@ -15,10 +15,9 @@ Covers:
 """
 
 import pytest
-from unittest.mock import patch
 
 from gateway.config import Platform, GatewayConfig
-from gateway.session import SessionSource, SessionStore, build_session_key
+from gateway.session import SessionSource, SessionStore
 
 
 @pytest.fixture()
diff --git a/tests/gateway/test_session_hygiene.py b/tests/gateway/test_session_hygiene.py
index fb8b273f411..b54f588cb10 100644
--- a/tests/gateway/test_session_hygiene.py
+++ b/tests/gateway/test_session_hygiene.py
@@ -13,7 +13,7 @@ import sys
 import types
 from datetime import datetime
 from types import SimpleNamespace
-from unittest.mock import patch, MagicMock, AsyncMock
+from unittest.mock import MagicMock, AsyncMock
 
 import pytest
 
diff --git a/tests/gateway/test_session_info.py b/tests/gateway/test_session_info.py
index d8c65305f7b..ec05b31b735 100644
--- a/tests/gateway/test_session_info.py
+++ b/tests/gateway/test_session_info.py
@@ -1,8 +1,7 @@
 """Tests for GatewayRunner._format_session_info — session config surfacing."""
 
 import pytest
-from unittest.mock import patch, MagicMock
-from pathlib import Path
+from unittest.mock import patch
 
 from gateway.run import GatewayRunner
 
diff --git a/tests/gateway/test_session_reset_notify.py b/tests/gateway/test_session_reset_notify.py
index a4e9d71d0f8..c73ed640ccd 100644
--- a/tests/gateway/test_session_reset_notify.py
+++ b/tests/gateway/test_session_reset_notify.py
@@ -8,14 +8,11 @@ Verifies that:
 """
 
 from datetime import datetime, timedelta
-from unittest.mock import MagicMock
 
-import pytest
 
 from gateway.config import (
     GatewayConfig,
     Platform,
-    PlatformConfig,
     SessionResetPolicy,
 )
 from gateway.session import SessionEntry, SessionSource, SessionStore
diff --git a/tests/gateway/test_session_split_brain_11016.py b/tests/gateway/test_session_split_brain_11016.py
index 0b2972ac173..85fe274ab2e 100644
--- a/tests/gateway/test_session_split_brain_11016.py
+++ b/tests/gateway/test_session_split_brain_11016.py
@@ -17,7 +17,7 @@ Covers three layers of the fix:
 """
 
 import asyncio
-from unittest.mock import AsyncMock, MagicMock, patch
+from unittest.mock import MagicMock
 
 import pytest
 
@@ -27,7 +27,7 @@ from gateway.platforms.base import (
     MessageEvent,
     MessageType,
 )
-from gateway.run import GatewayRunner, _AGENT_PENDING_SENTINEL
+from gateway.run import GatewayRunner
 from gateway.session import SessionSource, build_session_key
 
 
diff --git a/tests/gateway/test_session_state_cleanup.py b/tests/gateway/test_session_state_cleanup.py
index 3c708736c3b..ffbb465b7aa 100644
--- a/tests/gateway/test_session_state_cleanup.py
+++ b/tests/gateway/test_session_state_cleanup.py
@@ -19,7 +19,6 @@ leaving WAL locks in place until Python actually exited.
 import threading
 from unittest.mock import MagicMock
 
-import pytest
 
 
 def _make_runner():
diff --git a/tests/gateway/test_session_store_prune.py b/tests/gateway/test_session_store_prune.py
index 34fa21e25a8..d6af52edf45 100644
--- a/tests/gateway/test_session_store_prune.py
+++ b/tests/gateway/test_session_store_prune.py
@@ -19,7 +19,6 @@ import threading
 from datetime import datetime, timedelta
 from unittest.mock import patch
 
-import pytest
 
 from gateway.config import GatewayConfig, Platform, SessionResetPolicy
 from gateway.session import SessionEntry, SessionStore
diff --git a/tests/gateway/test_shutdown_cache_cleanup.py b/tests/gateway/test_shutdown_cache_cleanup.py
index 82970d20c50..fd3f1e9b952 100644
--- a/tests/gateway/test_shutdown_cache_cleanup.py
+++ b/tests/gateway/test_shutdown_cache_cleanup.py
@@ -12,7 +12,7 @@ The fix adds an explicit sweep of ``_agent_cache`` after
 import asyncio
 import threading
 from collections import OrderedDict
-from unittest.mock import MagicMock, patch
+from unittest.mock import MagicMock
 
 import pytest
 
diff --git a/tests/gateway/test_signal.py b/tests/gateway/test_signal.py
index af81f59e8cd..c2cf76d9f89 100644
--- a/tests/gateway/test_signal.py
+++ b/tests/gateway/test_signal.py
@@ -1,7 +1,6 @@
 """Tests for Signal messenger platform adapter."""
 import asyncio
 import base64
-import json
 import pytest
 from pathlib import Path
 from unittest.mock import MagicMock, patch, AsyncMock
diff --git a/tests/gateway/test_signal_format.py b/tests/gateway/test_signal_format.py
index ef50f62fd0a..0050a980f59 100644
--- a/tests/gateway/test_signal_format.py
+++ b/tests/gateway/test_signal_format.py
@@ -430,7 +430,6 @@ class TestSignalStreamingPatch:
         """send() returns message_id=None so stream consumer uses no-edit path."""
         monkeypatch.setenv("SIGNAL_GROUP_ALLOWED_USERS", "")
         from gateway.platforms.signal import SignalAdapter
-        from gateway.config import PlatformConfig
 
         config = PlatformConfig(enabled=True)
         config.extra = {
diff --git a/tests/gateway/test_signal_rate_limit.py b/tests/gateway/test_signal_rate_limit.py
index 963f8b9303b..d2111cb3d28 100644
--- a/tests/gateway/test_signal_rate_limit.py
+++ b/tests/gateway/test_signal_rate_limit.py
@@ -1,11 +1,9 @@
 """Tests for the SignalAttachmentScheduler token-bucket simulator."""
 import asyncio
-import time
 
 import pytest
 
 from gateway.platforms.signal_rate_limit import (
-    SIGNAL_MAX_ATTACHMENTS_PER_MSG,
     SIGNAL_RATE_LIMIT_BUCKET_CAPACITY,
     SIGNAL_RATE_LIMIT_DEFAULT_RETRY_AFTER,
     SignalAttachmentScheduler,
diff --git a/tests/gateway/test_simplex_plugin.py b/tests/gateway/test_simplex_plugin.py
index 0b1b1b21a85..1048168aa6e 100644
--- a/tests/gateway/test_simplex_plugin.py
+++ b/tests/gateway/test_simplex_plugin.py
@@ -8,7 +8,6 @@ sibling platform-plugin tests on the same xdist worker.
 from __future__ import annotations
 
 import json
-import os
 from unittest.mock import AsyncMock, MagicMock
 
 import pytest
diff --git a/tests/gateway/test_slack.py b/tests/gateway/test_slack.py
index c1e521a7bcc..97618f4482a 100644
--- a/tests/gateway/test_slack.py
+++ b/tests/gateway/test_slack.py
@@ -20,8 +20,6 @@ from gateway.config import Platform, PlatformConfig
 from gateway.platforms.base import (
     MessageEvent,
     MessageType,
-    SendResult,
-    SUPPORTED_DOCUMENT_TYPES,
     is_host_excluded_by_no_proxy,
 )
 
diff --git a/tests/gateway/test_slack_approval_buttons.py b/tests/gateway/test_slack_approval_buttons.py
index bc12d0072bd..16f991118b8 100644
--- a/tests/gateway/test_slack_approval_buttons.py
+++ b/tests/gateway/test_slack_approval_buttons.py
@@ -1,7 +1,5 @@
 """Tests for Slack Block Kit approval buttons and thread context fetching."""
 
-import asyncio
-import os
 import sys
 from pathlib import Path
 from unittest.mock import AsyncMock, MagicMock, patch
@@ -44,7 +42,7 @@ def _ensure_slack_mock():
 _ensure_slack_mock()
 
 from gateway.platforms.slack import SlackAdapter
-from gateway.config import Platform, PlatformConfig
+from gateway.config import PlatformConfig
 
 
 def _make_adapter():
diff --git a/tests/gateway/test_slash_access.py b/tests/gateway/test_slash_access.py
index 5e21ac8b610..c939a446c9e 100644
--- a/tests/gateway/test_slash_access.py
+++ b/tests/gateway/test_slash_access.py
@@ -8,7 +8,6 @@ from __future__ import annotations
 from gateway.config import GatewayConfig, Platform, PlatformConfig
 from gateway.session import SessionSource
 from gateway.slash_access import (
-    SlashAccessPolicy,
     policy_for_source,
     policy_from_extra,
 )
diff --git a/tests/gateway/test_slash_access_dispatch.py b/tests/gateway/test_slash_access_dispatch.py
index 1e26c93e0eb..1a597cf688f 100644
--- a/tests/gateway/test_slash_access_dispatch.py
+++ b/tests/gateway/test_slash_access_dispatch.py
@@ -337,7 +337,6 @@ async def test_running_agent_fastpath_blocks_non_admin_command():
     )
     src = _make_source(user_id="999")
     # Mark the session as having an in-flight agent so the fast-path runs.
-    from gateway.session import build_session_key
     sk = build_session_key(src)
     runner._running_agents[sk] = MagicMock()
     runner._running_agents_ts[sk] = 0  # not stale (epoch + small delta on this machine)
@@ -361,7 +360,6 @@ async def test_running_agent_fastpath_allows_admin_command():
         }
     )
     src = _make_source(user_id="111")  # admin
-    from gateway.session import build_session_key
     sk = build_session_key(src)
     runner._running_agents[sk] = MagicMock()
     runner._running_agents_ts[sk] = 0
@@ -384,7 +382,6 @@ async def test_running_agent_fastpath_status_always_works():
         }
     )
     src = _make_source(user_id="999")  # non-admin
-    from gateway.session import build_session_key
     sk = build_session_key(src)
     runner._running_agents[sk] = MagicMock()
     runner._running_agents_ts[sk] = 0
diff --git a/tests/gateway/test_sms.py b/tests/gateway/test_sms.py
index e3ec86d90af..8d8b73614aa 100644
--- a/tests/gateway/test_sms.py
+++ b/tests/gateway/test_sms.py
@@ -12,7 +12,7 @@ from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
 
-from gateway.config import Platform, PlatformConfig, HomeChannel
+from gateway.config import Platform, PlatformConfig
 
 
 # ── Config loading ──────────────────────────────────────────────────
diff --git a/tests/gateway/test_sse_agent_cancel.py b/tests/gateway/test_sse_agent_cancel.py
index 6b5306fbe6e..2958a5b3e8c 100644
--- a/tests/gateway/test_sse_agent_cancel.py
+++ b/tests/gateway/test_sse_agent_cancel.py
@@ -7,11 +7,9 @@ task wrapper is cancelled.
 """
 
 import asyncio
-import json
 import queue
 from unittest.mock import AsyncMock, MagicMock, patch
 
-import pytest
 
 
 # ---------------------------------------------------------------------------
diff --git a/tests/gateway/test_ssl_certs.py b/tests/gateway/test_ssl_certs.py
index f98eb03a6cd..2fc34ea9d5c 100644
--- a/tests/gateway/test_ssl_certs.py
+++ b/tests/gateway/test_ssl_certs.py
@@ -1,6 +1,5 @@
 """Tests for SSL certificate auto-detection in gateway/run.py."""
 
-import importlib
 import os
 from unittest.mock import patch, MagicMock
 
diff --git a/tests/gateway/test_status.py b/tests/gateway/test_status.py
index b92c0cd4d11..ae378e0b753 100644
--- a/tests/gateway/test_status.py
+++ b/tests/gateway/test_status.py
@@ -707,6 +707,33 @@ class TestTakeoverMarker:
 
         assert result is False
 
+    def test_consume_returns_true_on_windows_when_start_time_unavailable(
+        self, tmp_path, monkeypatch
+    ):
+        """Takeover consume must also recognise a self-marker on platforms
+        without ``/proc`` (macOS / native Windows).
+
+        ``consume_takeover_marker_for_self`` shares ``_consume_pid_marker_for_self``
+        with the planned-stop path, so the same start_time fallback applies:
+        a ``--replace`` SIGTERM on Windows (where start_time is None on both
+        sides) must be recognised as a planned takeover and exit 0, not be
+        misclassified as an unexpected UNKNOWN exit. With start_time
+        unavailable we fall back to PID equality alone, bounded by the TTL.
+        """
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        # Simulate Windows: no start_time available for any PID.
+        monkeypatch.setattr(status, "_get_process_start_time", lambda pid: None)
+
+        ok = status.write_takeover_marker(target_pid=os.getpid())
+        assert ok is True
+        payload = json.loads((tmp_path / ".gateway-takeover.json").read_text())
+        assert payload["target_start_time"] is None
+
+        result = status.consume_takeover_marker_for_self()
+
+        assert result is True
+        assert not (tmp_path / ".gateway-takeover.json").exists()
+
     def test_consume_returns_false_when_marker_missing(self, tmp_path, monkeypatch):
         monkeypatch.setenv("HERMES_HOME", str(tmp_path))
 
@@ -899,6 +926,74 @@ class TestPlannedStopMarker:
 
         assert ok is False
 
+    def test_consume_returns_true_on_windows_when_start_time_unavailable(
+        self, tmp_path, monkeypatch
+    ):
+        """Regression for #34597: a legitimate stop must be recognised on
+        platforms without ``/proc``.
+
+        ``_get_process_start_time`` returns None on macOS / native Windows
+        (no ``/proc/<pid>/stat``). The planned-stop watcher only runs there,
+        so if the authoritative consume required a non-None start_time match
+        it would always return False — and ``hermes gateway stop`` would be
+        misclassified as an unexpected ``UNKNOWN`` exit, exit 1, and revived
+        by the service manager (the very crash loop #34597 set out to fix).
+        With start_time unavailable on BOTH sides we fall back to PID
+        equality alone, bounded by the marker TTL.
+        """
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        # Simulate Windows: no start_time available for any PID.
+        monkeypatch.setattr(status, "_get_process_start_time", lambda pid: None)
+
+        ok = status.write_planned_stop_marker(target_pid=os.getpid())
+        assert ok is True
+        # Marker carries a null start_time, exactly as written on Windows.
+        payload = json.loads((tmp_path / ".gateway-planned-stop.json").read_text())
+        assert payload["target_start_time"] is None
+
+        result = status.consume_planned_stop_marker_for_self()
+
+        assert result is True
+        assert not (tmp_path / ".gateway-planned-stop.json").exists()
+
+    def test_consume_still_rejects_foreign_pid_when_start_time_unavailable(
+        self, tmp_path, monkeypatch
+    ):
+        """The PID-only fallback must NOT match a marker naming another PID.
+
+        Falling back to PID equality when start_time is unknown must remain
+        a PID check — a marker for a different process is never ours.
+        """
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        monkeypatch.setattr(status, "_get_process_start_time", lambda pid: None)
+
+        ok = status.write_planned_stop_marker(target_pid=os.getpid() + 9999)
+        assert ok is True
+
+        result = status.consume_planned_stop_marker_for_self()
+
+        assert result is False
+
+    def test_consume_still_rejects_start_time_mismatch_when_both_known(
+        self, tmp_path, monkeypatch
+    ):
+        """PID-reuse defence is preserved when BOTH start_times are present.
+
+        The Windows fallback only relaxes matching when a start_time is
+        unavailable. When both sides report one (Linux), a mismatch must
+        still reject — otherwise PID reuse could resurrect a stale marker.
+        """
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        monkeypatch.setattr(status, "_get_process_start_time", lambda pid: 100)
+        status.write_planned_stop_marker(target_pid=os.getpid())
+
+        # Simulate PID reuse: same PID, different start_time.
+        monkeypatch.setattr(status, "_get_process_start_time", lambda pid: 9999)
+
+        result = status.consume_planned_stop_marker_for_self()
+
+        assert result is False
+
 
 class TestReadProcessCmdlinePsFallback:
     """Tests for _read_process_cmdline falling back to ps on non-Linux."""
diff --git a/tests/gateway/test_status_command.py b/tests/gateway/test_status_command.py
index d8504370a5f..01222597224 100644
--- a/tests/gateway/test_status_command.py
+++ b/tests/gateway/test_status_command.py
@@ -495,7 +495,7 @@ async def test_status_command_bypasses_active_session_guard():
     import asyncio
     from gateway.platforms.base import BasePlatformAdapter, MessageEvent, MessageType
     from gateway.session import build_session_key
-    from gateway.config import Platform, PlatformConfig, GatewayConfig
+    from gateway.config import Platform, PlatformConfig
 
     source = _make_source()
     session_key = build_session_key(source)
diff --git a/tests/gateway/test_step_callback_compat.py b/tests/gateway/test_step_callback_compat.py
index cdfc3fb04ac..3111f011a52 100644
--- a/tests/gateway/test_step_callback_compat.py
+++ b/tests/gateway/test_step_callback_compat.py
@@ -6,9 +6,7 @@ while also providing the enriched ``tools`` list with results.
 """
 
 import asyncio
-from unittest.mock import AsyncMock, MagicMock, patch
 
-import pytest
 
 
 class TestStepCallbackNormalization:
diff --git a/tests/gateway/test_sticker_cache.py b/tests/gateway/test_sticker_cache.py
index a8fc9121954..9223a11e17d 100644
--- a/tests/gateway/test_sticker_cache.py
+++ b/tests/gateway/test_sticker_cache.py
@@ -1,7 +1,5 @@
 """Tests for gateway/sticker_cache.py — sticker description cache."""
 
-import json
-import time
 from unittest.mock import patch
 
 from gateway.sticker_cache import (
@@ -11,7 +9,6 @@ from gateway.sticker_cache import (
     cache_sticker_description,
     build_sticker_injection,
     build_animated_sticker_injection,
-    STICKER_VISION_PROMPT,
 )
 
 
diff --git a/tests/gateway/test_stream_consumer.py b/tests/gateway/test_stream_consumer.py
index 3a6baa65b05..9a445532d0d 100644
--- a/tests/gateway/test_stream_consumer.py
+++ b/tests/gateway/test_stream_consumer.py
@@ -939,6 +939,133 @@ class TestFinalResponseDeliveryGuard:
         assert consumer._final_response_sent is True
 
 
+class TestFinalContentDeliveredGuard:
+    """Regression coverage for #25010 — _final_content_delivered must only be
+    set when the final response is actually confirmed delivered to the user,
+    not when a mid-stream edit happened to show partial content.  Prematurely
+    setting this flag causes the gateway to suppress the normal final send,
+    leaving the user with an incomplete partial message."""
+
+    @pytest.mark.asyncio
+    async def test_mid_stream_edit_success_does_not_mark_content_delivered(self):
+        """When the mid-stream edit with finalize=True succeeds but the
+        subsequent finalize edit fails, _final_content_delivered must stay
+        False so the gateway does not suppress its fallback send (#25010).
+
+        Simulates TelegramAdapter which sets REQUIRES_EDIT_FINALIZE=True,
+        requiring a second finalize edit even when content is unchanged."""
+        adapter = MagicMock()
+        adapter.REQUIRES_EDIT_FINALIZE = True  # Telegram adapter behavior
+        # First send (initial streaming message) succeeds
+        # Mid-stream finalize edit succeeds
+        # Final finalize edit FAILS (e.g. flood control on Telegram)
+        adapter.edit_message = AsyncMock(side_effect=[
+            SimpleNamespace(success=True),   # mid-stream edit
+            SimpleNamespace(success=True),   # finalize edit on line 548
+            SimpleNamespace(success=False),  # final finalize on line 580 (FAILS)
+        ])
+        adapter.send = AsyncMock(
+            return_value=SimpleNamespace(success=True, message_id="msg_1"),
+        )
+        adapter.MAX_MESSAGE_LENGTH = 4096
+
+        config = StreamConsumerConfig(edit_interval=0.01, buffer_threshold=5)
+        consumer = GatewayStreamConsumer(adapter, "chat_123", config)
+
+        # Simulate streaming: send initial text, then more text, then done
+        consumer.on_delta("Part one of the response...\n")
+        task = asyncio.create_task(consumer.run())
+        await asyncio.sleep(0.05)
+
+        consumer.on_delta("Part two, the complete final answer.\n")
+        await asyncio.sleep(0.05)
+
+        consumer.finish()
+        await task
+
+        # The key assertion: _final_content_delivered must NOT be True,
+        # because the final edit failed and the complete response was never
+        # confirmed delivered.
+        assert consumer._final_content_delivered is False, (
+            "_final_content_delivered was prematurely set to True — gateway "
+            "will wrongly suppress its fallback send, leaving the user with "
+            "an incomplete partial message (#25010)"
+        )
+        # The gateway must still be allowed to send the complete response
+        assert consumer._final_response_sent is False, (
+            "_final_response_sent must also be False when the final edit failed"
+        )
+
+    @pytest.mark.asyncio
+    async def test_final_edit_success_does_mark_content_delivered(self):
+        """When the final finalize edit succeeds, _final_content_delivered
+        must be True — the normal happy path should still work."""
+        adapter = MagicMock()
+        adapter.edit_message = AsyncMock(return_value=SimpleNamespace(success=True))
+        adapter.send = AsyncMock(
+            return_value=SimpleNamespace(success=True, message_id="msg_1"),
+        )
+        adapter.MAX_MESSAGE_LENGTH = 4096
+
+        config = StreamConsumerConfig(edit_interval=0.01, buffer_threshold=5)
+        consumer = GatewayStreamConsumer(adapter, "chat_123", config)
+
+        consumer.on_delta("The complete response.\n")
+        task = asyncio.create_task(consumer.run())
+        await asyncio.sleep(0.05)
+
+        consumer.finish()
+        await task
+
+        assert consumer._final_content_delivered is True, (
+            "_final_content_delivered must be True when the final edit succeeds"
+        )
+        assert consumer._final_response_sent is True
+
+    @pytest.mark.asyncio
+    async def test_fallback_partial_send_does_not_mark_final_sent(self):
+        """When fallback final send delivers only some chunks before failing,
+        _final_response_sent must stay False so the gateway can still attempt
+        a complete final send (#25010)."""
+        call_count = 0
+
+        async def fake_send(*, chat_id, content, **kwargs):
+            nonlocal call_count
+            call_count += 1
+            if call_count <= 2:
+                return SimpleNamespace(success=True, message_id="msg_1")
+            # Third chunk (fallback continuation) FAILS
+            return SimpleNamespace(success=False, error="flood_control:13.0")
+
+        adapter = MagicMock()
+        adapter.send = AsyncMock(side_effect=fake_send)
+        adapter.edit_message = AsyncMock(
+            return_value=SimpleNamespace(success=False, error="flood_control:13.0"),
+        )
+        adapter.MAX_MESSAGE_LENGTH = 4096
+
+        config = StreamConsumerConfig(edit_interval=0.01, buffer_threshold=5)
+        consumer = GatewayStreamConsumer(adapter, "chat_123", config)
+
+        # Trigger enough delta to enter fallback mode
+        consumer.on_delta("Initial streaming text...\n")
+        task = asyncio.create_task(consumer.run())
+        await asyncio.sleep(0.05)
+
+        # Send a very long text that will trigger overflow/fallback
+        long_text = ("x" * 3000 + "\n") + ("y" * 3000 + "\n") + "Final answer.\n"
+        consumer.on_delta(long_text)
+        await asyncio.sleep(0.1)
+
+        consumer.finish()
+        await task
+
+        assert consumer._final_response_sent is False, (
+            "Partial fallback send must not set _final_response_sent — gateway "
+            "must still be able to deliver the complete response (#25010)"
+        )
+
+
 class TestEditOverflowSplitAndDeliver:
     """When edit_message split-and-delivers an oversized payload across the
     original message + N continuations (Telegram >4096 UTF-16), the consumer
diff --git a/tests/gateway/test_stream_consumer_thread_routing.py b/tests/gateway/test_stream_consumer_thread_routing.py
index 80477574d87..ec4611abfa3 100644
--- a/tests/gateway/test_stream_consumer_thread_routing.py
+++ b/tests/gateway/test_stream_consumer_thread_routing.py
@@ -6,15 +6,13 @@ the main group chat.
 
 Covers: #6969, #9916, #7355
 """
-import asyncio
-from unittest.mock import AsyncMock, MagicMock, patch
+from unittest.mock import AsyncMock, MagicMock
 from types import SimpleNamespace
 
 import pytest
 
 from gateway.stream_consumer import (
     GatewayStreamConsumer,
-    StreamConsumerConfig,
 )
 
 
diff --git a/tests/gateway/test_stuck_loop.py b/tests/gateway/test_stuck_loop.py
index a26f29a2b57..31f9858869a 100644
--- a/tests/gateway/test_stuck_loop.py
+++ b/tests/gateway/test_stuck_loop.py
@@ -6,7 +6,6 @@ is auto-suspended on startup so the user gets a clean slate.
 """
 
 import json
-from pathlib import Path
 from unittest.mock import MagicMock
 
 import pytest
diff --git a/tests/gateway/test_teams.py b/tests/gateway/test_teams.py
index 6c7173fe931..b9f575ef9f5 100644
--- a/tests/gateway/test_teams.py
+++ b/tests/gateway/test_teams.py
@@ -1,13 +1,10 @@
 """Tests for the Microsoft Teams platform adapter plugin."""
 
-import asyncio
 import json
-import os
 import sys
 import types
-from pathlib import Path
 from types import SimpleNamespace
-from unittest.mock import AsyncMock, MagicMock, patch
+from unittest.mock import AsyncMock, MagicMock
 
 import httpx
 import pytest
diff --git a/tests/gateway/test_telegram_approval_buttons.py b/tests/gateway/test_telegram_approval_buttons.py
index e2ca8566827..5810b87a59b 100644
--- a/tests/gateway/test_telegram_approval_buttons.py
+++ b/tests/gateway/test_telegram_approval_buttons.py
@@ -1,6 +1,5 @@
 """Tests for Telegram inline keyboard approval buttons."""
 
-import asyncio
 import os
 import sys
 from pathlib import Path
diff --git a/tests/gateway/test_telegram_audio_vs_voice.py b/tests/gateway/test_telegram_audio_vs_voice.py
index d8ad38e299c..5af5cb920a7 100644
--- a/tests/gateway/test_telegram_audio_vs_voice.py
+++ b/tests/gateway/test_telegram_audio_vs_voice.py
@@ -12,7 +12,7 @@ These tests confirm that:
   3. Mixed media lists (voice + audio) split correctly.
 """
 
-from unittest.mock import AsyncMock, MagicMock, patch
+from unittest.mock import patch
 
 import pytest
 
diff --git a/tests/gateway/test_telegram_caption_merge.py b/tests/gateway/test_telegram_caption_merge.py
index 09cfd8c3d7e..f5d4390f483 100644
--- a/tests/gateway/test_telegram_caption_merge.py
+++ b/tests/gateway/test_telegram_caption_merge.py
@@ -1,6 +1,5 @@
 """Tests for TelegramPlatform._merge_caption caption deduplication logic."""
 
-import pytest
 
 from gateway.platforms.telegram import TelegramAdapter
 
diff --git a/tests/gateway/test_telegram_clarify_buttons.py b/tests/gateway/test_telegram_clarify_buttons.py
index 56c0f9e60c4..729ee22359a 100644
--- a/tests/gateway/test_telegram_clarify_buttons.py
+++ b/tests/gateway/test_telegram_clarify_buttons.py
@@ -4,11 +4,9 @@ Mirrors test_telegram_approval_buttons.py for the new ``send_clarify`` and
 ``cl:`` callback dispatch added in feat/clarify-gateway-buttons.
 """
 
-import asyncio
 import os
 import sys
 from pathlib import Path
-from types import SimpleNamespace
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
@@ -50,7 +48,7 @@ def _ensure_telegram_mock():
 _ensure_telegram_mock()
 
 from gateway.platforms.telegram import TelegramAdapter
-from gateway.config import Platform, PlatformConfig
+from gateway.config import PlatformConfig
 
 
 def _make_adapter(extra=None):
diff --git a/tests/gateway/test_telegram_documents.py b/tests/gateway/test_telegram_documents.py
index 8b2e1943cc2..f4155107aa0 100644
--- a/tests/gateway/test_telegram_documents.py
+++ b/tests/gateway/test_telegram_documents.py
@@ -9,7 +9,6 @@ We mock the telegram module at import time to avoid collection errors.
 """
 
 import asyncio
-import importlib
 import os
 import sys
 from types import SimpleNamespace
@@ -17,12 +16,11 @@ from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
 
-from gateway.config import Platform, PlatformConfig
+from gateway.config import PlatformConfig
 from gateway.platforms.base import (
     MessageEvent,
     MessageType,
     SendResult,
-    SUPPORTED_DOCUMENT_TYPES,
     SUPPORTED_VIDEO_TYPES,
 )
 
diff --git a/tests/gateway/test_telegram_photo_interrupts.py b/tests/gateway/test_telegram_photo_interrupts.py
index e808e68dbe8..74fb8baa3e3 100644
--- a/tests/gateway/test_telegram_photo_interrupts.py
+++ b/tests/gateway/test_telegram_photo_interrupts.py
@@ -1,4 +1,3 @@
-import asyncio
 from unittest.mock import MagicMock
 
 import pytest
diff --git a/tests/gateway/test_telegram_progress_edit_transient.py b/tests/gateway/test_telegram_progress_edit_transient.py
index 22cd6605348..33df94a90bf 100644
--- a/tests/gateway/test_telegram_progress_edit_transient.py
+++ b/tests/gateway/test_telegram_progress_edit_transient.py
@@ -17,8 +17,6 @@ Two layers are tested:
 
 from __future__ import annotations
 
-import asyncio
-from unittest.mock import AsyncMock
 
 import pytest
 
diff --git a/tests/gateway/test_telegram_send_path_health.py b/tests/gateway/test_telegram_send_path_health.py
index 940633224e4..05972bdba43 100644
--- a/tests/gateway/test_telegram_send_path_health.py
+++ b/tests/gateway/test_telegram_send_path_health.py
@@ -6,7 +6,6 @@ but nothing reaches the recipient.  ``_send_path_degraded`` short-circuits
 ``send()`` so cron's live-adapter branch falls through to standalone HTTP.
 """
 import sys
-import types
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
diff --git a/tests/gateway/test_telegram_text_batch_perf.py b/tests/gateway/test_telegram_text_batch_perf.py
index 518dee24604..194dd0d3ffb 100644
--- a/tests/gateway/test_telegram_text_batch_perf.py
+++ b/tests/gateway/test_telegram_text_batch_perf.py
@@ -13,8 +13,6 @@ or out-of-bounds values that could break asyncio.sleep().
 from __future__ import annotations
 
 import math
-import os
-from unittest.mock import MagicMock
 
 import pytest
 
diff --git a/tests/gateway/test_telegram_text_batching.py b/tests/gateway/test_telegram_text_batching.py
index 14c3f0dd67e..5cd45190067 100644
--- a/tests/gateway/test_telegram_text_batching.py
+++ b/tests/gateway/test_telegram_text_batching.py
@@ -6,12 +6,14 @@ from the same session and aggregate them before dispatching.
 """
 
 import asyncio
-from unittest.mock import AsyncMock, MagicMock, patch
+from types import SimpleNamespace
+from unittest.mock import AsyncMock
 
 import pytest
 
 from gateway.config import Platform, PlatformConfig
 from gateway.platforms.base import MessageEvent, MessageType, SessionSource
+from gateway.session import build_session_key
 
 
 def _make_adapter():
@@ -119,3 +121,46 @@ class TestTextBatching:
 
         assert len(adapter._pending_text_batches) == 0
         assert len(adapter._pending_text_batch_tasks) == 0
+
+    @pytest.mark.asyncio
+    async def test_dm_topic_batching_recovers_thread_before_keying(self):
+        """DM-topic text batches should use the recovered topic lane."""
+        adapter = _make_adapter()
+        adapter.set_topic_recovery_fn(
+            lambda source: "222" if str(source.thread_id or "") == "1" else None
+        )
+        event = MessageEvent(
+            text="hello from DM topic",
+            message_type=MessageType.TEXT,
+            source=SessionSource(
+                platform=Platform.TELEGRAM,
+                chat_id="12345",
+                chat_type="dm",
+                user_id="user-1",
+                thread_id="1",
+            ),
+        )
+
+        adapter._enqueue_text_event(event)
+
+        def _key(thread_id: str) -> str:
+            return build_session_key(
+                SimpleNamespace(
+                    platform=Platform.TELEGRAM,
+                    chat_id="12345",
+                    chat_type="dm",
+                    thread_id=thread_id,
+                ),
+                group_sessions_per_user=True,
+                thread_sessions_per_user=False,
+            )
+
+        assert _key("222") in adapter._pending_text_batches
+        assert _key("1") not in adapter._pending_text_batches
+        assert event.source.thread_id == "222"
+
+        await asyncio.sleep(0.2)
+
+        adapter.handle_message.assert_called_once()
+        dispatched = adapter.handle_message.call_args[0][0]
+        assert dispatched.source.thread_id == "222"
diff --git a/tests/gateway/test_telegram_topic_mode.py b/tests/gateway/test_telegram_topic_mode.py
index 1941bb89e20..c887153508c 100644
--- a/tests/gateway/test_telegram_topic_mode.py
+++ b/tests/gateway/test_telegram_topic_mode.py
@@ -448,6 +448,89 @@ async def test_new_inside_telegram_topic_rewrites_binding_to_new_session(tmp_pat
     assert binding["session_id"] == "new-topic-session"
 
 
+@pytest.mark.asyncio
+async def test_topic_binding_follows_compression_tip_on_read(tmp_path, monkeypatch):
+    """Stale topic bindings auto-heal to the compression child on next inbound.
+
+    Regression for #20470 / #29712 / #33414. After compression rotates the
+    session_id, the binding row still pointed at the parent. On the next
+    inbound message in that topic, the gateway used to reload the oversized
+    parent transcript and re-run preflight compression — sometimes in a loop.
+    The read path now walks ``SessionDB.get_compression_tip()`` and rewrites
+    the binding to the descendant.
+    """
+    import gateway.run as gateway_run
+
+    session_db = SessionDB(db_path=tmp_path / "state.db")
+    session_db.enable_telegram_topic_mode(chat_id="208214988", user_id="208214988")
+    # Build a parent -> compression child chain. end_session sets ended_at;
+    # create_session sets started_at to "now", so the child's started_at is
+    # always >= parent's ended_at on a real clock.
+    session_db.create_session(
+        session_id="parent-session", source="telegram", user_id="208214988",
+    )
+    session_db.end_session("parent-session", end_reason="compression")
+    session_db.create_session(
+        session_id="child-session",
+        source="telegram",
+        user_id="208214988",
+        parent_session_id="parent-session",
+    )
+    topic_source = _make_source(thread_id="17585")
+    topic_key = build_session_key(topic_source)
+    # Pre-bug binding: topic still pointed at the pre-compression parent.
+    session_db.bind_telegram_topic(
+        chat_id="208214988",
+        thread_id="17585",
+        user_id="208214988",
+        session_key=topic_key,
+        session_id="parent-session",
+    )
+
+    runner = _make_runner(session_db=session_db)
+    # switch_session() returns a SessionEntry pointing at whatever id was
+    # requested; capture the requested id for assertion.
+    switched_to: dict = {}
+
+    def fake_switch(_key, new_session_id):
+        switched_to["id"] = new_session_id
+        return SessionEntry(
+            session_key=topic_key,
+            session_id=new_session_id,
+            created_at=datetime.now(),
+            updated_at=datetime.now(),
+            platform=Platform.TELEGRAM,
+            chat_type="dm",
+            origin=topic_source,
+        )
+
+    runner.session_store.switch_session = MagicMock(side_effect=fake_switch)
+    runner._run_agent = AsyncMock(
+        return_value={
+            "success": True,
+            "final_response": "ok",
+            "session_id": "child-session",
+            "messages": [],
+        }
+    )
+
+    monkeypatch.setattr(
+        gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"}
+    )
+
+    await runner._handle_message(_make_event("follow up after compression", thread_id="17585"))
+
+    # The route was advanced to the compression tip, not the stale parent.
+    assert switched_to.get("id") == "child-session"
+    # The binding row was rewritten to point at the descendant so future
+    # inbound messages skip the tip walk and resolve directly.
+    refreshed = session_db.get_telegram_topic_binding(
+        chat_id="208214988", thread_id="17585",
+    )
+    assert refreshed is not None
+    assert refreshed["session_id"] == "child-session"
+
+
 @pytest.mark.asyncio
 async def test_topic_root_command_explicitly_migrates_and_enables_topic_mode(tmp_path, monkeypatch):
     import gateway.run as gateway_run
@@ -960,7 +1043,6 @@ def test_lobby_reminder_is_debounced_per_chat(tmp_path):
 
 def test_binding_survives_session_deletion_via_cascade(tmp_path):
     """Deleting a session with a topic binding must not raise FK errors."""
-    import sqlite3
     db = SessionDB(db_path=tmp_path / "state.db")
     db.enable_telegram_topic_mode(chat_id="208214988", user_id="208214988")
     db.create_session(session_id="sess-to-delete", source="telegram", user_id="208214988")
@@ -988,7 +1070,6 @@ def test_binding_survives_session_deletion_via_cascade(tmp_path):
 
 def test_migration_rebuilds_v1_binding_table_with_cascade_fk(tmp_path):
     """v1 → v2 migration rebuilds the bindings table when FK lacks ON DELETE CASCADE."""
-    import sqlite3
     db_path = tmp_path / "state.db"
     db = SessionDB(db_path=db_path)
 
diff --git a/tests/gateway/test_telegram_webhook_secret.py b/tests/gateway/test_telegram_webhook_secret.py
index 0f1e786367a..268a52e327e 100644
--- a/tests/gateway/test_telegram_webhook_secret.py
+++ b/tests/gateway/test_telegram_webhook_secret.py
@@ -13,7 +13,6 @@ import re
 import sys
 from pathlib import Path
 
-import pytest
 
 _repo = str(Path(__file__).resolve().parents[2])
 if _repo not in sys.path:
diff --git a/tests/gateway/test_text_batching.py b/tests/gateway/test_text_batching.py
index 7154ae4ae09..c0e7bf5d4b6 100644
--- a/tests/gateway/test_text_batching.py
+++ b/tests/gateway/test_text_batching.py
@@ -9,8 +9,7 @@ Telegram and Feishu.
 """
 
 import asyncio
-import os
-from unittest.mock import AsyncMock, MagicMock, patch
+from unittest.mock import AsyncMock
 
 import pytest
 
diff --git a/tests/gateway/test_title_command.py b/tests/gateway/test_title_command.py
index c09a2202f48..17b6fbe7102 100644
--- a/tests/gateway/test_title_command.py
+++ b/tests/gateway/test_title_command.py
@@ -4,9 +4,8 @@ Tests the _handle_title_command handler (set/show session titles)
 across all gateway messenger platforms.
 """
 
-import os
 from types import SimpleNamespace
-from unittest.mock import AsyncMock, MagicMock, patch
+from unittest.mock import AsyncMock, MagicMock
 
 import pytest
 
diff --git a/tests/gateway/test_transcript_offset.py b/tests/gateway/test_transcript_offset.py
index 7cbb519ee3a..23f5e72d182 100644
--- a/tests/gateway/test_transcript_offset.py
+++ b/tests/gateway/test_transcript_offset.py
@@ -12,7 +12,6 @@ messages.  The fix adds ``history_offset`` (the filtered history length)
 to ``_run_agent``'s return dict and uses it for the slice.
 """
 
-import pytest
 
 from gateway.run import _preserve_queued_followup_history_offset
 
diff --git a/tests/gateway/test_tts_media_routing.py b/tests/gateway/test_tts_media_routing.py
index eeb740f8f62..eaf9c592808 100644
--- a/tests/gateway/test_tts_media_routing.py
+++ b/tests/gateway/test_tts_media_routing.py
@@ -8,7 +8,7 @@ only renders as a voice bubble when explicitly flagged) and via
 """
 
 from types import SimpleNamespace
-from unittest.mock import AsyncMock, MagicMock
+from unittest.mock import AsyncMock
 
 import pytest
 
@@ -234,9 +234,12 @@ async def test_streaming_delivery_blocks_media_path_outside_allowed_roots(tmp_pa
         "gateway.platforms.base.MEDIA_DELIVERY_SAFE_ROOTS",
         (allowed_root,),
     )
-    # This test exercises the strict-allowlist path; disable recency trust so
-    # the freshly-written tmp_path file is not auto-accepted by the trust
-    # window. (Recency trust is covered separately in test_platform_base.py.)
+    # This test exercises the strict-allowlist path; force strict mode on
+    # and disable recency trust so the freshly-written tmp_path file is not
+    # auto-accepted by the trust window. (Recency trust is covered separately
+    # in test_platform_base.py. The public default flipped to non-strict in
+    # 2026-05; this test pins strict on explicitly.)
+    monkeypatch.setenv("HERMES_MEDIA_DELIVERY_STRICT", "1")
     monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_FILES", "0")
     adapter = SimpleNamespace(
         name="test",
diff --git a/tests/gateway/test_update_command.py b/tests/gateway/test_update_command.py
index aa6240aa5b5..154603898b3 100644
--- a/tests/gateway/test_update_command.py
+++ b/tests/gateway/test_update_command.py
@@ -5,7 +5,6 @@ the _send_update_notification startup hook (sends results after restart).
 """
 
 import json
-import os
 from pathlib import Path
 from unittest.mock import patch, MagicMock, AsyncMock
 
@@ -74,7 +73,6 @@ class TestHandleUpdateCommand:
             pass
 
         # Simpler approach — mock at method level using a wrapper
-        from gateway.run import GatewayRunner
         runner = _make_runner()
 
         with patch("gateway.run._hermes_home", tmp_path):
diff --git a/tests/gateway/test_update_streaming.py b/tests/gateway/test_update_streaming.py
index eb0f0cfa890..e15e1c3f1f6 100644
--- a/tests/gateway/test_update_streaming.py
+++ b/tests/gateway/test_update_streaming.py
@@ -11,7 +11,6 @@ import json
 import os
 import time
 import asyncio
-from pathlib import Path
 from unittest.mock import patch, MagicMock, AsyncMock
 
 import pytest
diff --git a/tests/gateway/test_usage_command.py b/tests/gateway/test_usage_command.py
index feced75b255..e0297b3e6d5 100644
--- a/tests/gateway/test_usage_command.py
+++ b/tests/gateway/test_usage_command.py
@@ -1,6 +1,5 @@
 """Tests for gateway /usage command — agent cache lookup and output fields."""
 
-import asyncio
 import threading
 from unittest.mock import MagicMock, patch
 
@@ -44,7 +43,7 @@ def _make_mock_agent(**overrides):
 
 def _make_runner(session_key, agent=None, cached_agent=None):
     """Build a bare GatewayRunner with just the fields _handle_usage_command needs."""
-    from gateway.run import GatewayRunner, _AGENT_PENDING_SENTINEL
+    from gateway.run import GatewayRunner
 
     runner = object.__new__(GatewayRunner)
     runner._running_agents = {}
diff --git a/tests/gateway/test_verbose_command.py b/tests/gateway/test_verbose_command.py
index 055d61c262f..04399b1da50 100644
--- a/tests/gateway/test_verbose_command.py
+++ b/tests/gateway/test_verbose_command.py
@@ -1,6 +1,5 @@
 """Tests for gateway /verbose command (config-gated tool progress cycling)."""
 
-import asyncio
 from unittest.mock import AsyncMock, MagicMock
 
 import pytest
diff --git a/tests/gateway/test_voice_command.py b/tests/gateway/test_voice_command.py
index 160b35c6449..5066f4952f6 100644
--- a/tests/gateway/test_voice_command.py
+++ b/tests/gateway/test_voice_command.py
@@ -905,7 +905,6 @@ class TestVoiceChannelCommands:
     @pytest.mark.asyncio
     async def test_input_no_adapter(self, runner):
         """No Discord adapter — early return, no crash."""
-        from gateway.config import Platform
         # No adapters set
         await runner._handle_voice_channel_input(111, 42, "Hello")
 
@@ -1355,7 +1354,7 @@ class TestCallbackWiringOrder:
 
     def test_callback_set_before_join(self):
         """_handle_voice_channel_join wires callback before calling join."""
-        import ast, inspect
+        import inspect
         from gateway.run import GatewayRunner
         source = inspect.getsource(GatewayRunner._handle_voice_channel_join)
         lines = source.split("\n")
@@ -1490,7 +1489,7 @@ class TestAutoTtsEmptyTextGuard:
 
     def test_base_empty_check_in_source(self):
         """base.py must check speech_text is non-empty before calling TTS."""
-        import ast, inspect
+        import inspect
         from gateway.platforms.base import BasePlatformAdapter
         source = inspect.getsource(BasePlatformAdapter._process_message_background)
         assert "if not speech_text" in source or "not speech_text" in source, (
@@ -2228,7 +2227,6 @@ class TestDisconnectVoiceCleanup:
 
     @pytest.mark.asyncio
     async def test_disconnect_clears_voice_state(self):
-        from unittest.mock import AsyncMock
 
         adapter = MagicMock()
         adapter._voice_clients = {111: MagicMock(), 222: MagicMock()}
@@ -2665,7 +2663,7 @@ class TestVoiceTTSPlayback:
 
     def _call_should_reply(self, runner, voice_mode, msg_type, response="Hello",
                            agent_msgs=None, already_sent=False):
-        from gateway.platforms.base import MessageType, MessageEvent, SessionSource
+        from gateway.platforms.base import MessageEvent, SessionSource
         from gateway.config import Platform
         runner._voice_mode["discord:ch1"] = voice_mode
         source = SessionSource(
diff --git a/tests/gateway/test_voice_mode_platform_isolation.py b/tests/gateway/test_voice_mode_platform_isolation.py
index 444c2d5789e..1cf45adbb8f 100644
--- a/tests/gateway/test_voice_mode_platform_isolation.py
+++ b/tests/gateway/test_voice_mode_platform_isolation.py
@@ -11,7 +11,6 @@ import tempfile
 from pathlib import Path
 from unittest.mock import MagicMock, patch
 
-import pytest
 
 from gateway.config import Platform
 from gateway.run import GatewayRunner
diff --git a/tests/gateway/test_webhook_adapter.py b/tests/gateway/test_webhook_adapter.py
index 9cf61c3c3b5..606bd80e46e 100644
--- a/tests/gateway/test_webhook_adapter.py
+++ b/tests/gateway/test_webhook_adapter.py
@@ -27,7 +27,7 @@ from aiohttp import web
 from aiohttp.test_utils import TestClient, TestServer
 
 from gateway.config import Platform, PlatformConfig
-from gateway.platforms.base import MessageEvent, MessageType, SendResult
+from gateway.platforms.base import SendResult
 from gateway.platforms.webhook import (
     WebhookAdapter,
     _INSECURE_NO_AUTH,
diff --git a/tests/gateway/test_webhook_deliver_only.py b/tests/gateway/test_webhook_deliver_only.py
index 3e40d95c6ee..987c396ee6f 100644
--- a/tests/gateway/test_webhook_deliver_only.py
+++ b/tests/gateway/test_webhook_deliver_only.py
@@ -14,8 +14,6 @@ Covers:
 """
 
 import asyncio
-import hashlib
-import hmac
 import json
 from unittest.mock import AsyncMock, MagicMock, patch
 
diff --git a/tests/gateway/test_webhook_dynamic_routes.py b/tests/gateway/test_webhook_dynamic_routes.py
index 98c0db26492..31f7f0ac77a 100644
--- a/tests/gateway/test_webhook_dynamic_routes.py
+++ b/tests/gateway/test_webhook_dynamic_routes.py
@@ -1,9 +1,7 @@
 """Tests for webhook adapter dynamic route loading."""
 
 import json
-import os
 import pytest
-from pathlib import Path
 
 from gateway.config import PlatformConfig
 from gateway.platforms.webhook import (
diff --git a/tests/gateway/test_webhook_integration.py b/tests/gateway/test_webhook_integration.py
index 5c6fe011110..9312ac0e999 100644
--- a/tests/gateway/test_webhook_integration.py
+++ b/tests/gateway/test_webhook_integration.py
@@ -19,11 +19,10 @@ from aiohttp.test_utils import TestClient, TestServer
 
 from gateway.config import (
     GatewayConfig,
-    HomeChannel,
     Platform,
     PlatformConfig,
 )
-from gateway.platforms.base import MessageEvent, MessageType, SendResult
+from gateway.platforms.base import MessageEvent, SendResult
 from gateway.platforms.webhook import WebhookAdapter, _INSECURE_NO_AUTH
 
 
diff --git a/tests/gateway/test_wecom.py b/tests/gateway/test_wecom.py
index 02d04daf64e..ad46a0bfb65 100644
--- a/tests/gateway/test_wecom.py
+++ b/tests/gateway/test_wecom.py
@@ -9,7 +9,7 @@ from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
 
-from gateway.config import Platform, PlatformConfig
+from gateway.config import PlatformConfig
 from gateway.platforms.base import SendResult
 
 
diff --git a/tests/gateway/test_weixin.py b/tests/gateway/test_weixin.py
index 64258f7a29a..ac535865df8 100644
--- a/tests/gateway/test_weixin.py
+++ b/tests/gateway/test_weixin.py
@@ -4,7 +4,6 @@ import asyncio
 import base64
 import json
 import os
-from pathlib import Path
 from unittest.mock import AsyncMock, Mock, patch
 
 import pytest
diff --git a/tests/gateway/test_whatsapp_formatting.py b/tests/gateway/test_whatsapp_formatting.py
index 81b1a57c0c9..04b3174cdc2 100644
--- a/tests/gateway/test_whatsapp_formatting.py
+++ b/tests/gateway/test_whatsapp_formatting.py
@@ -7,11 +7,11 @@ Covers:
 """
 
 import asyncio
-from unittest.mock import AsyncMock, MagicMock, patch
+from unittest.mock import AsyncMock, MagicMock
 
 import pytest
 
-from gateway.config import Platform, PlatformConfig
+from gateway.config import Platform
 
 
 # ---------------------------------------------------------------------------
diff --git a/tests/gateway/test_whatsapp_group_gating.py b/tests/gateway/test_whatsapp_group_gating.py
index 206c75830b7..75560633839 100644
--- a/tests/gateway/test_whatsapp_group_gating.py
+++ b/tests/gateway/test_whatsapp_group_gating.py
@@ -306,7 +306,6 @@ def test_status_broadcast_chats_are_always_dropped():
     (a contact's WhatsApp Story update). These pseudo-chats aren't real
     conversations and the adapter must drop them regardless of dm_policy.
     """
-    from gateway.platforms.whatsapp import WhatsAppAdapter
 
     # Even on the most permissive config — open DMs, no allowlist — Stories
     # and Channel posts must not reach the agent.
diff --git a/tests/gateway/test_whatsapp_reply_prefix.py b/tests/gateway/test_whatsapp_reply_prefix.py
index bf7a45c3dac..61f37332665 100644
--- a/tests/gateway/test_whatsapp_reply_prefix.py
+++ b/tests/gateway/test_whatsapp_reply_prefix.py
@@ -7,12 +7,10 @@ Covers:
 - Config version covers all ENV_VARS_BY_VERSION keys (regression guard)
 """
 
-from pathlib import Path
-from unittest.mock import MagicMock, patch
+from unittest.mock import patch
 
-import pytest
 
-from gateway.config import GatewayConfig, Platform, PlatformConfig
+from gateway.config import Platform, PlatformConfig
 
 
 # ---------------------------------------------------------------------------
diff --git a/tests/gateway/test_ws_auth_retry.py b/tests/gateway/test_ws_auth_retry.py
index e413a30f938..ada5799538b 100644
--- a/tests/gateway/test_ws_auth_retry.py
+++ b/tests/gateway/test_ws_auth_retry.py
@@ -9,7 +9,6 @@ of stopping. These tests verify that auth errors now stop the reconnect.
 import asyncio
 from unittest.mock import AsyncMock, MagicMock, patch
 
-import pytest
 
 
 # ---------------------------------------------------------------------------
diff --git a/tests/hermes_cli/test_anthropic_model_flow_stale_oauth.py b/tests/hermes_cli/test_anthropic_model_flow_stale_oauth.py
index e5526a34789..b6582776467 100644
--- a/tests/hermes_cli/test_anthropic_model_flow_stale_oauth.py
+++ b/tests/hermes_cli/test_anthropic_model_flow_stale_oauth.py
@@ -6,11 +6,8 @@ Claude Code credentials are available. The fast-path silently proceeds to
 model selection with a broken token instead of offering re-auth.
 """
 
-import json
-import pytest
-from unittest.mock import patch, MagicMock
 
-from hermes_cli.config import load_env, save_env_value
+from hermes_cli.config import save_env_value
 
 
 class TestStaleOAuthTokenDetection:
diff --git a/tests/hermes_cli/test_api_key_providers.py b/tests/hermes_cli/test_api_key_providers.py
index 902ff7a50f6..af576ed2995 100644
--- a/tests/hermes_cli/test_api_key_providers.py
+++ b/tests/hermes_cli/test_api_key_providers.py
@@ -6,7 +6,6 @@ import pytest
 
 from hermes_cli.auth import (
     PROVIDER_REGISTRY,
-    ProviderConfig,
     resolve_provider,
     get_api_key_provider_status,
     resolve_api_key_provider_credentials,
diff --git a/tests/hermes_cli/test_apply_profile_override.py b/tests/hermes_cli/test_apply_profile_override.py
index c17c10c439f..6396faabd27 100644
--- a/tests/hermes_cli/test_apply_profile_override.py
+++ b/tests/hermes_cli/test_apply_profile_override.py
@@ -15,7 +15,6 @@ import os
 import sys
 from pathlib import Path
 
-import pytest
 
 
 def _run_apply_profile_override(
diff --git a/tests/hermes_cli/test_argparse_flag_propagation.py b/tests/hermes_cli/test_argparse_flag_propagation.py
index c3d8e80db32..87db493850c 100644
--- a/tests/hermes_cli/test_argparse_flag_propagation.py
+++ b/tests/hermes_cli/test_argparse_flag_propagation.py
@@ -14,7 +14,6 @@ so the subparser only sets the attribute when the user explicitly provides it.
 import argparse
 import os
 import sys
-from unittest.mock import patch
 
 import pytest
 
diff --git a/tests/hermes_cli/test_atomic_json_write.py b/tests/hermes_cli/test_atomic_json_write.py
index 08bed89ff36..6c3e94f6bb8 100644
--- a/tests/hermes_cli/test_atomic_json_write.py
+++ b/tests/hermes_cli/test_atomic_json_write.py
@@ -1,7 +1,6 @@
 """Tests for utils.atomic_json_write — crash-safe JSON file writes."""
 
 import json
-import os
 from pathlib import Path
 from unittest.mock import patch
 
diff --git a/tests/hermes_cli/test_atomic_yaml_write.py b/tests/hermes_cli/test_atomic_yaml_write.py
index 6a9e4f00d5e..c76649fce66 100644
--- a/tests/hermes_cli/test_atomic_yaml_write.py
+++ b/tests/hermes_cli/test_atomic_yaml_write.py
@@ -1,6 +1,5 @@
 """Tests for utils.atomic_yaml_write — crash-safe YAML file writes."""
 
-from pathlib import Path
 from unittest.mock import patch
 
 import pytest
diff --git a/tests/hermes_cli/test_auth_codex_provider.py b/tests/hermes_cli/test_auth_codex_provider.py
index 7b1bec33929..52a8a4a2c45 100644
--- a/tests/hermes_cli/test_auth_codex_provider.py
+++ b/tests/hermes_cli/test_auth_codex_provider.py
@@ -7,7 +7,6 @@ from pathlib import Path
 from types import SimpleNamespace
 
 import pytest
-import yaml
 
 from hermes_cli.auth import (
     AuthError,
@@ -17,8 +16,6 @@ from hermes_cli.auth import (
     _save_codex_tokens,
     _import_codex_cli_tokens,
     _login_openai_codex,
-    get_codex_auth_status,
-    get_provider_auth_state,
     refresh_codex_oauth_pure,
     resolve_codex_runtime_credentials,
     resolve_provider,
@@ -303,6 +300,88 @@ def test_save_codex_tokens_syncs_credential_pool(tmp_path, monkeypatch):
     assert auth["providers"]["openai-codex"]["tokens"]["access_token"] == "new-at"
 
 
+def test_save_codex_tokens_syncs_manual_device_code_entries(tmp_path, monkeypatch):
+    """Re-auth must also refresh ``manual:device_code`` pool entries.
+
+    Regression for #33538: a user who hit #33000 before the #33164 fix landed
+    would have run ``hermes auth add openai-codex`` as a workaround, leaving
+    a pool entry with ``source="manual:device_code"``.  On every subsequent
+    re-auth via setup/model picker, the singleton-seeded ``device_code`` entry
+    got refreshed but the ``manual:device_code`` entry stayed stale, recreating
+    the same 401 token_invalidated symptom that #33164 was supposed to fix.
+
+    An interactive Codex device-code re-auth proves the user owns the ChatGPT
+    account, so it is safe to refresh every device-code-backed entry in the
+    pool — but NOT independent ``manual:api_key`` entries (separate accounts /
+    explicit API keys).
+    """
+    hermes_home = tmp_path / "hermes"
+    hermes_home.mkdir(parents=True, exist_ok=True)
+    (hermes_home / "auth.json").write_text(json.dumps({
+        "version": 1,
+        "providers": {
+            "openai-codex": {
+                "tokens": {"access_token": "old-at", "refresh_token": "old-rt"},
+                "last_refresh": "2026-01-01T00:00:00Z",
+                "auth_mode": "chatgpt",
+            },
+        },
+        "credential_pool": {
+            "openai-codex": [
+                {
+                    "id": "seeded",
+                    "source": "device_code",
+                    "auth_type": "oauth",
+                    "access_token": "old-at",
+                    "refresh_token": "old-rt",
+                },
+                {
+                    "id": "auth-add",
+                    "source": "manual:device_code",
+                    "auth_type": "oauth",
+                    "access_token": "stale-manual-at",
+                    "refresh_token": "stale-manual-rt",
+                    "last_status": "exhausted",
+                    "last_error_code": 401,
+                    "last_error_reason": "token_invalidated",
+                },
+                {
+                    "id": "api-key",
+                    "source": "manual:api_key",
+                    "auth_type": "api_key",
+                    "access_token": "user-api-key",
+                },
+            ],
+        },
+    }))
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    _save_codex_tokens({"access_token": "fresh-at", "refresh_token": "fresh-rt"},
+                       last_refresh="2026-05-28T00:00:00Z")
+
+    auth = json.loads((hermes_home / "auth.json").read_text())
+    pool = auth["credential_pool"]["openai-codex"]
+
+    # Singleton-seeded device_code entry: refreshed and error markers cleared.
+    seeded = next(e for e in pool if e["source"] == "device_code")
+    assert seeded["access_token"] == "fresh-at"
+    assert seeded["refresh_token"] == "fresh-rt"
+
+    # manual:device_code entry: ALSO refreshed (the new behavior).
+    manual_dc = next(e for e in pool if e["source"] == "manual:device_code")
+    assert manual_dc["access_token"] == "fresh-at"
+    assert manual_dc["refresh_token"] == "fresh-rt"
+    assert manual_dc["last_refresh"] == "2026-05-28T00:00:00Z"
+    assert manual_dc["last_status"] is None
+    assert manual_dc["last_error_code"] is None
+    assert manual_dc["last_error_reason"] is None
+
+    # manual:api_key entry: untouched — independent credential.
+    api_key = next(e for e in pool if e["source"] == "manual:api_key")
+    assert api_key["access_token"] == "user-api-key"
+    assert "refresh_token" not in api_key or api_key.get("refresh_token") is None
+
+
 def test_import_codex_cli_tokens(tmp_path, monkeypatch):
     codex_home = tmp_path / "codex-cli"
     codex_home.mkdir(parents=True, exist_ok=True)
diff --git a/tests/hermes_cli/test_auth_commands.py b/tests/hermes_cli/test_auth_commands.py
index 801b190cd79..ae95c2747e9 100644
--- a/tests/hermes_cli/test_auth_commands.py
+++ b/tests/hermes_cli/test_auth_commands.py
@@ -107,15 +107,15 @@ def test_auth_add_nous_oauth_persists_pool_entry(tmp_path, monkeypatch):
             "portal_base_url": "https://portal.example.com",
             "inference_base_url": "https://inference.example.com/v1",
             "client_id": "hermes-cli",
-            "scope": "inference:invoke inference:mint_agent_key",
+            "scope": "inference:invoke",
             "token_type": "Bearer",
             "access_token": token,
             "refresh_token": "refresh-token",
             "obtained_at": "2026-03-23T10:00:00+00:00",
             "expires_at": "2026-03-23T11:00:00+00:00",
             "expires_in": 3600,
-            "agent_key": "ak-test",
-            "agent_key_id": "ak-id",
+            "agent_key": token,
+            "agent_key_id": None,
             "agent_key_expires_at": "2026-03-23T10:30:00+00:00",
             "agent_key_expires_in": 1800,
             "agent_key_reused": False,
@@ -155,17 +155,17 @@ def test_auth_add_nous_oauth_persists_pool_entry(tmp_path, monkeypatch):
     assert not any(item["source"] == "manual:device_code" for item in entries)
     entry = device_code_entries[0]
     assert entry["source"] == "device_code"
-    assert entry["agent_key"] == "ak-test"
+    assert entry["agent_key"] == token
     assert entry["portal_base_url"] == "https://portal.example.com"
 
     # `hermes auth add nous` must also populate providers.nous so the
-    # 401-recovery path (resolve_nous_runtime_credentials) can mint a fresh
-    # agent_key when the 24h TTL expires. If this mirror is missing, recovery
+    # 401-recovery path (resolve_nous_runtime_credentials) can refresh an
+    # invoke JWT when the token expires. If this mirror is missing, recovery
     # raises "Hermes is not logged into Nous Portal" and the agent dies.
     singleton = payload["providers"]["nous"]
     assert singleton["access_token"] == token
     assert singleton["refresh_token"] == "refresh-token"
-    assert singleton["agent_key"] == "ak-test"
+    assert singleton["agent_key"] == token
     assert singleton["portal_base_url"] == "https://portal.example.com"
     assert singleton["inference_base_url"] == "https://inference.example.com/v1"
 
@@ -228,15 +228,15 @@ def test_auth_add_nous_oauth_honors_custom_label(tmp_path, monkeypatch):
             "portal_base_url": "https://portal.example.com",
             "inference_base_url": "https://inference.example.com/v1",
             "client_id": "hermes-cli",
-            "scope": "inference:invoke inference:mint_agent_key",
+            "scope": "inference:invoke",
             "token_type": "Bearer",
             "access_token": token,
             "refresh_token": "refresh-token",
             "obtained_at": "2026-03-23T10:00:00+00:00",
             "expires_at": "2026-03-23T11:00:00+00:00",
             "expires_in": 3600,
-            "agent_key": "ak-test",
-            "agent_key_id": "ak-id",
+            "agent_key": token,
+            "agent_key_id": None,
             "agent_key_expires_at": "2026-03-23T10:30:00+00:00",
             "agent_key_expires_in": 1800,
             "agent_key_reused": False,
diff --git a/tests/hermes_cli/test_auth_loopback_ssh_hint.py b/tests/hermes_cli/test_auth_loopback_ssh_hint.py
index 87dcd526467..4525e89fcfc 100644
--- a/tests/hermes_cli/test_auth_loopback_ssh_hint.py
+++ b/tests/hermes_cli/test_auth_loopback_ssh_hint.py
@@ -11,7 +11,6 @@ import io
 import contextlib
 import socket
 
-import pytest
 
 from hermes_cli import auth as auth_mod
 
diff --git a/tests/hermes_cli/test_auth_manual_paste.py b/tests/hermes_cli/test_auth_manual_paste.py
index 3f0fa2a59e4..2c567ff6ee5 100644
--- a/tests/hermes_cli/test_auth_manual_paste.py
+++ b/tests/hermes_cli/test_auth_manual_paste.py
@@ -330,6 +330,107 @@ def test_xai_loopback_login_manual_paste_state_mismatch_raises(monkeypatch):
     assert exc.value.code == "xai_state_mismatch"
 
 
+def test_xai_loopback_login_manual_paste_bare_code_succeeds(monkeypatch):
+    """Bare-code paste (state=None) must complete login under manual_paste.
+
+    xAI's consent page renders the authorization code in-page rather than
+    redirecting through 127.0.0.1, so on remote/headless setups the only
+    value the user can obtain is the opaque code with no ``state=``
+    parameter. ``_parse_pasted_callback`` correctly returns
+    ``state=None`` for that input. The login flow must accept this case
+    (PKCE still protects the exchange); historically it raised
+    ``xai_state_mismatch``. Regression for the bare-code branch of #26923.
+    """
+    monkeypatch.setattr(
+        auth_mod, "_xai_oauth_discovery",
+        lambda *_a, **_k: {
+            "authorization_endpoint": "https://auth.x.ai/oauth2/authorize",
+            "token_endpoint": "https://auth.x.ai/oauth2/token",
+        },
+    )
+    monkeypatch.setattr(
+        auth_mod, "_prompt_manual_callback_paste",
+        lambda _ru: {
+            "code": "bare-opaque-code",
+            "state": None,
+            "error": None,
+            "error_description": None,
+        },
+    )
+
+    def _fake_token_post(*_a, **_k):
+        return _StubTokenResponse(
+            {
+                "access_token": "at",
+                "refresh_token": "rt",
+                "id_token": "",
+                "expires_in": 3600,
+                "token_type": "Bearer",
+            }
+        )
+
+    monkeypatch.setattr(auth_mod.httpx, "post", _fake_token_post)
+
+    with contextlib.redirect_stdout(io.StringIO()):
+        creds = auth_mod._xai_oauth_loopback_login(manual_paste=True)
+
+    assert creds["tokens"]["access_token"] == "at"
+    assert creds["tokens"]["refresh_token"] == "rt"
+
+
+def test_xai_loopback_login_loopback_path_rejects_missing_state(monkeypatch):
+    """Loopback (manual_paste=False) must NOT accept ``state=None``.
+
+    The bare-code relaxation only applies to the manual-paste path,
+    where the user demonstrably has no way to supply ``state``. The
+    HTTP-server path always sees ``state`` populated from the real
+    callback query string, so missing state there means something is
+    wrong (a malformed callback, an attacker-supplied request) and
+    must still raise ``xai_state_mismatch``.
+    """
+    monkeypatch.setattr(
+        auth_mod, "_xai_oauth_discovery",
+        lambda *_a, **_k: {
+            "authorization_endpoint": "https://auth.x.ai/oauth2/authorize",
+            "token_endpoint": "https://auth.x.ai/oauth2/token",
+        },
+    )
+
+    class _StubServer:
+        def shutdown(self):
+            return None
+
+        def server_close(self):
+            return None
+
+    monkeypatch.setattr(
+        auth_mod, "_xai_start_callback_server",
+        lambda *_a, **_k: (
+            _StubServer(),
+            None,
+            {"code": "fake", "state": None, "error": None,
+             "error_description": None},
+            "http://127.0.0.1:56121/callback",
+        ),
+    )
+    monkeypatch.setattr(
+        auth_mod, "_xai_wait_for_callback",
+        lambda *_a, **_k: {
+            "code": "fake",
+            "state": None,
+            "error": None,
+            "error_description": None,
+        },
+    )
+    monkeypatch.setattr(auth_mod, "_xai_validate_loopback_redirect_uri", lambda _u: None)
+    monkeypatch.setattr(auth_mod, "_print_loopback_ssh_hint", lambda *_a, **_k: None)
+
+    with contextlib.redirect_stdout(io.StringIO()):
+        with pytest.raises(auth_mod.AuthError) as exc:
+            auth_mod._xai_oauth_loopback_login(manual_paste=False, open_browser=False)
+    assert exc.value.code == "xai_state_mismatch"
+
+
 def test_xai_loopback_login_manual_paste_missing_code_raises(monkeypatch):
     """Empty paste must surface as ``xai_code_missing``, not crash."""
     monkeypatch.setattr(
@@ -363,6 +464,163 @@ def test_xai_loopback_login_manual_paste_missing_code_raises(monkeypatch):
     assert exc.value.code == "xai_code_missing"
 
 
+def test_xai_loopback_login_timeout_falls_back_to_manual_paste(monkeypatch):
+    """Loopback timeout should offer the existing manual-paste path."""
+    monkeypatch.setattr(
+        auth_mod, "_xai_oauth_discovery",
+        lambda *_a, **_k: {
+            "authorization_endpoint": "https://auth.x.ai/oauth2/authorize",
+            "token_endpoint": "https://auth.x.ai/oauth2/token",
+        },
+    )
+
+    class _StubServer:
+        def shutdown(self):
+            return None
+
+        def server_close(self):
+            return None
+
+    class _StubThread:
+        def join(self, timeout=None):
+            return None
+
+    monkeypatch.setattr(
+        auth_mod,
+        "_xai_start_callback_server",
+        lambda: (
+            _StubServer(),
+            _StubThread(),
+            {
+                "code": None,
+                "state": None,
+                "error": None,
+                "error_description": None,
+            },
+            "http://127.0.0.1:56121/callback",
+        ),
+    )
+
+    captured: dict = {"state": None, "prompt_calls": 0}
+    original_build = auth_mod._xai_oauth_build_authorize_url
+
+    def _capture(**kwargs):
+        captured["state"] = kwargs["state"]
+        return original_build(**kwargs)
+
+    monkeypatch.setattr(auth_mod, "_xai_oauth_build_authorize_url", _capture)
+
+    def _raise_timeout(*_a, **_k):
+        raise auth_mod.AuthError(
+            "xAI authorization timed out waiting for the local callback.",
+            provider="xai-oauth",
+            code="xai_callback_timeout",
+        )
+
+    monkeypatch.setattr(auth_mod, "_xai_wait_for_callback", _raise_timeout)
+
+    def _fake_prompt(_redirect_uri):
+        captured["prompt_calls"] += 1
+        return {
+            "code": "manual-auth-code",
+            "state": captured["state"],
+            "error": None,
+            "error_description": None,
+        }
+
+    monkeypatch.setattr(auth_mod, "_prompt_manual_callback_paste", _fake_prompt)
+    monkeypatch.setattr(
+        auth_mod.sys, "stdin", type("StubStdin", (), {"isatty": lambda self: True})()
+    )
+    monkeypatch.setattr(
+        auth_mod.httpx,
+        "post",
+        lambda *_a, **_k: _StubTokenResponse(
+            {
+                "access_token": "at-timeout",
+                "refresh_token": "rt-timeout",
+                "id_token": "",
+                "expires_in": 3600,
+                "token_type": "Bearer",
+            }
+        ),
+    )
+
+    buf = io.StringIO()
+    with contextlib.redirect_stdout(buf):
+        creds = auth_mod._xai_oauth_loopback_login(manual_paste=False)
+
+    rendered = buf.getvalue()
+    assert "xAI loopback callback timed out." in rendered
+    assert "--manual-paste" in rendered
+    assert captured["prompt_calls"] == 1
+    assert creds["tokens"]["access_token"] == "at-timeout"
+    assert creds["tokens"]["refresh_token"] == "rt-timeout"
+
+
+def test_xai_loopback_login_timeout_noninteractive_reraises(monkeypatch):
+    """Non-interactive stdin must keep the original timeout error."""
+    monkeypatch.setattr(
+        auth_mod, "_xai_oauth_discovery",
+        lambda *_a, **_k: {
+            "authorization_endpoint": "https://auth.x.ai/oauth2/authorize",
+            "token_endpoint": "https://auth.x.ai/oauth2/token",
+        },
+    )
+
+    class _StubServer:
+        def shutdown(self):
+            return None
+
+        def server_close(self):
+            return None
+
+    class _StubThread:
+        def join(self, timeout=None):
+            return None
+
+    monkeypatch.setattr(
+        auth_mod,
+        "_xai_start_callback_server",
+        lambda: (
+            _StubServer(),
+            _StubThread(),
+            {
+                "code": None,
+                "state": None,
+                "error": None,
+                "error_description": None,
+            },
+            "http://127.0.0.1:56121/callback",
+        ),
+    )
+
+    monkeypatch.setattr(
+        auth_mod,
+        "_xai_wait_for_callback",
+        lambda *_a, **_k: (_ for _ in ()).throw(
+            auth_mod.AuthError(
+                "xAI authorization timed out waiting for the local callback.",
+                provider="xai-oauth",
+                code="xai_callback_timeout",
+            )
+        ),
+    )
+    monkeypatch.setattr(
+        auth_mod.sys, "stdin", type("StubStdin", (), {"isatty": lambda self: False})()
+    )
+    monkeypatch.setattr(
+        auth_mod,
+        "_prompt_manual_callback_paste",
+        lambda *_a, **_k: pytest.fail("manual-paste fallback should not run"),
+    )
+
+    with contextlib.redirect_stdout(io.StringIO()):
+        with pytest.raises(auth_mod.AuthError) as exc:
+            auth_mod._xai_oauth_loopback_login(manual_paste=False)
+    assert exc.value.code == "xai_callback_timeout"
+
+
 # ---------------------------------------------------------------------------
 # _print_loopback_ssh_hint — now also mentions --manual-paste
 # ---------------------------------------------------------------------------
diff --git a/tests/hermes_cli/test_auth_nous_provider.py b/tests/hermes_cli/test_auth_nous_provider.py
index 55903b11816..32b175a5b12 100644
--- a/tests/hermes_cli/test_auth_nous_provider.py
+++ b/tests/hermes_cli/test_auth_nous_provider.py
@@ -1,4 +1,4 @@
-"""Regression tests for Nous OAuth refresh + agent-key mint interactions."""
+"""Regression tests for Nous OAuth refresh and inference JWT interactions."""
 
 import base64
 import json
@@ -126,14 +126,15 @@ class TestResolveVerifyFallback:
 def _setup_nous_auth(
     hermes_home: Path,
     *,
-    access_token: str = "access-old",
+    access_token: str = "",
     refresh_token: str = "refresh-old",
-    scope: str = "inference:mint_agent_key",
+    scope: str = "inference:invoke",
     expires_at: str = "2026-02-01T00:00:00+00:00",
     expires_in: int = 0,
     agent_key: str | None = None,
     agent_key_expires_at: str | None = None,
 ) -> None:
+    access_token = access_token or _invoke_jwt(seconds=3600, scope=scope)
     hermes_home.mkdir(parents=True, exist_ok=True)
     auth_store = {
         "version": 1,
@@ -162,16 +163,6 @@ def _setup_nous_auth(
     (hermes_home / "auth.json").write_text(json.dumps(auth_store, indent=2))
 
 
-def _mint_payload(api_key: str = "agent-key") -> dict:
-    return {
-        "api_key": api_key,
-        "key_id": "key-id-1",
-        "expires_at": datetime.now(timezone.utc).isoformat(),
-        "expires_in": 1800,
-        "reused": False,
-    }
-
-
 def _jwt_with_claims(claims: dict) -> str:
     def _part(payload: dict) -> str:
         raw = json.dumps(payload, separators=(",", ":")).encode("utf-8")
@@ -184,7 +175,7 @@ def _future_iso(seconds: int = 3600) -> str:
     return datetime.fromtimestamp(time.time() + seconds, tz=timezone.utc).isoformat()
 
 
-def _invoke_jwt(*, seconds: int = 3600, scope: object = "inference:invoke inference:mint_agent_key") -> str:
+def _invoke_jwt(*, seconds: int = 3600, scope: object = "inference:invoke") -> str:
     return _jwt_with_claims({
         "sub": "test-user",
         "scope": scope,
@@ -209,12 +200,7 @@ def test_resolve_nous_runtime_credentials_prefers_invoke_jwt_and_mirrors(
     )
     monkeypatch.setenv("HERMES_HOME", str(hermes_home))
 
-    def _unexpected_mint(*args, **kwargs):
-        raise AssertionError("legacy agent-key mint should not run for invoke JWT")
-
-    monkeypatch.setattr(auth_mod, "_mint_agent_key", _unexpected_mint)
-
-    creds = auth_mod.resolve_nous_runtime_credentials(min_key_ttl_seconds=300)
+    creds = auth_mod.resolve_nous_runtime_credentials()
 
     assert creds["api_key"] == token
     assert creds["source"] == auth_mod.NOUS_AUTH_PATH_INVOKE_JWT
@@ -278,15 +264,11 @@ def test_resolve_nous_runtime_credentials_invoke_jwt_is_idempotent(
     before_mtime = auth_path.stat().st_mtime_ns
     monkeypatch.setenv("HERMES_HOME", str(hermes_home))
 
-    def _unexpected_mint(*args, **kwargs):
-        raise AssertionError("stable invoke JWT should not mint a legacy key")
-
     def _unexpected_shared_write(*args, **kwargs):
         raise AssertionError("unchanged invoke JWT resolution should not sync shared store")
 
     sync_calls = []
 
-    monkeypatch.setattr(auth_mod, "_mint_agent_key", _unexpected_mint)
     monkeypatch.setattr(auth_mod, "_write_shared_nous_state", _unexpected_shared_write)
     monkeypatch.setattr(
         auth_mod,
@@ -294,7 +276,7 @@ def test_resolve_nous_runtime_credentials_invoke_jwt_is_idempotent(
         lambda: sync_calls.append(True),
     )
 
-    creds = auth_mod.resolve_nous_runtime_credentials(min_key_ttl_seconds=300)
+    creds = auth_mod.resolve_nous_runtime_credentials()
 
     assert creds["api_key"] == token
     assert creds["source"] == auth_mod.NOUS_AUTH_PATH_INVOKE_JWT
@@ -330,13 +312,9 @@ def test_resolve_nous_runtime_credentials_trusts_invoke_jwt_exp_over_stale_metad
     def _unexpected_refresh(*args, **kwargs):
         raise AssertionError("valid invoke JWT should not be refreshed because metadata is stale")
 
-    def _unexpected_mint(*args, **kwargs):
-        raise AssertionError("valid invoke JWT should not fall back to legacy mint")
-
     monkeypatch.setattr(auth_mod, "_refresh_access_token", _unexpected_refresh)
-    monkeypatch.setattr(auth_mod, "_mint_agent_key", _unexpected_mint)
 
-    creds = auth_mod.resolve_nous_runtime_credentials(min_key_ttl_seconds=300)
+    creds = auth_mod.resolve_nous_runtime_credentials()
 
     assert creds["api_key"] == token
     assert creds["source"] == auth_mod.NOUS_AUTH_PATH_INVOKE_JWT
@@ -347,7 +325,7 @@ def test_resolve_nous_runtime_credentials_trusts_invoke_jwt_exp_over_stale_metad
     assert datetime.fromisoformat(singleton["agent_key_expires_at"]).timestamp() > time.time() + 300
 
 
-def test_resolve_nous_runtime_credentials_does_not_apply_legacy_ttl_to_invoke_jwt(
+def test_resolve_nous_runtime_credentials_does_not_apply_agent_key_ttl_to_invoke_jwt(
     tmp_path,
     monkeypatch,
 ):
@@ -364,12 +342,7 @@ def test_resolve_nous_runtime_credentials_does_not_apply_legacy_ttl_to_invoke_jw
     )
     monkeypatch.setenv("HERMES_HOME", str(hermes_home))
 
-    def _unexpected_mint(*args, **kwargs):
-        raise AssertionError("1800s legacy min TTL should not force opaque mint for invoke JWT")
-
-    monkeypatch.setattr(auth_mod, "_mint_agent_key", _unexpected_mint)
-
-    creds = auth_mod.resolve_nous_runtime_credentials(min_key_ttl_seconds=1800)
+    creds = auth_mod.resolve_nous_runtime_credentials()
 
     assert creds["api_key"] == token
     assert creds["source"] == auth_mod.NOUS_AUTH_PATH_INVOKE_JWT
@@ -378,42 +351,56 @@ def test_resolve_nous_runtime_credentials_does_not_apply_legacy_ttl_to_invoke_jw
     assert payload["credential_pool"]["nous"][0]["agent_key"] == token
 
 
-def test_legacy_auth_mode_bypasses_usable_invoke_jwt(tmp_path, monkeypatch):
+def test_resolve_nous_runtime_credentials_refreshes_legacy_agent_key_to_invoke_jwt(
+    tmp_path,
+    monkeypatch,
+):
     import hermes_cli.auth as auth_mod
 
     hermes_home = tmp_path / "hermes"
-    token = _invoke_jwt(seconds=3600)
+    refreshed_token = _invoke_jwt(seconds=3600)
     _setup_nous_auth(
         hermes_home,
-        access_token=token,
+        access_token="legacy-access-token",
+        refresh_token="refresh-old",
         scope=auth_mod.DEFAULT_NOUS_SCOPE,
         expires_at=_future_iso(3600),
         expires_in=3600,
+        agent_key="legacy-opaque-session-key",
+        agent_key_expires_at=_future_iso(3600),
     )
     monkeypatch.setenv("HERMES_HOME", str(hermes_home))
 
-    mint_calls = []
+    refresh_calls = []
 
-    def _fake_mint_agent_key(*, client, portal_base_url, access_token, min_ttl_seconds):
-        del client, portal_base_url, min_ttl_seconds
-        mint_calls.append(access_token)
-        return _mint_payload(api_key="legacy-after-jwt-401")
+    def _fake_refresh_access_token(*, client, portal_base_url, client_id, refresh_token):
+        del client, portal_base_url, client_id
+        refresh_calls.append(refresh_token)
+        return {
+            "access_token": refreshed_token,
+            "refresh_token": "refresh-new",
+            "expires_in": 3600,
+            "token_type": "Bearer",
+            "scope": auth_mod.DEFAULT_NOUS_SCOPE,
+        }
 
-    monkeypatch.setattr(auth_mod, "_mint_agent_key", _fake_mint_agent_key)
+    monkeypatch.setattr(auth_mod, "_refresh_access_token", _fake_refresh_access_token)
 
-    creds = auth_mod.resolve_nous_runtime_credentials(
-        min_key_ttl_seconds=300,
-        inference_auth_mode=auth_mod.NOUS_INFERENCE_AUTH_MODE_LEGACY,
-    )
+    creds = auth_mod.resolve_nous_runtime_credentials()
 
-    assert mint_calls == [token]
-    assert creds["api_key"] == "legacy-after-jwt-401"
-    assert creds["auth_path"] == auth_mod.NOUS_AUTH_PATH_LEGACY_SESSION_KEY_MINT
+    assert refresh_calls == ["refresh-old"]
+    assert creds["api_key"] == refreshed_token
+    assert creds["source"] == auth_mod.NOUS_AUTH_PATH_INVOKE_JWT
     payload = json.loads((hermes_home / "auth.json").read_text())
-    assert payload["providers"]["nous"]["agent_key"] == "legacy-after-jwt-401"
+    singleton = payload["providers"]["nous"]
+    assert singleton["access_token"] == refreshed_token
+    assert singleton["refresh_token"] == "refresh-new"
+    assert singleton["agent_key"] == refreshed_token
+    assert singleton["agent_key_id"] is None
+    assert payload["credential_pool"]["nous"][0]["agent_key"] == refreshed_token
 
 
-def test_resolve_nous_runtime_credentials_falls_back_when_invoke_scope_missing(
+def test_resolve_nous_runtime_credentials_reauths_when_invoke_scope_missing(
     tmp_path,
     monkeypatch,
 ):
@@ -428,32 +415,24 @@ def test_resolve_nous_runtime_credentials_falls_back_when_invoke_scope_missing(
     _setup_nous_auth(
         hermes_home,
         access_token=token,
-        scope=auth_mod.NOUS_LEGACY_AGENT_KEY_SCOPE,
+        refresh_token="",
+        scope="inference:mint_agent_key",
         expires_at=_future_iso(3600),
         expires_in=3600,
     )
     monkeypatch.setenv("HERMES_HOME", str(hermes_home))
 
-    calls = []
+    with pytest.raises(AuthError) as exc:
+        auth_mod.resolve_nous_runtime_credentials()
 
-    def _fake_mint_agent_key(*, client, portal_base_url, access_token, min_ttl_seconds):
-        del client, portal_base_url, min_ttl_seconds
-        calls.append(access_token)
-        return _mint_payload(api_key="opaque-agent-key")
-
-    monkeypatch.setattr(auth_mod, "_mint_agent_key", _fake_mint_agent_key)
-
-    creds = auth_mod.resolve_nous_runtime_credentials(min_key_ttl_seconds=300)
-
-    assert calls == [token]
-    assert creds["api_key"] == "opaque-agent-key"
-    assert creds["source"] == "portal"
+    assert exc.value.code == "missing_inference_invoke_scope"
+    assert exc.value.relogin_required is True
     payload = json.loads((hermes_home / "auth.json").read_text())
-    assert payload["providers"]["nous"]["agent_key"] == "opaque-agent-key"
-    assert payload["credential_pool"]["nous"][0]["agent_key"] == "opaque-agent-key"
+    assert payload["providers"]["nous"]["agent_key"] is None
+    assert "credential_pool" not in payload or not payload["credential_pool"].get("nous")
 
 
-def test_nous_device_code_login_retries_legacy_scope_when_invoke_refused(monkeypatch):
+def test_nous_device_code_login_does_not_retry_legacy_scope_when_invoke_refused(monkeypatch):
     import hermes_cli.auth as auth_mod
 
     scopes = []
@@ -461,59 +440,31 @@ def test_nous_device_code_login_retries_legacy_scope_when_invoke_refused(monkeyp
     def _fake_request_device_code(*, client, portal_base_url, client_id, scope):
         del client, portal_base_url, client_id
         scopes.append(scope)
-        if len(scopes) == 1:
-            request = httpx.Request("POST", "https://portal.example.com/api/oauth/device/code")
-            response = httpx.Response(
-                400,
-                json={
-                    "error": "invalid_scope",
-                    "error_description": "unsupported inference:invoke",
-                },
-                request=request,
-            )
-            raise httpx.HTTPStatusError("invalid_scope", request=request, response=response)
-        return {
-            "device_code": "device",
-            "user_code": "user",
-            "verification_uri": "https://portal.example.com/device",
-            "verification_uri_complete": "https://portal.example.com/device?code=user",
-            "expires_in": 600,
-            "interval": 1,
-        }
-
-    def _fake_poll_for_token(**kwargs):
-        del kwargs
-        return {
-            "access_token": "access-legacy",
-            "refresh_token": "refresh-legacy",
-            "expires_in": 900,
-            "scope": auth_mod.NOUS_LEGACY_AGENT_KEY_SCOPE,
-        }
-
-    def _fake_refresh(state, **kwargs):
-        del kwargs
-        refreshed = dict(state)
-        refreshed["agent_key"] = "opaque-agent-key"
-        refreshed["agent_key_expires_at"] = _future_iso(1800)
-        return refreshed
+        request = httpx.Request("POST", "https://portal.example.com/api/oauth/device/code")
+        response = httpx.Response(
+            400,
+            json={
+                "error": "invalid_scope",
+                "error_description": "unsupported inference:invoke",
+            },
+            request=request,
+        )
+        raise httpx.HTTPStatusError("invalid_scope", request=request, response=response)
 
     monkeypatch.setattr(auth_mod, "_request_device_code", _fake_request_device_code)
-    monkeypatch.setattr(auth_mod, "_poll_for_token", _fake_poll_for_token)
-    monkeypatch.setattr(auth_mod, "refresh_nous_oauth_from_state", _fake_refresh)
 
-    result = auth_mod._nous_device_code_login(
-        portal_base_url="https://portal.example.com",
-        inference_base_url="https://inference.example.com/v1",
-        open_browser=False,
-        timeout_seconds=1,
-    )
+    with pytest.raises(httpx.HTTPStatusError):
+        auth_mod._nous_device_code_login(
+            portal_base_url="https://portal.example.com",
+            inference_base_url="https://inference.example.com/v1",
+            open_browser=False,
+            timeout_seconds=1,
+        )
 
-    assert scopes == [auth_mod.DEFAULT_NOUS_SCOPE, auth_mod.NOUS_LEGACY_AGENT_KEY_SCOPE]
-    assert result["scope"] == auth_mod.NOUS_LEGACY_AGENT_KEY_SCOPE
-    assert result["agent_key"] == "opaque-agent-key"
+    assert scopes == [auth_mod.DEFAULT_NOUS_SCOPE]
 
 
-def test_forced_legacy_env_skips_invoke_scope_and_jwt_storage(tmp_path, monkeypatch):
+def test_removed_legacy_session_env_var_does_not_change_jwt_auth(tmp_path, monkeypatch):
     import hermes_cli.auth as auth_mod
 
     hermes_home = tmp_path / "hermes"
@@ -526,25 +477,16 @@ def test_forced_legacy_env_skips_invoke_scope_and_jwt_storage(tmp_path, monkeypa
         expires_in=3600,
     )
     monkeypatch.setenv("HERMES_HOME", str(hermes_home))
-    monkeypatch.setenv(auth_mod.NOUS_LEGACY_SESSION_KEYS_ENV, "true")
+    monkeypatch.setenv("HERMES_AGENT_USE_LEGACY_SESSION_KEYS", "true")
 
-    mint_calls = []
+    creds = auth_mod.resolve_nous_runtime_credentials()
 
-    def _fake_mint_agent_key(*, client, portal_base_url, access_token, min_ttl_seconds):
-        del client, portal_base_url, min_ttl_seconds
-        mint_calls.append(access_token)
-        return _mint_payload(api_key="forced-legacy-key")
-
-    monkeypatch.setattr(auth_mod, "_mint_agent_key", _fake_mint_agent_key)
-
-    creds = auth_mod.resolve_nous_runtime_credentials(min_key_ttl_seconds=300)
-
-    assert mint_calls == [token]
-    assert creds["api_key"] == "forced-legacy-key"
+    assert creds["api_key"] == token
     payload = json.loads((hermes_home / "auth.json").read_text())
-    assert payload["providers"]["nous"]["agent_key"] == "forced-legacy-key"
+    assert payload["providers"]["nous"]["agent_key"] == token
 
     requested_scopes = []
+    login_token = _invoke_jwt(seconds=3600)
 
     def _fake_request_device_code(*, client, portal_base_url, client_id, scope):
         del client, portal_base_url, client_id
@@ -561,31 +503,24 @@ def test_forced_legacy_env_skips_invoke_scope_and_jwt_storage(tmp_path, monkeypa
     def _fake_poll_for_token(**kwargs):
         del kwargs
         return {
-            "access_token": "access-legacy",
-            "refresh_token": "refresh-legacy",
+            "access_token": login_token,
+            "refresh_token": "refresh-token",
             "expires_in": 900,
-            "scope": auth_mod.NOUS_LEGACY_AGENT_KEY_SCOPE,
+            "scope": auth_mod.DEFAULT_NOUS_SCOPE,
         }
 
-    def _fake_refresh(state, **kwargs):
-        del kwargs
-        refreshed = dict(state)
-        refreshed["agent_key"] = "forced-legacy-login-key"
-        refreshed["agent_key_expires_at"] = _future_iso(1800)
-        return refreshed
-
     monkeypatch.setattr(auth_mod, "_request_device_code", _fake_request_device_code)
     monkeypatch.setattr(auth_mod, "_poll_for_token", _fake_poll_for_token)
-    monkeypatch.setattr(auth_mod, "refresh_nous_oauth_from_state", _fake_refresh)
 
-    auth_mod._nous_device_code_login(
+    result = auth_mod._nous_device_code_login(
         portal_base_url="https://portal.example.com",
         inference_base_url="https://inference.example.com/v1",
         open_browser=False,
         timeout_seconds=1,
     )
 
-    assert requested_scopes == [auth_mod.NOUS_LEGACY_AGENT_KEY_SCOPE]
+    assert requested_scopes == [auth_mod.DEFAULT_NOUS_SCOPE]
+    assert result["agent_key"] == login_token
 
 
 def test_nous_inference_auth_logs_do_not_include_secret_values(
@@ -596,37 +531,41 @@ def test_nous_inference_auth_logs_do_not_include_secret_values(
     import hermes_cli.auth as auth_mod
 
     hermes_home = tmp_path / "hermes"
-    token = _jwt_with_claims({
-        "sub": "secret-user",
-        "scope": "inference:mint_agent_key",
-        "exp": int(time.time() + 3600),
-    })
+    token = _invoke_jwt(seconds=3600)
+    refreshed_token = _invoke_jwt(seconds=7200)
     refresh_token = "refresh-secret-token"
-    opaque_key = "opaque-secret-agent-key"
     _setup_nous_auth(
         hermes_home,
         access_token=token,
         refresh_token=refresh_token,
-        scope=auth_mod.NOUS_LEGACY_AGENT_KEY_SCOPE,
+        scope=auth_mod.DEFAULT_NOUS_SCOPE,
         expires_at=_future_iso(3600),
         expires_in=3600,
     )
     monkeypatch.setenv("HERMES_HOME", str(hermes_home))
 
-    def _fake_mint_agent_key(*, client, portal_base_url, access_token, min_ttl_seconds):
-        del client, portal_base_url, access_token, min_ttl_seconds
-        return _mint_payload(api_key=opaque_key)
+    def _fake_refresh_access_token(*, client, portal_base_url, client_id, refresh_token):
+        del client, portal_base_url, client_id, refresh_token
+        return {
+            "access_token": refreshed_token,
+            "refresh_token": "refresh-new",
+            "expires_in": 7200,
+            "token_type": "Bearer",
+            "scope": auth_mod.DEFAULT_NOUS_SCOPE,
+        }
 
-    monkeypatch.setattr(auth_mod, "_mint_agent_key", _fake_mint_agent_key)
+    monkeypatch.setattr(auth_mod, "_refresh_access_token", _fake_refresh_access_token)
 
     caplog.set_level(logging.INFO, logger="hermes_cli.auth")
-    auth_mod.resolve_nous_runtime_credentials(min_key_ttl_seconds=300)
+    auth_mod.resolve_nous_runtime_credentials(
+        force_refresh=True,
+    )
 
     logged = caplog.text
-    assert "legacy session key path" in logged
+    assert "using NAS invoke JWT" in logged
     assert token not in logged
+    assert refreshed_token not in logged
     assert refresh_token not in logged
-    assert opaque_key not in logged
 
 
 def test_get_nous_auth_status_checks_credential_pool(tmp_path, monkeypatch):
@@ -648,13 +587,16 @@ def test_get_nous_auth_status_checks_credential_pool(tmp_path, monkeypatch):
     # Seed the credential pool with a Nous entry
     from agent.credential_pool import PooledCredential, load_pool
     pool = load_pool("nous")
+    token = _invoke_jwt(seconds=3600)
+    expires_at = _future_iso(3600)
     entry = PooledCredential.from_dict("nous", {
-        "access_token": "test-access-token",
+        "access_token": token,
         "refresh_token": "test-refresh-token",
         "portal_base_url": "https://portal.example.com",
         "inference_base_url": "https://inference.example.com/v1",
-        "agent_key": "test-agent-key",
-        "agent_key_expires_at": "2099-01-01T00:00:00+00:00",
+        "agent_key": token,
+        "agent_key_expires_at": expires_at,
+        "scope": "inference:invoke",
         "label": "dashboard device_code",
         "auth_type": "oauth",
         "source": "manual:dashboard_device_code",
@@ -667,6 +609,42 @@ def test_get_nous_auth_status_checks_credential_pool(tmp_path, monkeypatch):
     assert "example.com" in str(status.get("portal_base_url", ""))
 
 
+def test_get_nous_auth_status_pool_opaque_key_is_not_inference_credential(tmp_path, monkeypatch):
+    from hermes_cli.auth import get_nous_auth_status, invalidate_nous_auth_status_cache
+
+    hermes_home = tmp_path / "hermes"
+    hermes_home.mkdir(parents=True, exist_ok=True)
+    (hermes_home / "auth.json").write_text(json.dumps({
+        "version": 1, "providers": {},
+    }))
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+    invalidate_nous_auth_status_cache()
+
+    from agent.credential_pool import PooledCredential, load_pool
+    pool = load_pool("nous")
+    entry = PooledCredential.from_dict("nous", {
+        "access_token": "",
+        "agent_key": "opaque-agent-key",
+        "agent_key_expires_at": "2099-01-01T00:00:00+00:00",
+        "label": "manual opaque key",
+        "auth_type": "api_key",
+        "source": "manual",
+        "base_url": "https://inference.example.com/v1",
+        "inference_base_url": "https://inference.example.com/v1",
+    })
+    pool.add_entry(entry)
+
+    status = get_nous_auth_status()
+
+    assert status["logged_in"] is False
+    assert status["inference_credential_present"] is False
+    assert status["credential_source"] is None
+    assert status.get("access_token") is None
+    assert status.get("portal_base_url") is None
+    assert status.get("inference_base_url") is None
+    invalidate_nous_auth_status_cache()
+
+
 def test_get_nous_auth_status_auth_store_fallback(tmp_path, monkeypatch):
     """get_nous_auth_status() falls back to auth store when credential
     pool is empty.
@@ -678,7 +656,7 @@ def test_get_nous_auth_status_auth_store_fallback(tmp_path, monkeypatch):
     monkeypatch.setenv("HERMES_HOME", str(hermes_home))
     monkeypatch.setattr(
         "hermes_cli.auth.resolve_nous_runtime_credentials",
-        lambda min_key_ttl_seconds=60: {
+        lambda **kwargs: {
             "base_url": "https://inference.example.com/v1",
             "expires_at": "2099-01-01T00:00:00+00:00",
             "key_id": "key-1",
@@ -718,7 +696,7 @@ def test_get_nous_auth_status_prefers_runtime_auth_store_over_stale_pool(tmp_pat
 
     monkeypatch.setattr(
         "hermes_cli.auth.resolve_nous_runtime_credentials",
-        lambda min_key_ttl_seconds=60: {
+        lambda **kwargs: {
             "base_url": "https://inference.example.com/v1",
             "expires_at": "2099-01-01T00:00:00+00:00",
             "key_id": "key-fresh",
@@ -740,7 +718,7 @@ def test_get_nous_auth_status_reports_revoked_refresh_session(tmp_path, monkeypa
     _setup_nous_auth(hermes_home, access_token="at-123")
     monkeypatch.setenv("HERMES_HOME", str(hermes_home))
 
-    def _boom(min_key_ttl_seconds=60):
+    def _boom(**kwargs):
         raise AuthError("Refresh session has been revoked", provider="nous", relogin_required=True)
 
     monkeypatch.setattr("hermes_cli.auth.resolve_nous_runtime_credentials", _boom)
@@ -769,68 +747,75 @@ def test_get_nous_auth_status_empty_returns_not_logged_in(tmp_path, monkeypatch)
     assert status["logged_in"] is False
 
 
-def test_refresh_token_persisted_when_mint_returns_insufficient_credits(tmp_path, monkeypatch):
+def test_refresh_token_persisted_when_refreshed_jwt_lacks_invoke_scope(tmp_path, monkeypatch):
     hermes_home = tmp_path / "hermes"
-    _setup_nous_auth(hermes_home, refresh_token="refresh-old")
+    _setup_nous_auth(
+        hermes_home,
+        access_token="access-old",
+        refresh_token="refresh-old",
+    )
     monkeypatch.setenv("HERMES_HOME", str(hermes_home))
 
     refresh_calls = []
-    mint_calls = {"count": 0}
+    bad_jwt = _jwt_with_claims({
+        "sub": "test-user",
+        "scope": "profile",
+        "exp": int(time.time() + 3600),
+    })
+    good_jwt = _invoke_jwt(seconds=3600)
 
     def _fake_refresh_access_token(*, client, portal_base_url, client_id, refresh_token):
         refresh_calls.append(refresh_token)
-        idx = len(refresh_calls)
+        if len(refresh_calls) == 1:
+            token = bad_jwt
+        else:
+            token = good_jwt
         return {
-            "access_token": f"access-{idx}",
-            "refresh_token": f"refresh-{idx}",
-            "expires_in": 0,
+            "access_token": token,
+            "refresh_token": f"refresh-{len(refresh_calls)}",
+            "expires_in": 3600,
             "token_type": "Bearer",
+            "scope": "profile" if len(refresh_calls) == 1 else "inference:invoke",
         }
 
-    def _fake_mint_agent_key(*, client, portal_base_url, access_token, min_ttl_seconds):
-        mint_calls["count"] += 1
-        if mint_calls["count"] == 1:
-            raise AuthError("credits exhausted", provider="nous", code="insufficient_credits")
-        return _mint_payload(api_key="agent-key-2")
-
     monkeypatch.setattr("hermes_cli.auth._refresh_access_token", _fake_refresh_access_token)
-    monkeypatch.setattr("hermes_cli.auth._mint_agent_key", _fake_mint_agent_key)
 
     with pytest.raises(AuthError) as exc:
-        resolve_nous_runtime_credentials(min_key_ttl_seconds=300)
-    assert exc.value.code == "insufficient_credits"
+        resolve_nous_runtime_credentials()
+    assert exc.value.code == "missing_inference_invoke_scope"
 
     state_after_failure = get_provider_auth_state("nous")
     assert state_after_failure is not None
     assert state_after_failure["refresh_token"] == "refresh-1"
-    assert state_after_failure["access_token"] == "access-1"
+    assert state_after_failure["access_token"] == bad_jwt
 
-    creds = resolve_nous_runtime_credentials(min_key_ttl_seconds=300)
-    assert creds["api_key"] == "agent-key-2"
+    creds = resolve_nous_runtime_credentials()
+    assert creds["api_key"] == good_jwt
     assert refresh_calls == ["refresh-old", "refresh-1"]
 
 
-def test_refresh_token_persisted_when_mint_times_out(tmp_path, monkeypatch):
+def test_refresh_token_persisted_when_refreshed_token_is_not_jwt(tmp_path, monkeypatch):
     hermes_home = tmp_path / "hermes"
-    _setup_nous_auth(hermes_home, refresh_token="refresh-old")
+    _setup_nous_auth(
+        hermes_home,
+        access_token="access-old",
+        refresh_token="refresh-old",
+    )
     monkeypatch.setenv("HERMES_HOME", str(hermes_home))
 
     def _fake_refresh_access_token(*, client, portal_base_url, client_id, refresh_token):
         return {
             "access_token": "access-1",
             "refresh_token": "refresh-1",
-            "expires_in": 0,
+            "expires_in": 3600,
             "token_type": "Bearer",
         }
 
-    def _fake_mint_agent_key(*, client, portal_base_url, access_token, min_ttl_seconds):
-        raise httpx.ReadTimeout("mint timeout")
-
     monkeypatch.setattr("hermes_cli.auth._refresh_access_token", _fake_refresh_access_token)
-    monkeypatch.setattr("hermes_cli.auth._mint_agent_key", _fake_mint_agent_key)
 
-    with pytest.raises(httpx.ReadTimeout):
-        resolve_nous_runtime_credentials(min_key_ttl_seconds=300)
+    with pytest.raises(AuthError) as exc:
+        resolve_nous_runtime_credentials()
+    assert exc.value.code == "access_token_not_jwt"
 
     state_after_failure = get_provider_auth_state("nous")
     assert state_after_failure is not None
@@ -845,7 +830,11 @@ def test_terminal_refresh_failure_quarantines_tokens(
     from hermes_cli import auth as auth_mod
 
     hermes_home = tmp_path / "hermes"
-    _setup_nous_auth(hermes_home, refresh_token="refresh-old")
+    _setup_nous_auth(
+        hermes_home,
+        access_token="access-old",
+        refresh_token="refresh-old",
+    )
     monkeypatch.setenv("HERMES_HOME", str(hermes_home))
     from agent.credential_pool import load_pool
 
@@ -871,7 +860,7 @@ def test_terminal_refresh_failure_quarantines_tokens(
     monkeypatch.setattr(auth_mod, "_refresh_access_token", _terminal_refresh_failure)
 
     with pytest.raises(AuthError, match="Refresh session has been revoked"):
-        auth_mod.resolve_nous_runtime_credentials(min_key_ttl_seconds=300)
+        auth_mod.resolve_nous_runtime_credentials()
 
     state_after_failure = auth_mod.get_provider_auth_state("nous")
     assert state_after_failure is not None
@@ -884,7 +873,7 @@ def test_terminal_refresh_failure_quarantines_tokens(
     assert payload.get("credential_pool", {}).get("nous") == []
 
     with pytest.raises(AuthError, match="No access token found"):
-        auth_mod.resolve_nous_runtime_credentials(min_key_ttl_seconds=300)
+        auth_mod.resolve_nous_runtime_credentials()
 
     assert refresh_calls == ["refresh-old"]
 
@@ -931,35 +920,36 @@ def test_managed_access_token_refresh_failure_quarantines_tokens(
     assert refresh_calls == ["refresh-old"]
 
 
-def test_mint_retry_uses_latest_rotated_refresh_token(tmp_path, monkeypatch):
+def test_unusable_access_token_refresh_uses_latest_rotated_refresh_token(tmp_path, monkeypatch):
     hermes_home = tmp_path / "hermes"
-    _setup_nous_auth(hermes_home, refresh_token="refresh-old")
+    _setup_nous_auth(
+        hermes_home,
+        access_token="access-old",
+        refresh_token="refresh-old",
+    )
     monkeypatch.setenv("HERMES_HOME", str(hermes_home))
 
     refresh_calls = []
-    mint_calls = {"count": 0}
+    good_jwt = _invoke_jwt(seconds=3600)
 
     def _fake_refresh_access_token(*, client, portal_base_url, client_id, refresh_token):
         refresh_calls.append(refresh_token)
-        idx = len(refresh_calls)
+        token = "access-still-not-jwt" if len(refresh_calls) == 1 else good_jwt
         return {
-            "access_token": f"access-{idx}",
-            "refresh_token": f"refresh-{idx}",
-            "expires_in": 0,
+            "access_token": token,
+            "refresh_token": f"refresh-{len(refresh_calls)}",
+            "expires_in": 3600,
             "token_type": "Bearer",
+            "scope": "inference:invoke",
         }
 
-    def _fake_mint_agent_key(*, client, portal_base_url, access_token, min_ttl_seconds):
-        mint_calls["count"] += 1
-        if mint_calls["count"] == 1:
-            raise AuthError("stale access token", provider="nous", code="invalid_token")
-        return _mint_payload(api_key="agent-key")
-
     monkeypatch.setattr("hermes_cli.auth._refresh_access_token", _fake_refresh_access_token)
-    monkeypatch.setattr("hermes_cli.auth._mint_agent_key", _fake_mint_agent_key)
 
-    creds = resolve_nous_runtime_credentials(min_key_ttl_seconds=300)
-    assert creds["api_key"] == "agent-key"
+    with pytest.raises(AuthError) as exc:
+        resolve_nous_runtime_credentials()
+    assert exc.value.code == "access_token_not_jwt"
+    creds = resolve_nous_runtime_credentials()
+    assert creds["api_key"] == good_jwt
     assert refresh_calls == ["refresh-old", "refresh-1"]
 
 
@@ -1023,12 +1013,19 @@ class TestLoginNousSkipKeepsCurrent:
             lambda *a, **kw: prompt_returns,
         )
         monkeypatch.setattr(models_mod, "get_pricing_for_provider", lambda p: {})
-        monkeypatch.setattr(models_mod, "check_nous_free_tier", lambda: None)
+        free_tier_calls = []
+
+        def _check_nous_free_tier(**kwargs):
+            free_tier_calls.append(kwargs)
+            return None
+
+        monkeypatch.setattr(models_mod, "check_nous_free_tier", _check_nous_free_tier)
         monkeypatch.setattr(
             models_mod, "partition_nous_models_by_tier",
             lambda ids, p, free_tier=False: (ids, []),
         )
         monkeypatch.setattr(ns, "prompt_enable_tool_gateway", lambda cfg: None)
+        return free_tier_calls
 
     def test_skip_keep_current_preserves_provider_and_model(self, tmp_path, monkeypatch):
         """User picks Skip → config.yaml untouched, Nous creds still saved."""
@@ -1070,7 +1067,7 @@ class TestLoginNousSkipKeepsCurrent:
         hermes_home, config_path, auth_path = self._setup_home_with_openrouter(
             tmp_path, monkeypatch,
         )
-        self._patch_login_internals(
+        free_tier_calls = self._patch_login_internals(
             monkeypatch, prompt_returns="xiaomi/mimo-v2-pro",
         )
 
@@ -1083,6 +1080,7 @@ class TestLoginNousSkipKeepsCurrent:
         cfg_after = yaml.safe_load(config_path.read_text())
         assert cfg_after["model"]["provider"] == "nous"
         assert cfg_after["model"]["default"] == "xiaomi/mimo-v2-pro"
+        assert free_tier_calls == [{"force_fresh": True}]
 
         auth_after = json.loads(auth_path.read_text())
         assert auth_after["active_provider"] == "nous"
@@ -1126,21 +1124,23 @@ class TestLoginNousSkipKeepsCurrent:
 def _full_state_fixture() -> dict:
     """Shape of the dict returned by _nous_device_code_login /
     refresh_nous_oauth_from_state. Used as helper input."""
+    token = _invoke_jwt(seconds=3600)
+    expires_at = _future_iso(3600)
     return {
         "portal_base_url": "https://portal.example.com",
         "inference_base_url": "https://inference.example.com/v1",
         "client_id": "hermes-cli",
-        "scope": "inference:mint_agent_key",
+        "scope": "inference:invoke",
         "token_type": "Bearer",
-        "access_token": "access-tok",
+        "access_token": token,
         "refresh_token": "refresh-tok",
         "obtained_at": "2026-04-17T22:00:00+00:00",
-        "expires_at": "2026-04-17T22:15:00+00:00",
-        "expires_in": 900,
-        "agent_key": "agent-key-value",
-        "agent_key_id": "ak-id",
-        "agent_key_expires_at": "2026-04-18T22:00:00+00:00",
-        "agent_key_expires_in": 86400,
+        "expires_at": expires_at,
+        "expires_in": 3600,
+        "agent_key": token,
+        "agent_key_id": None,
+        "agent_key_expires_at": expires_at,
+        "agent_key_expires_in": 3600,
         "agent_key_reused": False,
         "agent_key_obtained_at": "2026-04-17T22:00:10+00:00",
         "tls": {"insecure": False, "ca_bundle": None},
@@ -1166,7 +1166,8 @@ def test_persist_nous_credentials_writes_both_pool_and_providers(tmp_path, monke
     }))
     monkeypatch.setenv("HERMES_HOME", str(hermes_home))
 
-    entry = persist_nous_credentials(_full_state_fixture())
+    state = _full_state_fixture()
+    entry = persist_nous_credentials(state)
 
     assert entry is not None
     assert entry.provider == "nous"
@@ -1176,17 +1177,17 @@ def test_persist_nous_credentials_writes_both_pool_and_providers(tmp_path, monke
 
     # providers.nous populated with the full state (new behaviour)
     singleton = payload["providers"]["nous"]
-    assert singleton["access_token"] == "access-tok"
+    assert singleton["access_token"] == state["access_token"]
     assert singleton["refresh_token"] == "refresh-tok"
-    assert singleton["agent_key"] == "agent-key-value"
-    assert singleton["agent_key_expires_at"] == "2026-04-18T22:00:00+00:00"
+    assert singleton["agent_key"] == state["agent_key"]
+    assert singleton["agent_key_expires_at"] == state["agent_key_expires_at"]
 
     # credential_pool.nous has exactly one canonical device_code entry
     pool_entries = payload["credential_pool"]["nous"]
     assert len(pool_entries) == 1, pool_entries
     pool_entry = pool_entries[0]
     assert pool_entry["source"] == NOUS_DEVICE_CODE_SOURCE
-    assert pool_entry["agent_key"] == "agent-key-value"
+    assert pool_entry["agent_key"] == state["agent_key"]
     assert pool_entry["inference_base_url"] == "https://inference.example.com/v1"
 
 
@@ -1199,7 +1200,6 @@ def test_persist_nous_credentials_allows_recovery_from_401(tmp_path, monkeypatch
     providers.nous was empty.
     """
     from hermes_cli.auth import (
-        NOUS_INFERENCE_AUTH_MODE_FRESH,
         persist_nous_credentials,
         resolve_nous_runtime_credentials,
     )
@@ -1212,29 +1212,26 @@ def test_persist_nous_credentials_allows_recovery_from_401(tmp_path, monkeypatch
     monkeypatch.setenv("HERMES_HOME", str(hermes_home))
 
     persist_nous_credentials(_full_state_fixture())
+    new_jwt = _invoke_jwt(seconds=3600)
 
     # Stub the network-touching steps so we don't actually contact the
     # portal — the point of this test is that state lookup succeeds and
     # doesn't raise "Hermes is not logged into Nous Portal".
     def _fake_refresh_access_token(*, client, portal_base_url, client_id, refresh_token):
         return {
-            "access_token": "access-new",
+            "access_token": new_jwt,
             "refresh_token": "refresh-new",
-            "expires_in": 900,
+            "expires_in": 3600,
             "token_type": "Bearer",
+            "scope": "inference:invoke",
         }
 
-    def _fake_mint_agent_key(*, client, portal_base_url, access_token, min_ttl_seconds):
-        return _mint_payload(api_key="new-agent-key")
-
     monkeypatch.setattr("hermes_cli.auth._refresh_access_token", _fake_refresh_access_token)
-    monkeypatch.setattr("hermes_cli.auth._mint_agent_key", _fake_mint_agent_key)
 
     creds = resolve_nous_runtime_credentials(
-        min_key_ttl_seconds=300,
-        inference_auth_mode=NOUS_INFERENCE_AUTH_MODE_FRESH,
+        force_refresh=True,
     )
-    assert creds["api_key"] == "new-agent-key"
+    assert creds["api_key"] == new_jwt
 
 
 def test_persist_nous_credentials_idempotent_no_duplicate_pool_entries(tmp_path, monkeypatch):
@@ -1260,21 +1257,23 @@ def test_persist_nous_credentials_idempotent_no_duplicate_pool_entries(tmp_path,
     persist_nous_credentials(first)
 
     second = _full_state_fixture()
-    second["access_token"] = "access-second"
-    second["agent_key"] = "agent-key-second"
+    second_token = _invoke_jwt(seconds=7200)
+    second["access_token"] = second_token
+    second["agent_key"] = second_token
+    second["agent_key_expires_at"] = _future_iso(7200)
     persist_nous_credentials(second)
 
     payload = json.loads((hermes_home / "auth.json").read_text())
 
     # providers.nous reflects the latest write (singleton semantics)
-    assert payload["providers"]["nous"]["access_token"] == "access-second"
-    assert payload["providers"]["nous"]["agent_key"] == "agent-key-second"
+    assert payload["providers"]["nous"]["access_token"] == second_token
+    assert payload["providers"]["nous"]["agent_key"] == second_token
 
     # credential_pool.nous has exactly one entry, carrying the latest agent_key
     pool_entries = payload["credential_pool"]["nous"]
     assert len(pool_entries) == 1, pool_entries
     assert pool_entries[0]["source"] == NOUS_DEVICE_CODE_SOURCE
-    assert pool_entries[0]["agent_key"] == "agent-key-second"
+    assert pool_entries[0]["agent_key"] == second_token
     # And no stray `manual:device_code` / `manual:dashboard_device_code` rows
     assert not any(
         e["source"].startswith("manual:") for e in pool_entries
@@ -1295,13 +1294,14 @@ def test_persist_nous_credentials_reloads_pool_after_singleton_write(tmp_path, m
     }))
     monkeypatch.setenv("HERMES_HOME", str(hermes_home))
 
-    entry = persist_nous_credentials(_full_state_fixture())
+    state = _full_state_fixture()
+    entry = persist_nous_credentials(state)
     assert entry is not None
     assert entry.source == NOUS_DEVICE_CODE_SOURCE
     # Label derived by _seed_from_singletons via label_from_token; we don't
     # assert its exact value, just that the helper returned a real entry.
-    assert entry.access_token == "access-tok"
-    assert entry.agent_key == "agent-key-value"
+    assert entry.access_token == state["access_token"]
+    assert entry.agent_key == state["agent_key"]
 
 
 def test_persist_nous_credentials_embeds_custom_label(tmp_path, monkeypatch):
@@ -1614,7 +1614,8 @@ def test_shared_store_write_and_read_roundtrip(shared_store_env):
         _write_shared_nous_state,
     )
 
-    _write_shared_nous_state(_full_state_fixture())
+    state = _full_state_fixture()
+    _write_shared_nous_state(state)
 
     path = _nous_shared_store_path()
     assert path.is_file()
@@ -1626,7 +1627,7 @@ def test_shared_store_write_and_read_roundtrip(shared_store_env):
     loaded = _read_shared_nous_state()
     assert loaded is not None
     assert loaded["refresh_token"] == "refresh-tok"
-    assert loaded["access_token"] == "access-tok"
+    assert loaded["access_token"] == state["access_token"]
     assert loaded["portal_base_url"] == "https://portal.example.com"
     assert loaded["inference_base_url"] == "https://inference.example.com/v1"
     # Volatile agent_key MUST NOT be persisted to the shared store
@@ -1716,12 +1717,12 @@ def test_try_import_shared_returns_none_on_refresh_failure(
     assert auth_mod._read_shared_nous_state() is None
 
 
-def test_try_import_shared_persists_rotated_token_when_mint_fails(
+def test_try_import_shared_persists_rotated_token_when_jwt_validation_fails(
     shared_store_env, monkeypatch,
 ):
-    """A forced shared import refresh rotates the single-use token before minting.
+    """A forced shared import refresh rotates the single-use token before validation.
 
-    If the later agent-key mint fails, the shared store must still keep the
+    If the later inference-JWT validation fails, the shared store must still keep the
     rotated refresh token; otherwise the next import attempt replays the
     consumed token and trips refresh-token reuse.
     """
@@ -1741,12 +1742,7 @@ def test_try_import_shared_persists_rotated_token_when_mint_fails(
             "token_type": "Bearer",
         }
 
-    def _fake_mint_agent_key(*, client, portal_base_url, access_token, min_ttl_seconds):
-        assert access_token == "access-new"
-        raise AuthError("credits exhausted", provider="nous", code="insufficient_credits")
-
     monkeypatch.setattr(auth_mod, "_refresh_access_token", _fake_refresh_access_token)
-    monkeypatch.setattr(auth_mod, "_mint_agent_key", _fake_mint_agent_key)
 
     assert auth_mod._try_import_shared_nous_state() is None
 
@@ -1757,27 +1753,24 @@ def test_try_import_shared_persists_rotated_token_when_mint_fails(
 
 
 def test_try_import_shared_rehydrates_on_success(shared_store_env, monkeypatch):
-    """Happy path: stored refresh_token is accepted, forced refresh+mint
-    returns a fresh access_token + agent_key, and the returned dict has
+    """Happy path: stored refresh_token is accepted, forced refresh
+    returns a fresh access_token JWT, and the returned dict has
     every field persist_nous_credentials() needs.
     """
     from hermes_cli import auth as auth_mod
 
     auth_mod._write_shared_nous_state(_full_state_fixture())
+    fresh_jwt = _invoke_jwt(seconds=7200)
 
     def _fake_refresh(state, **kwargs):
-        # Simulate portal returning fresh tokens + a new agent_key
+        # Simulate portal returning a fresh inference JWT.
         assert kwargs.get("force_refresh") is True
-        assert (
-            kwargs.get("inference_auth_mode")
-            == auth_mod.NOUS_INFERENCE_AUTH_MODE_FRESH
-        )
         return {
             **state,
-            "access_token": "fresh-access-tok",
+            "access_token": fresh_jwt,
             "refresh_token": "fresh-refresh-tok",  # rotated
-            "agent_key": "new-agent-key",
-            "agent_key_expires_at": "2026-04-19T22:00:00+00:00",
+            "agent_key": fresh_jwt,
+            "agent_key_expires_at": _future_iso(7200),
         }
 
     monkeypatch.setattr(auth_mod, "refresh_nous_oauth_from_state", _fake_refresh)
@@ -1785,9 +1778,9 @@ def test_try_import_shared_rehydrates_on_success(shared_store_env, monkeypatch):
     result = auth_mod._try_import_shared_nous_state()
 
     assert result is not None
-    assert result["access_token"] == "fresh-access-tok"
+    assert result["access_token"] == fresh_jwt
     assert result["refresh_token"] == "fresh-refresh-tok"
-    assert result["agent_key"] == "new-agent-key"
+    assert result["agent_key"] == fresh_jwt
     # Preserved from shared state
     assert result["portal_base_url"] == "https://portal.example.com"
     assert result["client_id"] == "hermes-cli"
@@ -1834,13 +1827,15 @@ def test_shared_store_survives_across_profile_switch(
     assert shared["refresh_token"] == "refresh-tok"
 
     # And a successful rehydrate + persist lands nous into profile B
+    b_jwt = _invoke_jwt(seconds=7200)
+
     def _fake_refresh(state, **kwargs):
         return {
             **state,
-            "access_token": "b-access-tok",
+            "access_token": b_jwt,
             "refresh_token": "b-refresh-tok",
-            "agent_key": "b-agent-key",
-            "agent_key_expires_at": "2026-04-19T22:00:00+00:00",
+            "agent_key": b_jwt,
+            "agent_key_expires_at": _future_iso(7200),
         }
 
     monkeypatch.setattr(auth_mod, "refresh_nous_oauth_from_state", _fake_refresh)
@@ -1880,35 +1875,26 @@ def test_runtime_refresh_uses_newer_shared_token_before_local_stale_token(
     monkeypatch.setenv("HERMES_HOME", str(profile_b))
 
     shared_state = _full_state_fixture()
-    shared_state["access_token"] = "shared-fresh-access"
+    shared_token = _invoke_jwt(seconds=3600)
+    shared_state["access_token"] = shared_token
     shared_state["refresh_token"] = "shared-fresh-refresh"
     shared_state["expires_at"] = "2099-01-01T00:00:00+00:00"
+    shared_state["scope"] = "inference:invoke"
     auth_mod._write_shared_nous_state(shared_state)
 
     def _refresh_should_not_happen(**_kwargs):
         raise AssertionError("stale profile-local refresh token was used")
 
-    minted_with: list[str] = []
-
-    def _fake_mint_agent_key(*, client, portal_base_url, access_token, min_ttl_seconds):
-        minted_with.append(access_token)
-        return _mint_payload(api_key="agent-key-from-shared-token")
-
     monkeypatch.setattr(auth_mod, "_refresh_access_token", _refresh_should_not_happen)
-    monkeypatch.setattr(auth_mod, "_mint_agent_key", _fake_mint_agent_key)
 
-    creds = auth_mod.resolve_nous_runtime_credentials(
-        min_key_ttl_seconds=300,
-        inference_auth_mode=auth_mod.NOUS_INFERENCE_AUTH_MODE_FRESH,
-    )
+    creds = auth_mod.resolve_nous_runtime_credentials()
 
-    assert creds["api_key"] == "agent-key-from-shared-token"
-    assert minted_with == ["shared-fresh-access"]
+    assert creds["api_key"] == shared_token
 
     profile_state = auth_mod.get_provider_auth_state("nous")
     assert profile_state is not None
     assert profile_state["refresh_token"] == "shared-fresh-refresh"
-    assert profile_state["access_token"] == "shared-fresh-access"
+    assert profile_state["access_token"] == shared_token
 
 
 def test_managed_gateway_access_token_uses_newer_shared_token(
diff --git a/tests/hermes_cli/test_auth_provider_gate.py b/tests/hermes_cli/test_auth_provider_gate.py
index f65ae71b856..561009d096c 100644
--- a/tests/hermes_cli/test_auth_provider_gate.py
+++ b/tests/hermes_cli/test_auth_provider_gate.py
@@ -1,7 +1,6 @@
 """Tests for is_provider_explicitly_configured()."""
 
 import json
-import os
 import pytest
 
 
diff --git a/tests/hermes_cli/test_auth_qwen_provider.py b/tests/hermes_cli/test_auth_qwen_provider.py
index a2f58df6b0b..6dd1ed91dda 100644
--- a/tests/hermes_cli/test_auth_qwen_provider.py
+++ b/tests/hermes_cli/test_auth_qwen_provider.py
@@ -6,7 +6,6 @@ resolve_qwen_runtime_credentials, get_qwen_auth_status.
 """
 
 import json
-import os
 import stat
 import time
 from pathlib import Path
diff --git a/tests/hermes_cli/test_azure_foundry_entra.py b/tests/hermes_cli/test_azure_foundry_entra.py
index 6cc2ff0ec97..f35312f0781 100644
--- a/tests/hermes_cli/test_azure_foundry_entra.py
+++ b/tests/hermes_cli/test_azure_foundry_entra.py
@@ -24,7 +24,6 @@ from __future__ import annotations
 import sys
 from types import SimpleNamespace
 from typing import cast
-from unittest.mock import MagicMock, patch
 
 import pytest
 
diff --git a/tests/hermes_cli/test_backup.py b/tests/hermes_cli/test_backup.py
index 097b0b20957..8c0f2a39874 100644
--- a/tests/hermes_cli/test_backup.py
+++ b/tests/hermes_cli/test_backup.py
@@ -1029,7 +1029,6 @@ class TestProfileRestoration:
         args = Namespace(zipfile=str(zip_path), force=True)
 
         # Simulate profiles module not being available
-        import hermes_cli.backup as backup_mod
         original_import = __builtins__.__import__ if hasattr(__builtins__, '__import__') else __import__
 
         def fake_import(name, *a, **kw):
@@ -1680,3 +1679,105 @@ class TestPreMigrationBackup:
             _t.sleep(1.05)
         # Update backup must still be there
         assert update_backup.exists(), "pre-migration rotation wrongly pruned the pre-update backup"
+
+
+# ---------------------------------------------------------------------------
+# Cron jobs auto-restore after silent migration loss (issue #34600)
+# ---------------------------------------------------------------------------
+
+class TestRestoreCronJobsIfEmptied:
+    """`hermes update` config migration can leave cron/jobs.json valid-but-empty,
+    silently dropping every scheduled job. `restore_cron_jobs_if_emptied` is the
+    post-migration safety net that restores from the pre-update snapshot."""
+
+    @staticmethod
+    def _seed_jobs(path: Path, jobs):
+        path.parent.mkdir(parents=True, exist_ok=True)
+        path.write_text(json.dumps({"jobs": jobs}))
+
+    def _make_snapshot(self, hermes_home: Path, label="pre-update"):
+        from hermes_cli.backup import create_quick_snapshot
+        return create_quick_snapshot(label=label, hermes_home=hermes_home, keep=5)
+
+    def test_restores_when_emptied_after_migration(self, tmp_path):
+        from hermes_cli.backup import restore_cron_jobs_if_emptied
+        hermes_home = tmp_path / ".hermes"
+        jobs_path = hermes_home / "cron" / "jobs.json"
+        # Pre-update: 3 real jobs.
+        self._seed_jobs(jobs_path, [{"id": "a"}, {"id": "b"}, {"id": "c"}])
+        snap_id = self._make_snapshot(hermes_home)
+        assert snap_id
+
+        # Migration silently empties the file (valid JSON, zero jobs).
+        jobs_path.write_text(json.dumps({"jobs": []}))
+
+        result = restore_cron_jobs_if_emptied(snap_id, hermes_home=hermes_home)
+        assert result is not None
+        assert result["restored"] is True
+        assert result["job_count"] == 3
+        assert result["snapshot_id"] == snap_id
+
+        # The live file now has the jobs back.
+        restored = json.loads(jobs_path.read_text())
+        assert len(restored["jobs"]) == 3
+
+    def test_noop_when_live_file_still_has_jobs(self, tmp_path):
+        from hermes_cli.backup import restore_cron_jobs_if_emptied
+        hermes_home = tmp_path / ".hermes"
+        jobs_path = hermes_home / "cron" / "jobs.json"
+        self._seed_jobs(jobs_path, [{"id": "a"}, {"id": "b"}])
+        snap_id = self._make_snapshot(hermes_home)
+
+        # Healthy path: file unchanged after update.
+        result = restore_cron_jobs_if_emptied(snap_id, hermes_home=hermes_home)
+        assert result is None
+
+    def test_noop_when_snapshot_had_no_jobs(self, tmp_path):
+        from hermes_cli.backup import restore_cron_jobs_if_emptied
+        hermes_home = tmp_path / ".hermes"
+        jobs_path = hermes_home / "cron" / "jobs.json"
+        # Pre-update genuinely had zero jobs; current is also empty.
+        self._seed_jobs(jobs_path, [])
+        snap_id = self._make_snapshot(hermes_home)
+        jobs_path.write_text(json.dumps({"jobs": []}))
+
+        result = restore_cron_jobs_if_emptied(snap_id, hermes_home=hermes_home)
+        assert result is None
+
+    def test_noop_when_live_file_unreadable(self, tmp_path):
+        """An unparseable live file is left alone — that's a different failure
+        mode the user should see, not silently overwrite."""
+        from hermes_cli.backup import restore_cron_jobs_if_emptied
+        hermes_home = tmp_path / ".hermes"
+        jobs_path = hermes_home / "cron" / "jobs.json"
+        self._seed_jobs(jobs_path, [{"id": "a"}])
+        snap_id = self._make_snapshot(hermes_home)
+        jobs_path.write_text("{ this is not valid json")
+
+        result = restore_cron_jobs_if_emptied(snap_id, hermes_home=hermes_home)
+        assert result is None
+        # File left untouched.
+        assert jobs_path.read_text() == "{ this is not valid json"
+
+    def test_noop_when_snapshot_id_missing(self, tmp_path):
+        from hermes_cli.backup import restore_cron_jobs_if_emptied
+        hermes_home = tmp_path / ".hermes"
+        jobs_path = hermes_home / "cron" / "jobs.json"
+        self._seed_jobs(jobs_path, [])
+        assert restore_cron_jobs_if_emptied(None, hermes_home=hermes_home) is None
+        assert restore_cron_jobs_if_emptied("", hermes_home=hermes_home) is None
+
+    def test_restores_legacy_bare_list_snapshot_shape(self, tmp_path):
+        """A legacy snapshot storing a bare JSON list (not {"jobs": [...]}) is
+        still counted and restored."""
+        from hermes_cli.backup import restore_cron_jobs_if_emptied
+        hermes_home = tmp_path / ".hermes"
+        jobs_path = hermes_home / "cron" / "jobs.json"
+        jobs_path.parent.mkdir(parents=True, exist_ok=True)
+        jobs_path.write_text(json.dumps([{"id": "a"}, {"id": "b"}]))
+        snap_id = self._make_snapshot(hermes_home)
+
+        jobs_path.write_text(json.dumps({"jobs": []}))
+        result = restore_cron_jobs_if_emptied(snap_id, hermes_home=hermes_home)
+        assert result is not None
+        assert result["job_count"] == 2
diff --git a/tests/hermes_cli/test_banner_git_state.py b/tests/hermes_cli/test_banner_git_state.py
index 6556145e8f1..17e9aea7f71 100644
--- a/tests/hermes_cli/test_banner_git_state.py
+++ b/tests/hermes_cli/test_banner_git_state.py
@@ -61,3 +61,56 @@ def test_get_git_banner_state_reads_origin_and_head(tmp_path):
         state = banner.get_git_banner_state(repo_dir)
 
     assert state == {"upstream": "b2f477a3", "local": "af8aad31", "ahead": 3}
+
+
+def test_get_git_banner_state_falls_back_to_build_sha_when_no_repo():
+    """Docker image case: no .git checkout — baked build SHA fills the gap.
+
+    ``_resolve_repo_dir`` returns None when neither the running code's
+    parent nor ``$HERMES_HOME/hermes-agent/`` is a git repo (the canonical
+    case inside the published container, where .git is dockerignored).
+    The banner should still report the build SHA so support bug reports
+    can identify the running commit.
+    """
+    from hermes_cli import banner
+
+    with patch.object(banner, "_resolve_repo_dir", return_value=None), \
+         patch("hermes_cli.build_info.get_build_sha", return_value="abcdef12"):
+        state = banner.get_git_banner_state()
+
+    assert state == {"upstream": "abcdef12", "local": "abcdef12", "ahead": 0}
+
+
+def test_get_git_banner_state_returns_none_when_no_repo_and_no_build_sha():
+    """Pip-installed wheel with neither git checkout nor baked SHA → None.
+
+    Banner correctly omits the upstream/local suffix in this case.
+    """
+    from hermes_cli import banner
+
+    with patch.object(banner, "_resolve_repo_dir", return_value=None), \
+         patch("hermes_cli.build_info.get_build_sha", return_value=None):
+        state = banner.get_git_banner_state()
+
+    assert state is None
+
+
+def test_get_git_banner_state_falls_back_when_live_git_returns_nothing(tmp_path):
+    """Shallow clone without origin/main → still surface build SHA if baked.
+
+    Some install paths (e.g. ``git clone --depth 1`` without a remote) have
+    a ``.git`` directory but ``git rev-parse origin/main`` fails.  When that
+    happens AND a baked SHA exists, return the baked one instead of None.
+    """
+    from hermes_cli import banner
+
+    repo_dir = tmp_path / "repo"
+    (repo_dir / ".git").mkdir(parents=True)
+
+    # All git invocations fail (returncode=1, empty stdout).
+    failed = MagicMock(returncode=1, stdout="")
+    with patch("hermes_cli.banner.subprocess.run", return_value=failed), \
+         patch("hermes_cli.build_info.get_build_sha", return_value="cafef00d"):
+        state = banner.get_git_banner_state(repo_dir)
+
+    assert state == {"upstream": "cafef00d", "local": "cafef00d", "ahead": 0}
diff --git a/tests/hermes_cli/test_banner_skills.py b/tests/hermes_cli/test_banner_skills.py
index 1006fcc8671..82518caa969 100644
--- a/tests/hermes_cli/test_banner_skills.py
+++ b/tests/hermes_cli/test_banner_skills.py
@@ -2,7 +2,6 @@
 
 from unittest.mock import patch
 
-import pytest
 
 
 _MOCK_SKILLS = [
diff --git a/tests/hermes_cli/test_bedrock_model_picker.py b/tests/hermes_cli/test_bedrock_model_picker.py
index 70335be2186..0020341d491 100644
--- a/tests/hermes_cli/test_bedrock_model_picker.py
+++ b/tests/hermes_cli/test_bedrock_model_picker.py
@@ -16,12 +16,10 @@ Covers the three paths changed by fix/bedrock-provider-model-ids-live-discovery:
 All Bedrock API calls are mocked — no real AWS credentials needed.
 """
 
-import os
 from contextlib import contextmanager
 from types import ModuleType
 from unittest.mock import MagicMock, patch
 
-import pytest
 
 
 # ---------------------------------------------------------------------------
@@ -95,7 +93,7 @@ class TestProviderModelIdsBedrock:
 
     def test_falls_back_to_static_list_when_discovery_empty(self, monkeypatch):
         """When discover_bedrock_models() returns [], fall back to curated static list."""
-        from hermes_cli.models import _PROVIDER_MODELS, provider_model_ids
+        from hermes_cli.models import provider_model_ids
 
         with patch("agent.bedrock_adapter.discover_bedrock_models", return_value=[]), \
              patch("agent.bedrock_adapter.resolve_bedrock_region", return_value="eu-central-1"):
diff --git a/tests/hermes_cli/test_build_info.py b/tests/hermes_cli/test_build_info.py
new file mode 100644
index 00000000000..994c13e1dcf
--- /dev/null
+++ b/tests/hermes_cli/test_build_info.py
@@ -0,0 +1,78 @@
+"""Tests for hermes_cli.build_info — baked-in build SHA resolution.
+
+The build SHA is written by the Dockerfile's ``HERMES_GIT_SHA`` build-arg
+into ``<project_root>/.hermes_build_sha``.  These tests cover the read-side
+helper: missing file, malformed file, truncation, and error tolerance.
+"""
+
+from pathlib import Path
+from unittest.mock import patch
+
+
+def test_get_build_sha_returns_none_when_file_absent(tmp_path):
+    """Source installs: no file present → None, callers fall back to git."""
+    from hermes_cli import build_info
+
+    missing = tmp_path / ".hermes_build_sha"  # never created
+
+    with patch.object(build_info, "_BUILD_SHA_FILE", missing):
+        assert build_info.get_build_sha() is None
+
+
+def test_get_build_sha_reads_baked_file(tmp_path):
+    """Docker image case: file exists with full 40-char SHA → truncated to 8."""
+    from hermes_cli import build_info
+
+    sha_file = tmp_path / ".hermes_build_sha"
+    sha_file.write_text("abcdef1234567890abcdef1234567890abcdef12\n")
+
+    with patch.object(build_info, "_BUILD_SHA_FILE", sha_file):
+        assert build_info.get_build_sha() == "abcdef12"
+
+
+def test_get_build_sha_respects_short_argument(tmp_path):
+    """``short=N`` truncates to N chars; ``short<=0`` returns full SHA."""
+    from hermes_cli import build_info
+
+    sha_file = tmp_path / ".hermes_build_sha"
+    full_sha = "abcdef1234567890abcdef1234567890abcdef12"
+    sha_file.write_text(full_sha + "\n")
+
+    with patch.object(build_info, "_BUILD_SHA_FILE", sha_file):
+        assert build_info.get_build_sha(short=12) == "abcdef123456"
+        assert build_info.get_build_sha(short=0) == full_sha
+        assert build_info.get_build_sha(short=-1) == full_sha
+
+
+def test_get_build_sha_strips_whitespace(tmp_path):
+    """The Dockerfile uses ``printf '%s\\n'`` — strip the trailing newline."""
+    from hermes_cli import build_info
+
+    sha_file = tmp_path / ".hermes_build_sha"
+    sha_file.write_text("  abcdef1234567890\n\n")
+
+    with patch.object(build_info, "_BUILD_SHA_FILE", sha_file):
+        assert build_info.get_build_sha() == "abcdef12"
+
+
+def test_get_build_sha_returns_none_for_empty_file(tmp_path):
+    """A whitespace-only file is treated as absent."""
+    from hermes_cli import build_info
+
+    sha_file = tmp_path / ".hermes_build_sha"
+    sha_file.write_text("   \n\n")
+
+    with patch.object(build_info, "_BUILD_SHA_FILE", sha_file):
+        assert build_info.get_build_sha() is None
+
+
+def test_get_build_sha_swallows_read_errors(tmp_path):
+    """Any IO exception from the read returns None — never raises."""
+    from hermes_cli import build_info
+
+    sha_file = tmp_path / ".hermes_build_sha"
+    sha_file.write_text("abcdef1234567890\n")
+
+    with patch.object(build_info, "_BUILD_SHA_FILE", sha_file), \
+         patch.object(Path, "read_text", side_effect=OSError("boom")):
+        assert build_info.get_build_sha() is None
diff --git a/tests/hermes_cli/test_bundles.py b/tests/hermes_cli/test_bundles.py
index b089530ca98..8cd3a66a7aa 100644
--- a/tests/hermes_cli/test_bundles.py
+++ b/tests/hermes_cli/test_bundles.py
@@ -1,8 +1,6 @@
 """Tests for hermes_cli/bundles.py — the `hermes bundles` CLI subcommand."""
 
 import argparse
-import sys
-from pathlib import Path
 
 import pytest
 
diff --git a/tests/hermes_cli/test_clear_stale_base_url.py b/tests/hermes_cli/test_clear_stale_base_url.py
index 09f721bb7f1..b174cd32bfc 100644
--- a/tests/hermes_cli/test_clear_stale_base_url.py
+++ b/tests/hermes_cli/test_clear_stale_base_url.py
@@ -2,7 +2,6 @@
 
 from __future__ import annotations
 
-from unittest.mock import patch
 
 from hermes_cli.config import load_config, save_config, save_env_value, get_env_value
 
diff --git a/tests/hermes_cli/test_cmd_update.py b/tests/hermes_cli/test_cmd_update.py
index 201fe7b3178..4f2875235f1 100644
--- a/tests/hermes_cli/test_cmd_update.py
+++ b/tests/hermes_cli/test_cmd_update.py
@@ -133,11 +133,10 @@ class TestCmdUpdateBranchFallback:
         captured = capsys.readouterr()
         assert "Already up to date!" in captured.out
 
-    @patch("hermes_cli.main._web_ui_build_needed", return_value=True)
     @patch("shutil.which")
     @patch("subprocess.run")
     def test_update_refreshes_repo_and_tui_node_dependencies(
-        self, mock_run, mock_which, _mock_web_ui_build_needed, mock_args
+        self, mock_run, mock_which, mock_args
     ):
         from hermes_cli import main as hm
 
@@ -145,7 +144,13 @@ class TestCmdUpdateBranchFallback:
         mock_run.side_effect = _make_run_side_effect(
             branch="main", verify_ok=True, commit_count="1"
         )
-        with patch.object(hm, "_is_termux_env", return_value=False):
+        # The web UI build runs through _run_with_idle_timeout now (issue
+        # #33788) so it no longer appears in subprocess.run's call list.
+        # Mock it so the test doesn't actually shell out to ``tsc``.
+        import subprocess as _subprocess
+        build_ok = _subprocess.CompletedProcess([], 0, stdout="", stderr="")
+        with patch.object(hm, "_is_termux_env", return_value=False), \
+             patch.object(hm, "_run_with_idle_timeout", return_value=build_ok) as mock_idle:
             cmd_update(mock_args)
 
         npm_calls = [
@@ -154,35 +159,43 @@ class TestCmdUpdateBranchFallback:
             if call.args and call.args[0][0] == "/usr/bin/npm"
         ]
 
-        # cmd_update runs npm commands in three locations:
-        #   1. repo root  — slash-command / TUI bridge deps
-        #   2. ui-tui/    — Ink TUI deps
-        #   3. apps/dashboard/ — install + "npm run build" for the web frontend
-        full_flags = [
+        # cmd_update runs npm commands in four locations:
+        #   1. repo root  — slash-command / TUI bridge deps  (subprocess.run)
+        #   2. ui-tui/    — Ink TUI deps                     (subprocess.run)
+        #   3. web/       — npm install                      (subprocess.run)
+        #   4. web/       — npm run build                    (_run_with_idle_timeout)
+        #
+        # Repo-root and ui-tui installs intentionally omit `--silent` and run
+        # without `capture_output` so optional postinstall scripts (e.g.
+        # `@askjo/camofox-browser`'s browser-binary fetch) print progress —
+        # otherwise long downloads look like a hang (#18840).  The web/ install
+        # keeps `--silent` because its build step is short and noisy.
+        update_flags = [
             "/usr/bin/npm",
             "ci",
-            "--silent",
-            "--no-fund",
-            "--no-audit",
-            "--progress=false",
-            "--workspaces=false",
-        ]
-        app_flags = [
-            "/usr/bin/npm",
-            "ci",
-            "--silent",
             "--no-fund",
             "--no-audit",
             "--progress=false",
         ]
+        # Repo root additionally passes --workspaces=false so npm does not
+        # recursively install every apps/* workspace (desktop, shared).
+        repo_flags = [*update_flags, "--workspaces=false"]
         assert npm_calls[:2] == [
-            (full_flags, PROJECT_ROOT),
-            (app_flags, PROJECT_ROOT / "ui-tui"),
-        ]
-        assert npm_calls[2:] == [
-            (["/usr/bin/npm", "ci", "--silent"], PROJECT_ROOT / "apps" / "dashboard"),
-            (["/usr/bin/npm", "run", "build"], PROJECT_ROOT / "apps" / "dashboard"),
+            (repo_flags, PROJECT_ROOT),
+            (update_flags, PROJECT_ROOT / "ui-tui"),
         ]
+        if len(npm_calls) > 2:
+            # Only the web/ install is left in subprocess.run; the build moved
+            # to _run_with_idle_timeout to make Vite progress visible (#33788).
+            assert npm_calls[2:] == [
+                (["/usr/bin/npm", "ci", "--silent"], PROJECT_ROOT / "web"),
+            ]
+
+        # The web UI build itself went through the streaming helper.
+        mock_idle.assert_called_once()
+        idle_args, idle_kwargs = mock_idle.call_args
+        assert idle_args[0] == ["/usr/bin/npm", "run", "build"]
+        assert idle_kwargs["cwd"] == PROJECT_ROOT / "web"
 
         # Regression for #18840: repo root + ui-tui installs must stream
         # output (capture_output=False) so postinstall progress is visible
diff --git a/tests/hermes_cli/test_cmd_update_docker.py b/tests/hermes_cli/test_cmd_update_docker.py
new file mode 100644
index 00000000000..c56a3ffcfda
--- /dev/null
+++ b/tests/hermes_cli/test_cmd_update_docker.py
@@ -0,0 +1,185 @@
+"""Tests for ``hermes update`` / ``--check`` inside the Docker container.
+
+Background: ``.dockerignore`` excludes ``.git``, so the existing git-pull
+update path can never succeed inside the published image.  Before this
+fix, ``hermes update`` would fall through to ``"✗ Not a git repository.
+Please reinstall: curl ... install.sh"`` — that script installs a *new*
+host-side Hermes, not an update to the running container, so the message
+was actively misleading.
+
+These tests pin the new behaviour: when ``detect_install_method`` reports
+``"docker"`` (stamped by ``docker/stage2-hook.sh``), both the apply path
+(``cmd_update``) and the check path (``_cmd_update_check``) print the
+``docker pull`` guidance from ``format_docker_update_message`` and exit
+with status 1, without running ``git fetch`` / ``subprocess.run``.
+"""
+
+from __future__ import annotations
+
+from types import SimpleNamespace
+from unittest.mock import patch
+
+import pytest
+
+from hermes_cli.main import _cmd_update_check, cmd_update
+
+
+# ---------- cmd_update (apply path) ----------
+
+
+@patch("hermes_cli.config.is_managed", return_value=False)
+@patch("hermes_cli.config.detect_install_method", return_value="docker")
+@patch("subprocess.run")
+def test_cmd_update_in_docker_prints_guidance_and_exits(
+    mock_run, _mock_method, _mock_managed, capsys
+):
+    """``hermes update`` inside Docker → friendly message + exit 1, no git calls."""
+    with pytest.raises(SystemExit) as excinfo:
+        cmd_update(SimpleNamespace(check=False))
+
+    assert excinfo.value.code == 1
+    out = capsys.readouterr().out
+    # Spot-check the key guidance — exhaustive wording is locked in by the
+    # config-module test below to keep these CLI tests resilient to copy edits.
+    assert "doesn't apply inside the Docker container" in out
+    assert "docker pull nousresearch/hermes-agent:latest" in out
+
+    # No git invocations — the early-return must beat every git command.
+    git_calls = [c for c in mock_run.call_args_list if c.args and c.args[0] and "git" in str(c.args[0][0])]
+    assert git_calls == [], f"expected no git calls, got: {git_calls}"
+
+
+@patch("hermes_cli.config.is_managed", return_value=False)
+@patch("hermes_cli.config.detect_install_method", return_value="docker")
+@patch("subprocess.run")
+def test_cmd_update_check_in_docker_prints_guidance_and_exits(
+    mock_run, _mock_method, _mock_managed, capsys
+):
+    """``hermes update --check`` inside Docker → same message + exit 1, no fetch."""
+    with pytest.raises(SystemExit) as excinfo:
+        cmd_update(SimpleNamespace(check=True, branch=None))
+
+    assert excinfo.value.code == 1
+    out = capsys.readouterr().out
+    assert "doesn't apply inside the Docker container" in out
+    assert "docker pull nousresearch/hermes-agent:latest" in out
+
+    git_calls = [c for c in mock_run.call_args_list if c.args and c.args[0] and "git" in str(c.args[0][0])]
+    assert git_calls == [], f"expected no git calls, got: {git_calls}"
+
+
+@patch("hermes_cli.config.is_managed", return_value=False)
+@patch("hermes_cli.config.detect_install_method", return_value="docker")
+@patch("subprocess.run")
+def test_cmd_update_in_docker_ignores_yes_and_force(
+    mock_run, _mock_method, _mock_managed, capsys
+):
+    """``--yes`` / ``--force`` don't bypass the Docker bail-out.
+
+    The point of the bail-out is "git pull will never work here", so even
+    a user trying to barge through with ``--yes --force`` should see the
+    docker-pull guidance.
+    """
+    with pytest.raises(SystemExit):
+        cmd_update(SimpleNamespace(check=False, yes=True, force=True))
+
+    assert "docker pull" in capsys.readouterr().out
+    git_calls = [c for c in mock_run.call_args_list if c.args and c.args[0] and "git" in str(c.args[0][0])]
+    assert git_calls == []
+
+
+# ---------- _cmd_update_check (check path, direct entry) ----------
+
+
+@patch("hermes_cli.config.detect_install_method", return_value="docker")
+@patch("subprocess.run")
+def test_cmd_update_check_direct_in_docker(mock_run, _mock_method, capsys):
+    """Calling ``_cmd_update_check`` directly (no apply path) also bails."""
+    with pytest.raises(SystemExit) as excinfo:
+        _cmd_update_check()
+
+    assert excinfo.value.code == 1
+    assert "docker pull" in capsys.readouterr().out
+    git_calls = [c for c in mock_run.call_args_list if c.args and c.args[0] and "git" in str(c.args[0][0])]
+    assert git_calls == []
+
+
+# ---------- Non-Docker installs unaffected ----------
+
+
+@patch("hermes_cli.config.is_managed", return_value=False)
+@patch("hermes_cli.config.detect_install_method", return_value="git")
+@patch(
+    "subprocess.run",
+    return_value=SimpleNamespace(returncode=0, stdout="0\n", stderr=""),
+)
+def test_cmd_update_on_git_install_does_not_print_docker_message(
+    _mock_run, _mock_method, _mock_managed, capsys
+):
+    """Source/git installs MUST NOT hit the Docker branch.
+
+    Regression guard: an over-eager detection refactor could accidentally
+    route git users through the docker-pull message.  We swallow
+    SystemExit / unrelated errors from the rest of the update flow —
+    those don't matter for this assertion; what matters is that the
+    docker text is absent.
+
+    ``subprocess.run`` is mocked because the git path will otherwise shell
+    out to ``git fetch upstream`` / ``git fetch origin`` — on CI runners
+    with no ``upstream`` remote configured this can hang past the 30s
+    pytest-timeout depending on git's network behaviour.  The stub
+    returns a successful CompletedProcess-shaped object with ``"0\\n"``
+    stdout, which both keeps the flow shell-free AND parses cleanly as
+    the "0 commits behind" rev-list output the check path later parses
+    via ``int(rev_result.stdout.strip())``.
+    """
+    try:
+        cmd_update(SimpleNamespace(check=True, branch=None))
+    except (SystemExit, Exception):
+        # Update flow may exit for unrelated reasons in a stubbed env —
+        # that's fine; we only care about the banner not appearing.
+        pass
+
+    assert "doesn't apply inside the Docker container" not in capsys.readouterr().out
+
+
+@patch("hermes_cli.config.detect_install_method", return_value="pip")
+@patch("hermes_cli.banner.check_via_pypi", return_value=0)
+def test_cmd_update_check_on_pip_install_still_uses_pypi(
+    _mock_pypi, _mock_method, capsys
+):
+    """PyPI installs route to PyPI check, not the Docker bail-out."""
+    _cmd_update_check()
+
+    out = capsys.readouterr().out
+    assert "Already up to date" in out
+    assert "doesn't apply inside the Docker container" not in out
+
+
+# ---------- format_docker_update_message — content lock ----------
+
+
+def test_format_docker_update_message_contents():
+    """Lock in the high-value content of the Docker update message.
+
+    These are the bits a user actually needs to act on; if any of them
+    disappear in a copy edit, the message has lost its value.  Specific
+    wording around them is free to evolve (we don't assert full text).
+    """
+    from hermes_cli.config import format_docker_update_message
+
+    msg = format_docker_update_message()
+
+    # Primary command — the entire reason this message exists.
+    assert "docker pull nousresearch/hermes-agent:latest" in msg
+
+    # The four key concepts the message must cover:
+    assert "restart" in msg.lower(), "must explain that a restart is required"
+    assert "--version" in msg, "must show how to verify the new version"
+    assert ":latest" in msg, "must mention tag pinning caveat"
+    assert "HERMES_HOME" in msg or "/opt/data" in msg, (
+        "must address config persistence across upgrades"
+    )
+
+    # Acknowledges that forks exist (build-your-own-image escape hatch).
+    assert "fork" in msg.lower() or "Dockerfile" in msg
diff --git a/tests/hermes_cli/test_coalesce_session_args.py b/tests/hermes_cli/test_coalesce_session_args.py
index 32866dd5ee1..9971bb51bb6 100644
--- a/tests/hermes_cli/test_coalesce_session_args.py
+++ b/tests/hermes_cli/test_coalesce_session_args.py
@@ -1,6 +1,5 @@
 """Tests for _coalesce_session_name_args — multi-word session name merging."""
 
-import pytest
 from hermes_cli.main import _coalesce_session_name_args
 
 
diff --git a/tests/hermes_cli/test_codex_cli_model_picker.py b/tests/hermes_cli/test_codex_cli_model_picker.py
index 4edbef2dea0..3968437a856 100644
--- a/tests/hermes_cli/test_codex_cli_model_picker.py
+++ b/tests/hermes_cli/test_codex_cli_model_picker.py
@@ -13,11 +13,8 @@ existing Codex CLI tokens via `hermes auth openai-codex`. The old
 
 import base64
 import json
-import os
-import sys
 import time
 from pathlib import Path
-from unittest.mock import patch
 
 import pytest
 
diff --git a/tests/hermes_cli/test_codex_runtime_plugin_migration.py b/tests/hermes_cli/test_codex_runtime_plugin_migration.py
index ebdc9f9ae6b..fc6df86c852 100644
--- a/tests/hermes_cli/test_codex_runtime_plugin_migration.py
+++ b/tests/hermes_cli/test_codex_runtime_plugin_migration.py
@@ -2,14 +2,12 @@
 
 from __future__ import annotations
 
-from pathlib import Path
 
 import pytest
 
 from hermes_cli.codex_runtime_plugin_migration import (
     MIGRATION_MARKER,
     MIGRATION_END_MARKER,
-    MigrationReport,
     _build_hermes_tools_mcp_entry,
     _format_toml_value,
     _looks_like_test_tempdir,
diff --git a/tests/hermes_cli/test_commands.py b/tests/hermes_cli/test_commands.py
index 7324adbe430..b6e11e4c517 100644
--- a/tests/hermes_cli/test_commands.py
+++ b/tests/hermes_cli/test_commands.py
@@ -1003,7 +1003,7 @@ class TestTelegramMenuCommands:
 
     def test_excludes_telegram_disabled_skills(self, tmp_path, monkeypatch):
         """Skills disabled for telegram should not appear in the menu."""
-        from unittest.mock import patch, MagicMock
+        from unittest.mock import patch
 
         # Set up a config with a telegram-specific disabled list
         config_file = tmp_path / "config.yaml"
diff --git a/tests/hermes_cli/test_config.py b/tests/hermes_cli/test_config.py
index d86017f2211..eb4827a4174 100644
--- a/tests/hermes_cli/test_config.py
+++ b/tests/hermes_cli/test_config.py
@@ -2,7 +2,7 @@
 
 import os
 from pathlib import Path
-from unittest.mock import patch, MagicMock
+from unittest.mock import patch
 
 import pytest
 import yaml
diff --git a/tests/hermes_cli/test_config_env_expansion.py b/tests/hermes_cli/test_config_env_expansion.py
index 4de3480f734..435a4766878 100644
--- a/tests/hermes_cli/test_config_env_expansion.py
+++ b/tests/hermes_cli/test_config_env_expansion.py
@@ -1,9 +1,7 @@
 """Tests for ${ENV_VAR} substitution in config.yaml values."""
 
-import os
 import pytest
 from hermes_cli.config import _expand_env_vars, load_config
-from unittest.mock import patch as mock_patch
 
 
 class TestExpandEnvVars:
diff --git a/tests/hermes_cli/test_config_validation.py b/tests/hermes_cli/test_config_validation.py
index 7209e638f9a..f4423f5d7d1 100644
--- a/tests/hermes_cli/test_config_validation.py
+++ b/tests/hermes_cli/test_config_validation.py
@@ -1,6 +1,5 @@
 """Tests for config.yaml structure validation (validate_config_structure)."""
 
-import pytest
 
 from hermes_cli.config import validate_config_structure, ConfigIssue
 
diff --git a/tests/hermes_cli/test_copilot_auth.py b/tests/hermes_cli/test_copilot_auth.py
index 5c8fccf936a..3d0b0bdeb72 100644
--- a/tests/hermes_cli/test_copilot_auth.py
+++ b/tests/hermes_cli/test_copilot_auth.py
@@ -1,8 +1,7 @@
 """Tests for hermes_cli.copilot_auth — Copilot token validation and resolution."""
 
-import os
 import pytest
-from unittest.mock import patch, MagicMock
+from unittest.mock import patch
 
 
 class TestTokenValidation:
diff --git a/tests/hermes_cli/test_curator_archive_prune.py b/tests/hermes_cli/test_curator_archive_prune.py
index 1ab28fb1778..ef8b914a8bf 100644
--- a/tests/hermes_cli/test_curator_archive_prune.py
+++ b/tests/hermes_cli/test_curator_archive_prune.py
@@ -12,12 +12,8 @@ Covers:
 
 from __future__ import annotations
 
-import io
-from contextlib import redirect_stdout, redirect_stderr
 from types import SimpleNamespace
-from unittest.mock import patch
 
-import pytest
 
 
 def _ns(**kwargs):
diff --git a/tests/hermes_cli/test_curses_color_compat.py b/tests/hermes_cli/test_curses_color_compat.py
index c7509cc965f..2416ded1230 100644
--- a/tests/hermes_cli/test_curses_color_compat.py
+++ b/tests/hermes_cli/test_curses_color_compat.py
@@ -12,9 +12,8 @@ clamps with ``min(8, curses.COLORS - 1)``.
 import curses
 import re
 from pathlib import Path
-from unittest.mock import patch, MagicMock, call
+from unittest.mock import patch, MagicMock
 
-import pytest
 
 
 # Path to the source files under test
diff --git a/tests/hermes_cli/test_custom_provider_model_switch.py b/tests/hermes_cli/test_custom_provider_model_switch.py
index 1c14b848439..0f3a76a1ab9 100644
--- a/tests/hermes_cli/test_custom_provider_model_switch.py
+++ b/tests/hermes_cli/test_custom_provider_model_switch.py
@@ -6,8 +6,7 @@ immediately when provider_info had a saved ``model`` field, making it
 impossible to switch models on multi-model endpoints.
 """
 
-import os
-from unittest.mock import patch, MagicMock, call
+from unittest.mock import patch
 
 import pytest
 
diff --git a/tests/hermes_cli/test_dashboard_auth_401_reauth.py b/tests/hermes_cli/test_dashboard_auth_401_reauth.py
index c866fad8252..28d410dc3ac 100644
--- a/tests/hermes_cli/test_dashboard_auth_401_reauth.py
+++ b/tests/hermes_cli/test_dashboard_auth_401_reauth.py
@@ -131,8 +131,13 @@ class TestRefreshTokenCookieDeprecation:
 
 
 class TestApi401Envelope:
+    # NOTE: probe a gated route (``/api/sessions``) here rather than
+    # ``/api/status`` — status is in the shared ``PUBLIC_API_PATHS``
+    # allowlist (portal liveness probe) so it would 200 even without a
+    # cookie and never exercise the 401-envelope code path.
+
     def test_no_cookie_returns_unauthenticated_envelope(self, gated_app):
-        r = gated_app.get("/api/status")
+        r = gated_app.get("/api/sessions")
         assert r.status_code == 401
         body = r.json()
         assert body["error"] == "unauthenticated"
@@ -141,7 +146,7 @@ class TestApi401Envelope:
 
     def test_invalid_cookie_returns_session_expired_envelope(self, gated_app):
         gated_app.cookies.set(SESSION_AT_COOKIE, "garbage")
-        r = gated_app.get("/api/status")
+        r = gated_app.get("/api/sessions")
         assert r.status_code == 401
         body = r.json()
         assert body["error"] == "session_expired"
@@ -151,7 +156,7 @@ class TestApi401Envelope:
         """Dead-cookie cleanup — Phase 6 requirement so the browser
         doesn't keep replaying the stale token on every request."""
         gated_app.cookies.set(SESSION_AT_COOKIE, "garbage")
-        r = gated_app.get("/api/status")
+        r = gated_app.get("/api/sessions")
         set_cookies = r.headers.get_list("set-cookie")
         assert any(
             c.startswith(f"{SESSION_AT_COOKIE}=") and "Max-Age=0" in c
diff --git a/tests/hermes_cli/test_dashboard_auth_cookies.py b/tests/hermes_cli/test_dashboard_auth_cookies.py
index 24d6f4b9168..7109b7b7099 100644
--- a/tests/hermes_cli/test_dashboard_auth_cookies.py
+++ b/tests/hermes_cli/test_dashboard_auth_cookies.py
@@ -1,7 +1,6 @@
 """Tests for the dashboard-auth cookie helpers."""
 from __future__ import annotations
 
-import pytest
 from fastapi import FastAPI
 from fastapi.responses import Response
 from fastapi.testclient import TestClient
diff --git a/tests/hermes_cli/test_dashboard_auth_middleware.py b/tests/hermes_cli/test_dashboard_auth_middleware.py
index 011767604f4..cbbcc6d287f 100644
--- a/tests/hermes_cli/test_dashboard_auth_middleware.py
+++ b/tests/hermes_cli/test_dashboard_auth_middleware.py
@@ -56,10 +56,61 @@ def gated_app():
 # ---------------------------------------------------------------------------
 
 
-def test_gated_status_now_requires_auth(gated_app):
-    """When gate is on, /api/status is NOT public — login bootstrap uses /api/auth/providers."""
+def test_gated_status_is_public(gated_app):
+    """``/api/status`` MUST be public under the OAuth gate.
+
+    Regression guard for the wildcard-subdomain rollout: NAS
+    (``fly-provider.ts`` ``getInstanceRuntimeStatus``) hits
+    ``/api/status`` without a cookie as its sole liveness probe. A 401
+    here surfaces every healthy agent as STARTING/down in the portal
+    UI. The endpoint returns only version + gateway/auth-gate metadata
+    (no user data, no session content), so it stays in the shared
+    ``PUBLIC_API_PATHS`` allowlist under both the legacy ``_SESSION_TOKEN``
+    gate and the OAuth gate.
+
+    The body also reports the gate's shape (``auth_required``,
+    ``auth_providers``) so the SPA's StatusPage and external monitors
+    can distinguish loopback / gated / no-providers without a separate
+    round trip.
+    """
     r = gated_app.get("/api/status")
-    assert r.status_code == 401
+    assert r.status_code == 200, (
+        f"Expected 200, got {r.status_code}: {r.text}"
+    )
+    body = r.json()
+    assert body["auth_required"] is True
+    assert "version" in body
+    assert "gateway_state" in body
+
+
+@pytest.mark.parametrize("path", [
+    "/api/config/defaults",
+    "/api/config/schema",
+    "/api/model/info",
+    "/api/dashboard/themes",
+    "/api/dashboard/plugins",
+])
+def test_other_public_api_paths_are_public_under_gate(gated_app, path):
+    """The remaining ``PUBLIC_API_PATHS`` entries must also bypass the
+    gate. They're documented as non-sensitive read-only endpoints that
+    the SPA pre-loads before login (themes, config schema, model
+    metadata). A 401 / 302-to-login here would block the dashboard
+    shell from rendering pre-auth.
+
+    Accept any non-auth-failure status: 200 when the route succeeds,
+    or any route-specific error (e.g. 400 / 404 / 500 from a missing
+    dependency) — but NEVER 401, and NEVER a 302 to ``/login``.
+    """
+    r = gated_app.get(path, follow_redirects=False)
+    assert r.status_code != 401, (
+        f"{path} returned 401 under the OAuth gate — should be public"
+    )
+    if r.status_code == 302:
+        location = r.headers.get("location", "")
+        assert "/login" not in location, (
+            f"{path} redirected to {location} — should be public, "
+            "not bounced to /login"
+        )
 
 
 def test_gated_html_redirects_to_login(gated_app):
@@ -98,7 +149,7 @@ def test_gated_static_asset_path_is_public(gated_app):
 # ---------------------------------------------------------------------------
 
 
-def test_full_login_round_trip_unlocks_api_status(gated_app):
+def test_full_login_round_trip_unlocks_gated_api(gated_app):
     # 1) Click "Sign in with Stub IdP" — /auth/login redirects to the stub
     #    with a PKCE cookie on the response.
     r1 = gated_app.get("/auth/login?provider=stub", follow_redirects=False)
@@ -128,11 +179,16 @@ def test_full_login_round_trip_unlocks_api_status(gated_app):
     assert any("hermes_session_at" in c for c in set_cookies)
     assert any("hermes_session_rt" in c for c in set_cookies)
 
-    # 3) /api/status now succeeds because we're authenticated.
-    r3 = gated_app.get("/api/status")
-    assert r3.status_code == 200
-    body = r3.json()
-    assert "version" in body
+    # 3) A gated API route (``/api/sessions``) now succeeds because we
+    #    have a valid session cookie. (We deliberately don't probe
+    #    ``/api/status`` here — it's in the shared PUBLIC_API_PATHS
+    #    allowlist and would 200 even without a login, so it can't
+    #    distinguish "logged in" from "gate accidentally disabled".)
+    r3 = gated_app.get("/api/sessions")
+    assert r3.status_code == 200, (
+        f"Expected 200 for /api/sessions post-login, got {r3.status_code}: "
+        f"{r3.text}"
+    )
 
 
 def test_login_unknown_provider_returns_404(gated_app):
diff --git a/tests/hermes_cli/test_dashboard_auth_prefix.py b/tests/hermes_cli/test_dashboard_auth_prefix.py
index c7afce226b8..74366c9c009 100644
--- a/tests/hermes_cli/test_dashboard_auth_prefix.py
+++ b/tests/hermes_cli/test_dashboard_auth_prefix.py
@@ -177,7 +177,7 @@ class TestOAuthRedirectUriRespectsPrefix:
         # The stub IDP's redirect_url echoes the redirect_uri back. The
         # real IDP would consume it and later use it to redirect the
         # user, so the byte-exact value MUST include the prefix.
-        from urllib.parse import urlparse, parse_qs, unquote
+        from urllib.parse import urlparse
         # Stub returns ``{redirect_uri}?code=stub_code&state=...`` — so
         # we read up to the first ``?``.
         redirect_uri = location.split("?", 1)[0]
diff --git a/tests/hermes_cli/test_dashboard_auth_status_endpoint.py b/tests/hermes_cli/test_dashboard_auth_status_endpoint.py
index 3b10917a1d4..9e1de3e76eb 100644
--- a/tests/hermes_cli/test_dashboard_auth_status_endpoint.py
+++ b/tests/hermes_cli/test_dashboard_auth_status_endpoint.py
@@ -59,19 +59,11 @@ def loopback_client():
     web_server.app.state.auth_required = prev_required
 
 
-def _login(client: TestClient) -> None:
-    """Drive the stub OAuth round trip so the gated client is authed."""
-    r1 = client.get("/auth/login?provider=stub", follow_redirects=False)
-    assert r1.status_code == 302
-    state = r1.headers["location"].split("state=")[1]
-    r2 = client.get(
-        f"/auth/callback?code=stub_code&state={state}", follow_redirects=False
-    )
-    assert r2.status_code == 302
-
-
 def test_status_reports_auth_required_in_gated_mode(gated_client):
-    _login(gated_client)
+    # No ``_login()`` call — ``/api/status`` is in the shared
+    # ``PUBLIC_API_PATHS`` allowlist precisely so external probes (and
+    # the SPA's pre-login bootstrap) can read the gate's shape without
+    # a cookie. Hit it cold.
     r = gated_client.get("/api/status")
     assert r.status_code == 200
     body = r.json()
diff --git a/tests/hermes_cli/test_dashboard_auth_ws_auth.py b/tests/hermes_cli/test_dashboard_auth_ws_auth.py
index 44087e53b4d..0ebed6d9519 100644
--- a/tests/hermes_cli/test_dashboard_auth_ws_auth.py
+++ b/tests/hermes_cli/test_dashboard_auth_ws_auth.py
@@ -14,7 +14,6 @@ pre-existing regression unrelated to dashboard-auth.
 from __future__ import annotations
 
 from types import SimpleNamespace
-from unittest.mock import patch
 
 import pytest
 
@@ -29,7 +28,6 @@ from fastapi.testclient import TestClient
 from hermes_cli import web_server
 from hermes_cli.dashboard_auth import clear_providers, register_provider
 from hermes_cli.dashboard_auth.ws_tickets import (
-    TicketInvalid,
     _reset_for_tests,
     consume_ticket,
     mint_ticket,
diff --git a/tests/hermes_cli/test_dashboard_lifecycle_flags.py b/tests/hermes_cli/test_dashboard_lifecycle_flags.py
index c0c505fc33a..0e6f161572f 100644
--- a/tests/hermes_cli/test_dashboard_lifecycle_flags.py
+++ b/tests/hermes_cli/test_dashboard_lifecycle_flags.py
@@ -15,7 +15,7 @@ from unittest.mock import patch, MagicMock
 
 import pytest
 
-from hermes_cli.main import cmd_dashboard, _report_dashboard_status
+from hermes_cli.main import cmd_dashboard
 
 
 def _ns(**kw):
diff --git a/tests/hermes_cli/test_debug.py b/tests/hermes_cli/test_debug.py
index aad1c8e92a5..b3ce60de2f4 100644
--- a/tests/hermes_cli/test_debug.py
+++ b/tests/hermes_cli/test_debug.py
@@ -1,10 +1,8 @@
 """Tests for ``hermes debug`` CLI command and debug utilities."""
 
 import os
-import sys
 import urllib.error
-from pathlib import Path
-from unittest.mock import MagicMock, patch, call
+from unittest.mock import MagicMock, patch
 
 import pytest
 
@@ -337,7 +335,6 @@ class TestCaptureLogSnapshotRedaction:
         redaction feature ships silently broken for users who opted out of
         runtime redaction (e.g. developers working on the redactor itself).
         """
-        import os
 
         # Force the runtime flag off so we're exercising the force=True path,
         # not the default-on path.
diff --git a/tests/hermes_cli/test_dep_ensure.py b/tests/hermes_cli/test_dep_ensure.py
index 77fee5b7ec5..a19a6de63f2 100644
--- a/tests/hermes_cli/test_dep_ensure.py
+++ b/tests/hermes_cli/test_dep_ensure.py
@@ -1,4 +1,3 @@
-from pathlib import Path
 from unittest.mock import patch
 
 
diff --git a/tests/hermes_cli/test_deprecated_cwd_warning.py b/tests/hermes_cli/test_deprecated_cwd_warning.py
index 4b438e7ebf2..2d449d20cc5 100644
--- a/tests/hermes_cli/test_deprecated_cwd_warning.py
+++ b/tests/hermes_cli/test_deprecated_cwd_warning.py
@@ -1,7 +1,5 @@
 """Tests for warn_deprecated_cwd_env_vars() migration warning."""
 
-import os
-import pytest
 
 
 class TestDeprecatedCwdWarning:
diff --git a/tests/hermes_cli/test_doctor.py b/tests/hermes_cli/test_doctor.py
index 23895477ee0..e1edc95af1a 100644
--- a/tests/hermes_cli/test_doctor.py
+++ b/tests/hermes_cli/test_doctor.py
@@ -792,7 +792,7 @@ class TestGitHubTokenCheck:
         monkeypatch.setenv("HERMES_HOME", str(home))
         monkeypatch.setenv("PATH", "/nonexistent")  # gh not found
 
-        from hermes_cli.doctor import run_doctor, _DHH
+        from hermes_cli.doctor import run_doctor
         import io, contextlib
 
         buf = io.StringIO()
diff --git a/tests/hermes_cli/test_doctor_command_install.py b/tests/hermes_cli/test_doctor_command_install.py
index 8b046b9c2c1..f394c29e92e 100644
--- a/tests/hermes_cli/test_doctor_command_install.py
+++ b/tests/hermes_cli/test_doctor_command_install.py
@@ -1,6 +1,5 @@
 """Tests for the Command Installation check in hermes doctor."""
 
-import os
 import sys
 import types
 from argparse import Namespace
diff --git a/tests/hermes_cli/test_dump_git_commit.py b/tests/hermes_cli/test_dump_git_commit.py
new file mode 100644
index 00000000000..264ad22a585
--- /dev/null
+++ b/tests/hermes_cli/test_dump_git_commit.py
@@ -0,0 +1,118 @@
+"""Tests for hermes_cli.dump._get_git_commit — git SHA resolution for ``hermes dump``.
+
+``hermes dump`` prints the running commit so support bug reports identify the
+exact version.  Source installs resolve it live via ``git rev-parse``; the
+published Docker image excludes ``.git`` and falls back to the baked SHA
+written by the Dockerfile's ``HERMES_GIT_SHA`` build-arg.
+
+These tests cover both paths plus the failure modes (no git, no baked file).
+"""
+
+from unittest.mock import MagicMock, patch
+
+
+def test_get_git_commit_uses_live_git_when_available(tmp_path):
+    """Source install: ``git rev-parse --short=8 HEAD`` wins; no fallback."""
+    from hermes_cli import dump
+
+    repo_dir = tmp_path / "repo"
+    repo_dir.mkdir()
+
+    git_result = MagicMock(returncode=0, stdout="deadbeef\n")
+    # build_info should NOT be consulted when live git succeeds.
+    with patch("hermes_cli.dump.subprocess.run", return_value=git_result) as mock_run, \
+         patch("hermes_cli.build_info.get_build_sha") as mock_build:
+        commit = dump._get_git_commit(repo_dir)
+
+    assert commit == "deadbeef"
+    mock_run.assert_called_once()
+    mock_build.assert_not_called()
+
+
+def test_get_git_commit_falls_back_to_build_sha_when_live_git_fails(tmp_path):
+    """Docker image case: live git returns non-zero → use baked SHA."""
+    from hermes_cli import dump
+
+    repo_dir = tmp_path / "no-git-here"
+    repo_dir.mkdir()
+
+    failed = MagicMock(returncode=128, stdout="")
+    with patch("hermes_cli.dump.subprocess.run", return_value=failed), \
+         patch("hermes_cli.build_info.get_build_sha", return_value="cafef00d"):
+        commit = dump._get_git_commit(repo_dir)
+
+    assert commit == "cafef00d"
+
+
+def test_get_git_commit_falls_back_when_git_returns_empty_stdout(tmp_path):
+    """Edge case: git exits 0 but prints nothing — still try the baked SHA."""
+    from hermes_cli import dump
+
+    repo_dir = tmp_path / "repo"
+    repo_dir.mkdir()
+
+    empty = MagicMock(returncode=0, stdout="\n")
+    with patch("hermes_cli.dump.subprocess.run", return_value=empty), \
+         patch("hermes_cli.build_info.get_build_sha", return_value="abcdef12"):
+        commit = dump._get_git_commit(repo_dir)
+
+    assert commit == "abcdef12"
+
+
+def test_get_git_commit_falls_back_when_git_raises(tmp_path):
+    """git binary missing (e.g. minimal container w/o git) → baked SHA path."""
+    from hermes_cli import dump
+
+    repo_dir = tmp_path / "repo"
+    repo_dir.mkdir()
+
+    with patch("hermes_cli.dump.subprocess.run", side_effect=FileNotFoundError("git")), \
+         patch("hermes_cli.build_info.get_build_sha", return_value="feedface"):
+        commit = dump._get_git_commit(repo_dir)
+
+    assert commit == "feedface"
+
+
+def test_get_git_commit_returns_unknown_when_neither_source_available(tmp_path):
+    """Pip-installed wheel: no git, no baked SHA → '(unknown)' (legacy contract)."""
+    from hermes_cli import dump
+
+    repo_dir = tmp_path / "repo"
+    repo_dir.mkdir()
+
+    failed = MagicMock(returncode=128, stdout="")
+    with patch("hermes_cli.dump.subprocess.run", return_value=failed), \
+         patch("hermes_cli.build_info.get_build_sha", return_value=None):
+        commit = dump._get_git_commit(repo_dir)
+
+    assert commit == "(unknown)"
+
+
+def test_get_git_commit_output_format_identical_between_sources(tmp_path):
+    """Regression guard: live-git and baked-SHA outputs share the same shape.
+
+    Ben explicitly asked for identical output between Docker and source installs
+    so support tooling that parses ``hermes dump`` doesn't have to special-case
+    container builds.  Both paths must return a bare 8-char SHA — no prefix,
+    no suffix, no annotation.
+    """
+    from hermes_cli import dump
+
+    repo_dir = tmp_path / "repo"
+    repo_dir.mkdir()
+
+    # Live-git path.
+    git_result = MagicMock(returncode=0, stdout="b2f477a3\n")
+    with patch("hermes_cli.dump.subprocess.run", return_value=git_result):
+        live = dump._get_git_commit(repo_dir)
+
+    # Baked-SHA path.
+    failed = MagicMock(returncode=128, stdout="")
+    with patch("hermes_cli.dump.subprocess.run", return_value=failed), \
+         patch("hermes_cli.build_info.get_build_sha", return_value="b2f477a3"):
+        baked = dump._get_git_commit(repo_dir)
+
+    assert live == baked == "b2f477a3"
+    # Same length, same charset — no decoration in either branch.
+    assert len(live) == 8
+    assert all(c in "0123456789abcdef" for c in live)
diff --git a/tests/hermes_cli/test_env_loader.py b/tests/hermes_cli/test_env_loader.py
index 2523754a84b..f719fae01ce 100644
--- a/tests/hermes_cli/test_env_loader.py
+++ b/tests/hermes_cli/test_env_loader.py
@@ -1,7 +1,6 @@
 import importlib
 import os
 import sys
-from pathlib import Path
 
 from hermes_cli.env_loader import load_hermes_dotenv
 
diff --git a/tests/hermes_cli/test_fallback_cmd.py b/tests/hermes_cli/test_fallback_cmd.py
index 2eed7d62f97..bc7817cf9c9 100644
--- a/tests/hermes_cli/test_fallback_cmd.py
+++ b/tests/hermes_cli/test_fallback_cmd.py
@@ -1,7 +1,6 @@
 """Tests for `hermes fallback` — chain reading, add/remove/clear, legacy migration."""
 from __future__ import annotations
 
-import io
 import types
 from pathlib import Path
 from unittest.mock import patch
diff --git a/tests/hermes_cli/test_gateway.py b/tests/hermes_cli/test_gateway.py
index d66cc5a729a..0988f8fb64a 100644
--- a/tests/hermes_cli/test_gateway.py
+++ b/tests/hermes_cli/test_gateway.py
@@ -2,7 +2,6 @@
 
 import sys
 from types import ModuleType, SimpleNamespace
-from unittest.mock import patch, call
 
 import pytest
 
diff --git a/tests/hermes_cli/test_gateway_platform_gating.py b/tests/hermes_cli/test_gateway_platform_gating.py
index c16875687ce..16a51d419b7 100644
--- a/tests/hermes_cli/test_gateway_platform_gating.py
+++ b/tests/hermes_cli/test_gateway_platform_gating.py
@@ -12,7 +12,6 @@ Currently:
   Windows path that works.
 """
 
-import sys
 
 
 class TestMatrixHiddenOnWindows:
diff --git a/tests/hermes_cli/test_gateway_s6_dispatch.py b/tests/hermes_cli/test_gateway_s6_dispatch.py
index d7146b2a397..c730da7219c 100644
--- a/tests/hermes_cli/test_gateway_s6_dispatch.py
+++ b/tests/hermes_cli/test_gateway_s6_dispatch.py
@@ -7,7 +7,6 @@ host systemd/launchd/windows code path.
 """
 from __future__ import annotations
 
-from typing import Any
 
 import pytest
 
diff --git a/tests/hermes_cli/test_gateway_service.py b/tests/hermes_cli/test_gateway_service.py
index b1fcadbf4f0..c6baa715632 100644
--- a/tests/hermes_cli/test_gateway_service.py
+++ b/tests/hermes_cli/test_gateway_service.py
@@ -1321,7 +1321,6 @@ class TestSystemServiceIdentityRootHandling:
 
     def test_auto_detected_root_is_rejected(self, monkeypatch):
         """When root is auto-detected (not explicitly requested), raise."""
-        import grp
 
         monkeypatch.delenv("SUDO_USER", raising=False)
         monkeypatch.setenv("USER", "root")
@@ -1343,7 +1342,6 @@ class TestSystemServiceIdentityRootHandling:
 
     def test_non_root_user_passes_through(self, monkeypatch):
         """Normal non-root user works as before."""
-        import grp
 
         monkeypatch.delenv("SUDO_USER", raising=False)
         monkeypatch.setenv("USER", "nobody")
@@ -1706,7 +1704,12 @@ class TestSystemUnitPathRemapping:
         assert str(root_home) not in unit
         # Target user paths should be present
         assert "/home/alice" in unit
-        assert "WorkingDirectory=/home/alice/.hermes/hermes-agent" in unit
+        # WorkingDirectory is anchored at the target user's HERMES_HOME (stable,
+        # always exists) — NOT the source checkout under it. Pinning cwd to the
+        # checkout is the rot bug fixed alongside this: a relocated/removed
+        # checkout would crash-loop the unit on CHDIR (status=200).
+        assert "WorkingDirectory=/home/alice/.hermes" in unit
+        assert "WorkingDirectory=/home/alice/.hermes/hermes-agent" not in unit
 
 
 class TestDockerAwareGateway:
@@ -2533,3 +2536,46 @@ class TestGatewayCommandCatchesSystemScopeError:
         # Renders the message, NOT the ``('msg', 'action')`` tuple repr
         assert "System gateway start requires root. Re-run with sudo." in out
         assert "('" not in out  # no tuple repr leaking through
+
+
+class TestServiceWorkingDirIsStable:
+    """The gateway service must anchor WorkingDirectory at a stable path
+    (HERMES_HOME), never the source checkout / worktree, so a relocated or
+    deleted checkout can't crash-loop the unit on CHDIR (status=200).
+    """
+
+    def test_stable_working_dir_uses_hermes_home(self, tmp_path, monkeypatch):
+        home = tmp_path / ".hermes"
+        home.mkdir()
+        monkeypatch.setattr(gateway_cli, "get_hermes_home", lambda: home)
+        assert Path(gateway_cli._stable_service_working_dir()) == home.resolve()
+
+    def test_stable_working_dir_falls_back_to_project_root(self, tmp_path, monkeypatch):
+        # HERMES_HOME points somewhere that does not exist -> fall back.
+        missing = tmp_path / "does-not-exist" / ".hermes"
+        monkeypatch.setattr(gateway_cli, "get_hermes_home", lambda: missing)
+        assert gateway_cli._stable_service_working_dir() == str(gateway_cli.PROJECT_ROOT)
+
+    def test_user_unit_workingdirectory_is_hermes_home_not_checkout(self, tmp_path, monkeypatch):
+        home = tmp_path / ".hermes"
+        home.mkdir()
+        monkeypatch.setattr(gateway_cli, "get_hermes_home", lambda: home)
+        unit = gateway_cli.generate_systemd_unit(system=False)
+        wd = [l for l in unit.splitlines() if l.startswith("WorkingDirectory=")]
+        assert wd, "unit has no WorkingDirectory line"
+        value = wd[0].split("=", 1)[1]
+        assert Path(value).resolve() == home.resolve()
+        # The bug class: never pin cwd inside a transient worktree checkout.
+        assert "/.worktrees/" not in value
+
+    def test_launchd_workingdirectory_is_hermes_home(self, tmp_path, monkeypatch):
+        import re
+
+        home = tmp_path / ".hermes"
+        home.mkdir()
+        monkeypatch.setattr(gateway_cli, "get_hermes_home", lambda: home)
+        plist = gateway_cli.generate_launchd_plist()
+        m = re.search(r"<key>WorkingDirectory</key>\s*<string>(.*?)</string>", plist)
+        assert m, "plist has no WorkingDirectory entry"
+        assert Path(m.group(1)).resolve() == home.resolve()
+        assert "/.worktrees/" not in m.group(1)
diff --git a/tests/hermes_cli/test_gateway_service_paths.py b/tests/hermes_cli/test_gateway_service_paths.py
index 71abc4aef24..86bca738274 100644
--- a/tests/hermes_cli/test_gateway_service_paths.py
+++ b/tests/hermes_cli/test_gateway_service_paths.py
@@ -1,4 +1,3 @@
-from pathlib import Path
 from unittest.mock import patch
 
 
diff --git a/tests/hermes_cli/test_gateway_windows.py b/tests/hermes_cli/test_gateway_windows.py
index 1bf6186fe23..e6130219828 100644
--- a/tests/hermes_cli/test_gateway_windows.py
+++ b/tests/hermes_cli/test_gateway_windows.py
@@ -481,4 +481,221 @@ def test_uninstall_access_denied_declined_keeps_task_and_cleans_files(monkeypatc
     out = capsys.readouterr().out
     assert "Skipped elevation" in out
     assert "UAC is Windows' admin approval prompt" in out
-    assert "Scheduled Task still registered" in out
\ No newline at end of file
+    assert "Scheduled Task still registered" in out
+
+
+# ---------------------------------------------------------------------------
+# stop() drain semantics — issue #33778
+#
+# Background: on Windows, asyncio.add_signal_handler raises NotImplementedError,
+# so the gateway's SIGTERM handler (which drains in-flight agents and writes
+# resume_pending=True) never fires when `hermes gateway stop` kills the
+# process. The fix: stop() writes the planned_stop_marker first, waits for
+# the gateway's marker-watcher thread to drain + exit cleanly, then escalates
+# to taskkill if drain times out.
+# ---------------------------------------------------------------------------
+
+
+def test_stop_writes_planned_stop_marker_before_killing(monkeypatch):
+    """stop() must write the planned-stop marker BEFORE any kill signal.
+
+    Without this, the gateway's drain loop never runs on Windows and
+    sessions silently lose context across restarts.
+    """
+    pid = 99999
+    events = []
+
+    monkeypatch.setattr(gateway_windows, "_assert_windows", lambda: None)
+    monkeypatch.setattr(gateway_windows, "is_task_registered", lambda: False)
+
+    # Stub the marker write so we can record the order of operations.
+    from gateway import status as status_mod
+
+    def fake_write_marker(target_pid):
+        events.append(("write_marker", target_pid))
+        return True
+
+    def fake_pid_exists(check_pid):
+        # Drain succeeds: pid "exits" right after the marker write.
+        return ("write_marker", pid) not in events
+
+    monkeypatch.setattr(status_mod, "write_planned_stop_marker", fake_write_marker)
+    monkeypatch.setattr(status_mod, "_pid_exists", fake_pid_exists)
+    monkeypatch.setattr(status_mod, "get_running_pid", lambda: pid)
+
+    def fake_kill(**kwargs):
+        events.append(("kill", kwargs.get("force", False)))
+        return 0
+
+    monkeypatch.setattr("hermes_cli.gateway.kill_gateway_processes", fake_kill)
+    monkeypatch.setattr("hermes_cli.gateway._get_restart_drain_timeout", lambda: 5.0)
+
+    gateway_windows.stop()
+
+    # Marker MUST be written before any kill.
+    kinds = [e[0] for e in events]
+    assert "write_marker" in kinds, "stop() never wrote the planned-stop marker"
+    marker_idx = kinds.index("write_marker")
+    kill_idx = kinds.index("kill") if "kill" in kinds else len(kinds)
+    assert marker_idx < kill_idx, (
+        f"stop() killed before writing the marker (events={events})"
+    )
+
+
+def test_stop_waits_for_graceful_drain_before_force_kill(monkeypatch):
+    """When drain succeeds, stop() should NOT force-kill the gateway.
+
+    drained=True means the gateway exited cleanly after seeing the
+    marker — escalating to taskkill /F afterwards would be wasted
+    work and may emit confusing "killed N processes" output.
+    """
+    pid = 88888
+    events = []
+
+    monkeypatch.setattr(gateway_windows, "_assert_windows", lambda: None)
+    monkeypatch.setattr(gateway_windows, "is_task_registered", lambda: False)
+
+    from gateway import status as status_mod
+    monkeypatch.setattr(status_mod, "write_planned_stop_marker", lambda p: True)
+
+    # Simulate the gateway exiting cleanly after one poll tick.
+    poll_count = [0]
+    def fake_pid_exists(check_pid):
+        poll_count[0] += 1
+        return poll_count[0] < 2  # alive on first poll, gone on second
+    monkeypatch.setattr(status_mod, "_pid_exists", fake_pid_exists)
+    monkeypatch.setattr(status_mod, "get_running_pid", lambda: pid)
+
+    def fake_kill(**kwargs):
+        events.append(("kill", kwargs.get("force", False)))
+        return 0
+    monkeypatch.setattr("hermes_cli.gateway.kill_gateway_processes", fake_kill)
+    monkeypatch.setattr("hermes_cli.gateway._get_restart_drain_timeout", lambda: 5.0)
+
+    gateway_windows.stop()
+
+    # kill_gateway_processes is still called as the no-op sweep, but
+    # NOT with force=True — drain succeeded, gateway is already gone.
+    assert events == [("kill", False)], (
+        f"After clean drain, force kill should be disabled (events={events})"
+    )
+
+
+def test_stop_escalates_to_force_kill_when_drain_times_out(monkeypatch):
+    """When drain times out, stop() MUST escalate to force=True.
+
+    Drain timeout = gateway is stuck or unresponsive. Without the
+    taskkill /T /F escalation, the gateway stays alive and the next
+    `hermes gateway start` fails with "another instance is running".
+    """
+    pid = 77777
+    events = []
+
+    monkeypatch.setattr(gateway_windows, "_assert_windows", lambda: None)
+    monkeypatch.setattr(gateway_windows, "is_task_registered", lambda: False)
+
+    from gateway import status as status_mod
+    monkeypatch.setattr(status_mod, "write_planned_stop_marker", lambda p: True)
+    # PID never exits — drain times out.
+    monkeypatch.setattr(status_mod, "_pid_exists", lambda check_pid: True)
+    monkeypatch.setattr(status_mod, "get_running_pid", lambda: pid)
+
+    def fake_kill(**kwargs):
+        events.append(("kill", kwargs.get("force", False)))
+        return 1
+    monkeypatch.setattr("hermes_cli.gateway.kill_gateway_processes", fake_kill)
+    # Tiny drain timeout to keep the test fast.
+    monkeypatch.setattr("hermes_cli.gateway._get_restart_drain_timeout", lambda: 1.0)
+
+    gateway_windows.stop()
+
+    # When drain times out, kill is invoked with force=True so taskkill /T /F
+    # walks the process tree.
+    assert events == [("kill", True)], (
+        f"After drain timeout, kill must use force=True (events={events})"
+    )
+
+
+def test_stop_no_running_gateway_skips_drain(monkeypatch):
+    """When no gateway is running, skip the drain wait entirely."""
+    events = []
+
+    monkeypatch.setattr(gateway_windows, "_assert_windows", lambda: None)
+    monkeypatch.setattr(gateway_windows, "is_task_registered", lambda: False)
+
+    from gateway import status as status_mod
+    monkeypatch.setattr(status_mod, "get_running_pid", lambda: None)
+
+    def fake_write_marker(target_pid):
+        events.append(("write_marker", target_pid))
+        return True
+    monkeypatch.setattr(status_mod, "write_planned_stop_marker", fake_write_marker)
+    monkeypatch.setattr(status_mod, "_pid_exists", lambda check_pid: False)
+
+    def fake_kill(**kwargs):
+        events.append(("kill", kwargs.get("force", False)))
+        return 0
+    monkeypatch.setattr("hermes_cli.gateway.kill_gateway_processes", fake_kill)
+    monkeypatch.setattr("hermes_cli.gateway._get_restart_drain_timeout", lambda: 5.0)
+
+    gateway_windows.stop()
+
+    # With no PID to drain, no marker is written.  Kill sweep still runs
+    # (defensive — covers the case where a stray gateway is alive without
+    # a PID file).  force=True because drained=False.
+    assert ("write_marker", None) not in events
+    assert all(e[0] != "write_marker" for e in events), (
+        f"Should not write marker when no PID is running (events={events})"
+    )
+    assert events == [("kill", True)]
+
+
+def test_drain_helper_handles_invalid_pid(monkeypatch):
+    """_drain_gateway_pid returns False for invalid PIDs without crashing."""
+    assert gateway_windows._drain_gateway_pid(0, 5.0) is False
+    assert gateway_windows._drain_gateway_pid(-1, 5.0) is False
+
+
+def test_drain_helper_returns_true_when_pid_exits_quickly(monkeypatch):
+    """_drain_gateway_pid polls _pid_exists until it returns False."""
+    pid = 66666
+    poll_count = [0]
+
+    def fake_pid_exists(check_pid):
+        poll_count[0] += 1
+        return poll_count[0] < 3  # alive twice, then gone
+
+    from gateway import status as status_mod
+    monkeypatch.setattr(status_mod, "write_planned_stop_marker", lambda p: True)
+    monkeypatch.setattr(status_mod, "_pid_exists", fake_pid_exists)
+
+    assert gateway_windows._drain_gateway_pid(pid, drain_timeout=5.0) is True
+
+
+def test_drain_helper_returns_false_on_timeout(monkeypatch):
+    """_drain_gateway_pid returns False when the PID never exits."""
+    from gateway import status as status_mod
+    monkeypatch.setattr(status_mod, "write_planned_stop_marker", lambda p: True)
+    monkeypatch.setattr(status_mod, "_pid_exists", lambda check_pid: True)
+
+    assert gateway_windows._drain_gateway_pid(55555, drain_timeout=1.0) is False
+
+
+def test_drain_helper_still_waits_if_marker_write_fails(monkeypatch):
+    """Marker-write failures are swallowed; drain still polls for PID exit.
+
+    If the marker can't be written (disk full, permission error), the
+    gateway can't drain — but the wait still happens so a slow-shutdown
+    gateway from a different code path (e.g. SIGTERM working on this
+    platform after all) still gets observed cleanly.
+    """
+    pid = 44444
+    def fake_write(target_pid):
+        raise OSError("disk full")
+
+    from gateway import status as status_mod
+    monkeypatch.setattr(status_mod, "write_planned_stop_marker", fake_write)
+    monkeypatch.setattr(status_mod, "_pid_exists", lambda check_pid: False)
+
+    # Returns True because _pid_exists immediately says "gone".
+    assert gateway_windows._drain_gateway_pid(pid, drain_timeout=5.0) is True
\ No newline at end of file
diff --git a/tests/hermes_cli/test_gateway_wsl.py b/tests/hermes_cli/test_gateway_wsl.py
index 8fbbe24245d..d12391a9fb0 100644
--- a/tests/hermes_cli/test_gateway_wsl.py
+++ b/tests/hermes_cli/test_gateway_wsl.py
@@ -1,8 +1,6 @@
 """Tests for WSL detection and WSL-aware gateway behavior."""
 
-import io
 import subprocess
-import sys
 from types import SimpleNamespace
 from unittest.mock import patch, MagicMock, mock_open
 
diff --git a/tests/hermes_cli/test_gemini_provider.py b/tests/hermes_cli/test_gemini_provider.py
index 1daeb281f0e..61d7bc48ebb 100644
--- a/tests/hermes_cli/test_gemini_provider.py
+++ b/tests/hermes_cli/test_gemini_provider.py
@@ -1,6 +1,5 @@
 """Tests for Google AI Studio (Gemini) provider integration."""
 
-import os
 import pytest
 from unittest.mock import patch, MagicMock
 
diff --git a/tests/hermes_cli/test_goals.py b/tests/hermes_cli/test_goals.py
index 9d8c3f48fe1..0dae684b629 100644
--- a/tests/hermes_cli/test_goals.py
+++ b/tests/hermes_cli/test_goals.py
@@ -525,7 +525,6 @@ class TestGoalStateSubgoalsBackcompat:
     def test_old_state_meta_row_loads_without_subgoals(self):
         """A goal serialized BEFORE the subgoals field existed must
         round-trip with an empty list, not crash."""
-        import json
         from hermes_cli.goals import GoalState
 
         legacy = json.dumps({
@@ -647,7 +646,7 @@ class TestJudgeGoalWithSubgoals:
         We don't actually call the model — we patch the aux client to
         capture the prompt that would be sent.
         """
-        from unittest.mock import patch, MagicMock
+        from unittest.mock import patch
         from hermes_cli import goals
 
         captured = {}
diff --git a/tests/hermes_cli/test_graphical_browser_detection.py b/tests/hermes_cli/test_graphical_browser_detection.py
new file mode 100644
index 00000000000..31b6418181f
--- /dev/null
+++ b/tests/hermes_cli/test_graphical_browser_detection.py
@@ -0,0 +1,96 @@
+"""Tests for `_can_open_graphical_browser()` in hermes_cli.auth.
+
+Guards the fix for the May 2026 report where `hermes auth add xai-oauth`
+launched a text-mode browser (w3m) INSIDE the terminal on a headless Linux
+box — `_is_remote_session()` only checked SSH/cloud-shell env vars, so a plain
+local box with no GUI browser still called `webbrowser.open()`, which resolved
+to a console browser and hijacked the TTY.
+
+The helper distinguishes "a real windowed browser will pop up" from "a console
+browser will hijack the terminal" so OAuth callsites can fall back to printing
+the URL / manual paste instead of auto-opening.
+"""
+
+from __future__ import annotations
+
+import webbrowser
+
+import pytest
+
+from hermes_cli.auth import _can_open_graphical_browser
+
+
+class _FakeController:
+    def __init__(self, name: str) -> None:
+        self.name = name
+
+    def open(self, *_a, **_kw):  # pragma: no cover - never invoked
+        return True
+
+
+@pytest.fixture(autouse=True)
+def _clean_browser_env(monkeypatch):
+    """Each test controls DISPLAY / WAYLAND_DISPLAY / BROWSER explicitly."""
+    for var in ("DISPLAY", "WAYLAND_DISPLAY", "BROWSER"):
+        monkeypatch.delenv(var, raising=False)
+    yield
+
+
+def _force_platform_linux(monkeypatch):
+    monkeypatch.setattr("hermes_cli.auth.sys.platform", "linux")
+
+
+def _force_resolved_browser(monkeypatch, name: str):
+    monkeypatch.setattr(webbrowser, "get", lambda *_a, **_kw: _FakeController(name))
+
+
+def test_headless_linux_no_display_refuses(monkeypatch):
+    """The reported bug: headless Linux, no display server → don't auto-open."""
+    _force_platform_linux(monkeypatch)
+    # Even if a GUI browser somehow resolved, no display means no GUI.
+    _force_resolved_browser(monkeypatch, "google-chrome")
+    assert _can_open_graphical_browser() is False
+
+
+def test_browser_env_pointing_at_console_browser_refuses(monkeypatch):
+    """$BROWSER=w3m must refuse even with a display server present."""
+    _force_platform_linux(monkeypatch)
+    monkeypatch.setenv("DISPLAY", ":0")
+    monkeypatch.setenv("BROWSER", "/usr/bin/w3m")
+    assert _can_open_graphical_browser() is False
+
+
+@pytest.mark.parametrize("console", ["w3m", "lynx", "links", "elinks", "browsh"])
+def test_resolved_console_browser_refuses(monkeypatch, console):
+    """When webbrowser resolves to a console browser, refuse to auto-open."""
+    _force_platform_linux(monkeypatch)
+    monkeypatch.setenv("DISPLAY", ":0")
+    _force_resolved_browser(monkeypatch, console)
+    assert _can_open_graphical_browser() is False
+
+
+def test_graphical_browser_with_display_allows(monkeypatch):
+    """Real GUI browser + display server → auto-open is fine."""
+    _force_platform_linux(monkeypatch)
+    monkeypatch.setenv("DISPLAY", ":0")
+    _force_resolved_browser(monkeypatch, "firefox")
+    assert _can_open_graphical_browser() is True
+
+
+def test_webbrowser_get_raises_refuses(monkeypatch):
+    """No resolvable browser at all → don't auto-open."""
+    _force_platform_linux(monkeypatch)
+    monkeypatch.setenv("DISPLAY", ":0")
+
+    def _boom(*_a, **_kw):
+        raise webbrowser.Error("no browser")
+
+    monkeypatch.setattr(webbrowser, "get", _boom)
+    assert _can_open_graphical_browser() is False
+
+
+def test_non_linux_with_gui_allows(monkeypatch):
+    """macOS / Windows always have a usable default GUI browser."""
+    monkeypatch.setattr("hermes_cli.auth.sys.platform", "darwin")
+    _force_resolved_browser(monkeypatch, "MacOSX")
+    assert _can_open_graphical_browser() is True
diff --git a/tests/hermes_cli/test_hooks_cli.py b/tests/hermes_cli/test_hooks_cli.py
index 6d4609c523c..3713df10ad6 100644
--- a/tests/hermes_cli/test_hooks_cli.py
+++ b/tests/hermes_cli/test_hooks_cli.py
@@ -4,7 +4,6 @@ from __future__ import annotations
 
 import io
 import json
-import sys
 from contextlib import redirect_stdout
 from pathlib import Path
 from types import SimpleNamespace
diff --git a/tests/hermes_cli/test_image_gen_picker.py b/tests/hermes_cli/test_image_gen_picker.py
index 04d46bbbb86..79e1a9a93b2 100644
--- a/tests/hermes_cli/test_image_gen_picker.py
+++ b/tests/hermes_cli/test_image_gen_picker.py
@@ -237,7 +237,7 @@ class TestConfigWriting:
         monkeypatch.setattr(
             tools_config,
             "get_nous_subscription_features",
-            lambda config: SimpleNamespace(
+            lambda config, **kwargs: SimpleNamespace(
                 features={"image_gen": SimpleNamespace(managed_by_nous=True)}
             ),
         )
diff --git a/tests/hermes_cli/test_inventory.py b/tests/hermes_cli/test_inventory.py
index 2a288b37a45..1b24ba6bdd6 100644
--- a/tests/hermes_cli/test_inventory.py
+++ b/tests/hermes_cli/test_inventory.py
@@ -21,7 +21,6 @@ from __future__ import annotations
 
 from unittest.mock import patch
 
-import pytest
 
 from hermes_cli.inventory import (
     ConfigContext,
@@ -158,8 +157,11 @@ def test_build_models_payload_returns_expected_shape():
 
 
 def test_build_models_payload_does_not_call_provider_model_ids():
-    """Curated lists must come from list_authenticated_providers, not
-    provider_model_ids — that would pull TTS/embeddings/etc.
+    """``build_models_payload`` is a thin shape adapter — it delegates the
+    actual curation to ``list_authenticated_providers`` (which DOES call
+    ``cached_provider_model_ids`` internally for live discovery, with disk
+    caching). ``build_models_payload`` itself must not call the live fetcher
+    directly; the test pins that boundary.
     """
     rows = [{"slug": "nous", "name": "Nous", "models": ["hermes-4-405b"],
              "total_models": 1, "is_current": False, "is_user_defined": False,
diff --git a/tests/hermes_cli/test_kanban_cli_dispatch_passthrough.py b/tests/hermes_cli/test_kanban_cli_dispatch_passthrough.py
new file mode 100644
index 00000000000..8bb75fe6292
--- /dev/null
+++ b/tests/hermes_cli/test_kanban_cli_dispatch_passthrough.py
@@ -0,0 +1,150 @@
+"""Regression tests for #33488 (CLI max_in_progress / max_spawn / per-profile
+config passthrough) and #29415 (kanban_swarm humanizer skill ref).
+
+These two fixes are bundled because they're both small, both touch the
+kanban dispatcher's CLI surface, and they each guard against a silent
+operator footgun that only manifests in long-running setups.
+"""
+from __future__ import annotations
+
+import argparse
+import os
+import sys
+import tempfile
+from types import SimpleNamespace
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+
+@pytest.fixture()
+def isolated_kanban_home(monkeypatch):
+    """Spin up a fresh HERMES_HOME with a clean kanban DB."""
+    test_home = tempfile.mkdtemp(prefix="kanban_cli_passthrough_")
+    os.makedirs(os.path.join(test_home, "profiles", "default"), exist_ok=True)
+    monkeypatch.setenv("HERMES_HOME", test_home)
+    for mod in list(sys.modules.keys()):
+        if mod.startswith("hermes_cli") or mod.startswith("hermes_state") or mod == "hermes_constants":
+            del sys.modules[mod]
+    yield test_home
+
+
+def test_cli_dispatch_passes_max_in_progress_from_config(isolated_kanban_home, monkeypatch):
+    """#33488: hermes kanban dispatch must pass kanban.max_in_progress from
+    config to dispatch_once. Without this, the global concurrency cap is
+    unreachable from the CLI even though it works from the gateway."""
+    from hermes_cli import kanban as kb_cli
+    from hermes_cli import kanban_db
+
+    # Configure max_in_progress in the loaded config.
+    fake_config = {
+        "kanban": {
+            "max_in_progress": 3,
+            "max_spawn": 5,
+            "default_assignee": "default",
+            "max_in_progress_per_profile": 2,
+        }
+    }
+    monkeypatch.setattr(
+        "hermes_cli.config.load_config", lambda: fake_config
+    )
+
+    captured = {}
+
+    def fake_dispatch_once(conn, **kwargs):
+        captured.update(kwargs)
+        return kanban_db.DispatchResult()
+
+    monkeypatch.setattr(kanban_db, "dispatch_once", fake_dispatch_once)
+
+    args = argparse.Namespace(dry_run=True, max=None, failure_limit=2, json=False)
+    kb_cli._cmd_dispatch(args)
+
+    # Every config value must have reached dispatch_once.
+    assert captured.get("max_in_progress") == 3, (
+        f"CLI must pass kanban.max_in_progress from config; got {captured.get('max_in_progress')!r}"
+    )
+    assert captured.get("max_spawn") == 5, (
+        f"CLI must pass kanban.max_spawn from config when --max is not provided; got {captured.get('max_spawn')!r}"
+    )
+    assert captured.get("default_assignee") == "default"
+    assert captured.get("max_in_progress_per_profile") == 2
+
+
+def test_cli_max_flag_overrides_config_max_spawn(isolated_kanban_home, monkeypatch):
+    """--max on the CLI takes precedence over kanban.max_spawn in config.
+    The CLI flag is the explicit operator signal; config is the default."""
+    from hermes_cli import kanban as kb_cli
+    from hermes_cli import kanban_db
+
+    fake_config = {"kanban": {"max_spawn": 10}}
+    monkeypatch.setattr("hermes_cli.config.load_config", lambda: fake_config)
+
+    captured = {}
+    monkeypatch.setattr(
+        kanban_db, "dispatch_once",
+        lambda conn, **kw: (captured.update(kw), kanban_db.DispatchResult())[1],
+    )
+
+    args = argparse.Namespace(dry_run=True, max=2, failure_limit=2, json=False)
+    kb_cli._cmd_dispatch(args)
+
+    assert captured.get("max_spawn") == 2, (
+        f"CLI --max=2 must override config kanban.max_spawn=10; got {captured.get('max_spawn')!r}"
+    )
+
+
+def test_cli_invalid_max_in_progress_silently_disables(isolated_kanban_home, monkeypatch):
+    """Invalid kanban.max_in_progress values (0, negative, non-int) should
+    silently fall through to None — no crash, no surprise behavior."""
+    from hermes_cli import kanban as kb_cli
+    from hermes_cli import kanban_db
+
+    for bad_val in (0, -1, "abc", "1.5"):
+        fake_config = {"kanban": {"max_in_progress": bad_val}}
+        monkeypatch.setattr("hermes_cli.config.load_config", lambda: fake_config)
+        captured = {}
+        monkeypatch.setattr(
+            kanban_db, "dispatch_once",
+            lambda conn, **kw: (captured.update(kw), kanban_db.DispatchResult())[1],
+        )
+        args = argparse.Namespace(dry_run=True, max=None, failure_limit=2, json=False)
+        kb_cli._cmd_dispatch(args)
+        assert captured.get("max_in_progress") is None, (
+            f"invalid max_in_progress={bad_val!r} should fall through to None, "
+            f"got {captured.get('max_in_progress')!r}"
+        )
+
+
+def test_kanban_swarm_uses_existing_humanizer_skill():
+    """#29415: kanban_swarm.py used to hardcode skills=['avoid-ai-writing'],
+    a skill that doesn't exist in any registry — synthesizer workers
+    crashed with 'Unknown skill(s): avoid-ai-writing' on every retry.
+
+    Verify the synthesizer card now uses the bundled 'humanizer' skill
+    which actually exists at skills/creative/humanizer/SKILL.md."""
+    import pathlib
+
+    swarm_path = (
+        pathlib.Path(__file__).resolve().parent.parent.parent
+        / "hermes_cli" / "kanban_swarm.py"
+    )
+    src = swarm_path.read_text()
+    assert "avoid-ai-writing" not in src, (
+        "kanban_swarm.py must not reference 'avoid-ai-writing' — that "
+        "skill doesn't exist in any registry, crashing synthesizers (#29415)"
+    )
+    assert '"humanizer"' in src, (
+        "kanban_swarm.py should use the bundled 'humanizer' skill for "
+        "synthesizer cards (the original intent of 'avoid-ai-writing')"
+    )
+
+    # And the replacement skill must actually exist on disk.
+    skills_root = (
+        pathlib.Path(__file__).resolve().parent.parent.parent / "skills"
+    )
+    humanizer_path = skills_root / "creative" / "humanizer" / "SKILL.md"
+    assert humanizer_path.is_file(), (
+        f"humanizer skill missing at {humanizer_path}; the kanban_swarm fix "
+        "for #29415 requires this bundled skill to exist"
+    )
diff --git a/tests/hermes_cli/test_kanban_core_functionality.py b/tests/hermes_cli/test_kanban_core_functionality.py
index 05fb31c4d5f..c28671dde51 100644
--- a/tests/hermes_cli/test_kanban_core_functionality.py
+++ b/tests/hermes_cli/test_kanban_core_functionality.py
@@ -18,7 +18,6 @@ import threading
 import time
 from pathlib import Path
 from types import SimpleNamespace
-from typing import Optional
 
 import pytest
 
diff --git a/tests/hermes_cli/test_kanban_db.py b/tests/hermes_cli/test_kanban_db.py
index 30cb8421a20..020ad4fb425 100644
--- a/tests/hermes_cli/test_kanban_db.py
+++ b/tests/hermes_cli/test_kanban_db.py
@@ -5,7 +5,9 @@ from __future__ import annotations
 import concurrent.futures
 import os
 import sqlite3
+import sys
 import time
+import types
 import unittest.mock
 from pathlib import Path
 
@@ -49,6 +51,43 @@ def test_init_creates_expected_tables(kanban_home):
     assert {"tasks", "task_links", "task_comments", "task_events"} <= names
 
 
+def test_connect_honors_kanban_busy_timeout_env(kanban_home, monkeypatch):
+    """All kanban connections should use the explicit busy-timeout knob.
+
+    A worker stampede should wait for SQLite's writer lock instead of failing
+    immediately with ``database is locked`` during first-connect/WAL/schema
+    setup.  The timeout must be queryable via PRAGMA so CLI, gateway, and tool
+    connections behave the same way.
+    """
+    monkeypatch.setenv("HERMES_KANBAN_BUSY_TIMEOUT_MS", "123456")
+
+    with kb.connect() as conn:
+        row = conn.execute("PRAGMA busy_timeout").fetchone()
+
+    assert row[0] == 123456
+
+
+def test_cross_process_init_lock_uses_windows_byte_range_lock(tmp_path, monkeypatch):
+    """Windows must use a real process lock, not a no-op sidecar open."""
+    calls: list[tuple[int, int, int]] = []
+    fake_msvcrt = types.SimpleNamespace(
+        LK_LOCK=1,
+        LK_UNLCK=2,
+        locking=lambda fd, mode, nbytes: calls.append((fd, mode, nbytes)),
+    )
+    monkeypatch.setattr(kb, "_IS_WINDOWS", True)
+    monkeypatch.setitem(sys.modules, "msvcrt", fake_msvcrt)
+
+    db_path = tmp_path / "kanban.db"
+    with kb._cross_process_init_lock(db_path):
+        assert calls == [(calls[0][0], fake_msvcrt.LK_LOCK, 1)]
+
+    assert [call[1:] for call in calls] == [
+        (fake_msvcrt.LK_LOCK, 1),
+        (fake_msvcrt.LK_UNLCK, 1),
+    ]
+
+
 def test_connect_rejects_tls_record_in_sqlite_header(tmp_path, monkeypatch):
     """Kanban should classify TLS-looking page-0 clobbers before WAL setup."""
     home = tmp_path / ".hermes"
@@ -2527,7 +2566,6 @@ def test_resolve_hermes_argv_module_actually_runs():
     Run it as a real subprocess to catch that regression.
     """
     import subprocess
-    import sys
     import hermes_cli.kanban_db as kb
     import shutil
     import unittest.mock as mock
@@ -3106,7 +3144,6 @@ def test_detect_stale_skips_recently_started_task(kanban_home, monkeypatch):
 
 def test_detect_stale_skips_when_timeout_zero(kanban_home, monkeypatch):
     """stale_timeout_seconds=0 disables stale detection entirely."""
-    import hermes_cli.kanban_db as _kb
 
     with kb.connect() as conn:
         t = kb.create_task(conn, title="disabled", assignee="worker")
@@ -3278,6 +3315,44 @@ def test_connect_refuses_corrupt_existing_file(tmp_path):
         kb.connect(db_path=db_path)
 
 
+def test_repeated_corrupt_open_reuses_single_backup(tmp_path):
+    """Repeated quarantines of the same corrupt bytes must not amplify disk usage.
+
+    Regression for the gateway dispatcher's 5-min retry loop on shared kanban
+    DBs across multi-profile fleets: each retry on an unchanged corrupt file
+    used to create a fresh ``.corrupt.<timestamp>.bak`` until disk filled. The
+    content-addressed backup name is deterministic in the DB's sha256, so
+    N retries of the same bytes share one backup.
+    """
+    db_path = tmp_path / "kanban.db"
+    original = _write_corrupt_db(db_path)
+
+    backups: set[Path] = set()
+    for _ in range(10):
+        kb._INITIALIZED_PATHS.discard(str(db_path.resolve()))
+        with pytest.raises(kb.KanbanDbCorruptError) as excinfo:
+            kb.connect(db_path=db_path)
+        assert excinfo.value.backup_path is not None
+        backups.add(excinfo.value.backup_path)
+
+    assert len(backups) == 1, f"expected 1 deterministic backup, got {len(backups)}"
+    (backup,) = backups
+    assert backup.exists()
+    assert backup.read_bytes() == original
+
+    # Mutate the corrupt bytes — fingerprint changes, separate backup preserved.
+    with db_path.open("r+b") as f:
+        f.seek(4096)
+        f.write(b"\xAB" * 64)
+    kb._INITIALIZED_PATHS.discard(str(db_path.resolve()))
+    with pytest.raises(kb.KanbanDbCorruptError) as excinfo2:
+        kb.connect(db_path=db_path)
+    second_backup = excinfo2.value.backup_path
+    assert second_backup is not None
+    assert second_backup != backup
+    assert second_backup.exists()
+
+
 def test_locked_healthy_db_does_not_classify_as_corrupt(tmp_path, monkeypatch):
     """A transient lock during the probe must not produce a .corrupt backup
     and must not be reported as :class:`KanbanDbCorruptError`. Raw sqlite
@@ -3551,7 +3626,7 @@ def test_write_txn_preserves_original_exception_when_rollback_fails(kanban_home)
     )
 def test_write_txn_healthy_commit_no_exception(tmp_path):
     """Normal commit does not trigger the torn-extend check."""
-    from hermes_cli.kanban_db import connect, write_txn, create_task
+    from hermes_cli.kanban_db import connect, write_txn
     db = tmp_path / "test.db"
     conn = connect(db_path=db)
     # Should not raise
@@ -3568,7 +3643,6 @@ def test_write_txn_healthy_commit_no_exception(tmp_path):
 def test_write_txn_raises_on_truncated_file(tmp_path):
     """A mocked smaller file size triggers the torn-extend check."""
     from hermes_cli.kanban_db import connect, write_txn
-    import hermes_cli.kanban_db as kanban_db_module
     db = tmp_path / "test.db"
     conn = connect(db_path=db)
     # Get actual page size so we can fake a smaller file
@@ -3628,7 +3702,7 @@ def test_connect_sets_wal_autocheckpoint_100(tmp_path):
 def test_write_txn_check_reads_correct_header_fields(tmp_path):
     """Synthetic DB file with mismatched header page_count triggers the check."""
     import struct
-    from hermes_cli.kanban_db import connect, write_txn, _check_file_length_invariant
+    from hermes_cli.kanban_db import connect, _check_file_length_invariant
     db = tmp_path / "synthetic.db"
     conn = connect(db_path=db)
     page_size = conn.execute("PRAGMA page_size").fetchone()[0]
@@ -3805,3 +3879,66 @@ def test_dispatch_once_still_reaps_via_extracted_fn(kanban_home):
                 pids = kb.reap_worker_zombies()
 
     assert pids == [99999]
+
+
+
+# ---------------------------------------------------------------------------
+# connect_closing(): context manager that actually closes the FD
+# Regression coverage for #33159 (kanban.db FD leak — gateway crashes after
+# ~4 days). sqlite3.Connection's built-in __exit__ commits/rollbacks but
+# does NOT close, so `with kb.connect() as conn:` leaks the FD in
+# long-lived processes (gateway run_slash, dashboard decompose handler).
+# `connect_closing()` is the leak-safe replacement.
+# ---------------------------------------------------------------------------
+
+
+def test_connect_closing_closes_connection_on_exit(tmp_path):
+    """The new context manager MUST actually close the underlying FD."""
+    db_path = tmp_path / "kanban.db"
+    kb._INITIALIZED_PATHS.discard(str(db_path.resolve()))
+    with kb.connect_closing(db_path=db_path) as conn:
+        conn.execute("SELECT 1").fetchone()
+    # After exit, the connection MUST be closed — subsequent execute
+    # should raise ProgrammingError.
+    with pytest.raises(sqlite3.ProgrammingError):
+        conn.execute("SELECT 1")
+
+
+def test_connect_closing_closes_on_exception(tmp_path):
+    """Connection closed even when the body raises."""
+    db_path = tmp_path / "kanban.db"
+    kb._INITIALIZED_PATHS.discard(str(db_path.resolve()))
+    captured = []
+    with pytest.raises(RuntimeError, match="boom"):
+        with kb.connect_closing(db_path=db_path) as conn:
+            captured.append(conn)
+            raise RuntimeError("boom")
+    with pytest.raises(sqlite3.ProgrammingError):
+        captured[0].execute("SELECT 1")
+
+
+def test_connect_closing_yields_usable_connection(tmp_path):
+    """Smoke test: schema is initialized and basic ops work."""
+    db_path = tmp_path / "kanban.db"
+    kb._INITIALIZED_PATHS.discard(str(db_path.resolve()))
+    with kb.connect_closing(db_path=db_path) as conn:
+        tid = kb.create_task(conn, title="closing-cm test")
+        task = kb.get_task(conn, tid)
+        assert task is not None
+        assert task.title == "closing-cm test"
+
+
+def test_bare_connect_does_not_close_on_context_exit(tmp_path):
+    """Document the leak that connect_closing exists to prevent.
+
+    sqlite3.Connection's __exit__ commits/rollbacks but doesn't close.
+    This is the upstream behaviour we cannot change; the regression
+    guard is to make sure connect_closing() does the right thing.
+    """
+    db_path = tmp_path / "kanban.db"
+    kb._INITIALIZED_PATHS.discard(str(db_path.resolve()))
+    with kb.connect(db_path=db_path) as conn:
+        pass
+    # Still usable after with-block exit (the leak).
+    conn.execute("SELECT 1").fetchone()
+    conn.close()  # explicit close to avoid leaking THIS test
diff --git a/tests/hermes_cli/test_kanban_decompose.py b/tests/hermes_cli/test_kanban_decompose.py
index 62937abba28..5ba17e58cae 100644
--- a/tests/hermes_cli/test_kanban_decompose.py
+++ b/tests/hermes_cli/test_kanban_decompose.py
@@ -7,14 +7,12 @@ and the assignee-fallback logic.
 
 from __future__ import annotations
 
-import argparse
 import json as jsonlib
 from pathlib import Path
 from unittest.mock import MagicMock, patch
 
 import pytest
 
-from hermes_cli import kanban as kanban_cli
 from hermes_cli import kanban_db as kb
 from hermes_cli import kanban_decompose as decomp
 
diff --git a/tests/hermes_cli/test_kanban_default_assignee.py b/tests/hermes_cli/test_kanban_default_assignee.py
new file mode 100644
index 00000000000..70b51bbdb60
--- /dev/null
+++ b/tests/hermes_cli/test_kanban_default_assignee.py
@@ -0,0 +1,154 @@
+"""Regression tests for #27145 — kanban.default_assignee for unassigned ready tasks.
+
+When the dispatcher hits an unassigned ready task and ``kanban.default_assignee``
+is set, the dispatcher applies the assignment and spawns. Without the config,
+the task is skipped (existing behavior preserved).
+"""
+from __future__ import annotations
+
+import json
+import os
+import sys
+import tempfile
+
+import pytest
+
+
+@pytest.fixture()
+def isolated_kanban_home(monkeypatch):
+    """Spin up a fresh HERMES_HOME with a clean kanban DB."""
+    test_home = tempfile.mkdtemp(prefix="kanban_default_assignee_test_")
+    monkeypatch.setenv("HERMES_HOME", test_home)
+    # Force-reimport so the fresh HERMES_HOME is picked up.
+    for mod in list(sys.modules.keys()):
+        if mod.startswith("hermes_cli") or mod.startswith("hermes_state") or mod == "hermes_constants":
+            del sys.modules[mod]
+    from hermes_cli import kanban_db
+    yield kanban_db, test_home
+    # Cleanup is best-effort; tempfile dir survives but pytest isolation
+    # gives each test its own monkeypatched HERMES_HOME so no cross-test
+    # contamination.
+
+
+def _fake_spawn(*args, **kwargs):
+    """Stand-in for the real worker spawn — returns a fake PID."""
+    return 12345
+
+
+def test_unassigned_task_skipped_without_default_assignee(isolated_kanban_home):
+    """Baseline: with no default_assignee, an unassigned ready task is
+    skipped via the existing `skipped_unassigned` bucket and the DB row
+    is untouched."""
+    kb, _home = isolated_kanban_home
+    with kb.connect_closing() as conn:
+        kb.create_board(slug="default", name="Test")
+        task_id = kb.create_task(conn, title="t1", assignee=None)
+    with kb.connect_closing() as conn:
+        res = kb.dispatch_once(conn, spawn_fn=_fake_spawn, dry_run=False)
+    assert res.skipped_unassigned == [task_id]
+    assert not res.auto_assigned_default
+    assert not res.spawned
+    with kb.connect_closing() as conn:
+        row = conn.execute("SELECT assignee FROM tasks WHERE id = ?", (task_id,)).fetchone()
+    assert row["assignee"] is None
+
+
+def test_unassigned_task_auto_assigned_with_default_assignee(isolated_kanban_home):
+    """Core #27145 contract: with default_assignee set, an unassigned ready
+    task gets the assignment applied and dispatched on the same tick. The
+    DB row is mutated (assignee column + an 'assigned' event)."""
+    kb, _home = isolated_kanban_home
+    with kb.connect_closing() as conn:
+        kb.create_board(slug="default", name="Test")
+        task_id = kb.create_task(conn, title="t1", assignee=None)
+    with kb.connect_closing() as conn:
+        res = kb.dispatch_once(
+            conn, spawn_fn=_fake_spawn, dry_run=False,
+            default_assignee="default",
+        )
+    assert res.auto_assigned_default == [task_id]
+    assert not res.skipped_unassigned
+    assert len(res.spawned) == 1
+    assert res.spawned[0][0] == task_id
+    assert res.spawned[0][1] == "default"
+
+    with kb.connect_closing() as conn:
+        row = conn.execute("SELECT assignee FROM tasks WHERE id = ?", (task_id,)).fetchone()
+    assert row["assignee"] == "default"
+
+    # 'assigned' event emitted for the audit trail
+    with kb.connect_closing() as conn:
+        evs = list(conn.execute(
+            "SELECT kind, payload FROM task_events WHERE task_id = ? AND kind = 'assigned'",
+            (task_id,),
+        ))
+    assert len(evs) == 1
+    payload = json.loads(evs[0][1])
+    assert payload["assignee"] == "default"
+    assert payload["source"] == "kanban.default_assignee"
+
+
+def test_dry_run_with_default_assignee_reports_without_mutating(isolated_kanban_home):
+    """Dry-run mode: reports what WOULD happen (task in auto_assigned_default,
+    spawn entry) but does NOT mutate the DB. Operators using
+    `hermes kanban dispatch --dry-run` see the routing decision before
+    committing."""
+    kb, _home = isolated_kanban_home
+    with kb.connect_closing() as conn:
+        kb.create_board(slug="default", name="Test")
+        task_id = kb.create_task(conn, title="t1", assignee=None)
+    with kb.connect_closing() as conn:
+        res = kb.dispatch_once(
+            conn, spawn_fn=_fake_spawn, dry_run=True,
+            default_assignee="default",
+        )
+    assert res.auto_assigned_default == [task_id]
+    assert len(res.spawned) == 1
+    with kb.connect_closing() as conn:
+        row = conn.execute("SELECT assignee FROM tasks WHERE id = ?", (task_id,)).fetchone()
+    # DB unchanged — dry_run did not commit the assignment.
+    assert row["assignee"] is None
+
+
+def test_whitespace_default_assignee_treated_as_none(isolated_kanban_home):
+    """Empty / whitespace-only default_assignee values must be treated as
+    'no fallback set' so a misconfigured kanban.default_assignee=' '
+    doesn't surprise operators by silently routing unassigned tasks."""
+    kb, _home = isolated_kanban_home
+    with kb.connect_closing() as conn:
+        kb.create_board(slug="default", name="Test")
+        task_id = kb.create_task(conn, title="t1", assignee=None)
+    with kb.connect_closing() as conn:
+        res = kb.dispatch_once(
+            conn, spawn_fn=_fake_spawn, dry_run=False,
+            default_assignee="   ",
+        )
+    assert task_id in res.skipped_unassigned
+    assert not res.auto_assigned_default
+
+
+def test_explicitly_assigned_task_untouched_by_default_assignee(isolated_kanban_home):
+    """A task with an explicit assignee must NOT be touched by the
+    default_assignee logic — that fallback only applies to genuinely
+    unassigned rows."""
+    kb, _home = isolated_kanban_home
+    with kb.connect_closing() as conn:
+        kb.create_board(slug="default", name="Test")
+        task_id = kb.create_task(conn, title="t1", assignee="default")
+    with kb.connect_closing() as conn:
+        res = kb.dispatch_once(
+            conn, spawn_fn=_fake_spawn, dry_run=False,
+            default_assignee="someother",
+        )
+    assert task_id not in res.auto_assigned_default
+    assert any(s[0] == task_id and s[1] == "default" for s in res.spawned)
+
+
+def test_dispatch_result_has_auto_assigned_default_field():
+    """Schema-level invariant: DispatchResult exposes the
+    auto_assigned_default field so CLI / dashboard / gateway can surface
+    the new routing decisions."""
+    from hermes_cli.kanban_db import DispatchResult
+    r = DispatchResult()
+    assert hasattr(r, "auto_assigned_default")
+    assert r.auto_assigned_default == []
diff --git a/tests/hermes_cli/test_kanban_notify.py b/tests/hermes_cli/test_kanban_notify.py
index 44a0bd90a03..f8109416cb5 100644
--- a/tests/hermes_cli/test_kanban_notify.py
+++ b/tests/hermes_cli/test_kanban_notify.py
@@ -298,7 +298,6 @@ def test_dispatcher_tick_does_not_call_init_db(kanban_home, monkeypatch):
     """
     import hermes_cli.kanban_db as kb
     from gateway.run import GatewayRunner
-    from unittest.mock import patch
 
     runner = object.__new__(GatewayRunner)
 
diff --git a/tests/hermes_cli/test_kanban_per_profile_cap.py b/tests/hermes_cli/test_kanban_per_profile_cap.py
new file mode 100644
index 00000000000..2cf7a3e8f21
--- /dev/null
+++ b/tests/hermes_cli/test_kanban_per_profile_cap.py
@@ -0,0 +1,167 @@
+"""Regression tests for #21582 — per-profile concurrency cap in dispatcher.
+
+When ``kanban.max_in_progress_per_profile`` is set, no single profile
+gets more than N workers running at once even if the global
+``max_in_progress`` cap would allow it. Prevents one profile's local
+model / API quota / browser pool from being overwhelmed by a fan-out.
+"""
+from __future__ import annotations
+
+import os
+import sys
+import tempfile
+
+import pytest
+
+
+@pytest.fixture()
+def isolated_kanban_home_with_profiles(monkeypatch):
+    """Spin up a fresh HERMES_HOME with kanban DB + alpha/beta profiles."""
+    test_home = tempfile.mkdtemp(prefix="kanban_per_profile_cap_test_")
+    for prof in ("alpha", "beta", "default"):
+        os.makedirs(os.path.join(test_home, "profiles", prof), exist_ok=True)
+    monkeypatch.setenv("HERMES_HOME", test_home)
+    for mod in list(sys.modules.keys()):
+        if mod.startswith("hermes_cli") or mod.startswith("hermes_state") or mod == "hermes_constants":
+            del sys.modules[mod]
+    from hermes_cli import kanban_db
+    yield kanban_db
+
+
+def _fake_spawn(*args, **kwargs):
+    return 12345
+
+
+def test_no_cap_all_tasks_dispatched(isolated_kanban_home_with_profiles):
+    """Baseline: with no per-profile cap, all ready tasks dispatch."""
+    kb = isolated_kanban_home_with_profiles
+    with kb.connect_closing() as conn:
+        kb.create_board(slug="default", name="Test")
+        for i in range(5):
+            kb.create_task(conn, title=f"a{i}", assignee="alpha")
+        for i in range(3):
+            kb.create_task(conn, title=f"b{i}", assignee="beta")
+    with kb.connect_closing() as conn:
+        res = kb.dispatch_once(conn, spawn_fn=_fake_spawn, dry_run=True)
+    assert len(res.spawned) == 8
+    assert not res.skipped_per_profile_capped
+
+
+def test_cap_2_balances_two_profiles(isolated_kanban_home_with_profiles):
+    """With cap=2: 2 alpha + 2 beta dispatched; remaining 3 alpha + 1 beta
+    deferred to skipped_per_profile_capped."""
+    kb = isolated_kanban_home_with_profiles
+    with kb.connect_closing() as conn:
+        kb.create_board(slug="default", name="Test")
+        for i in range(5):
+            kb.create_task(conn, title=f"a{i}", assignee="alpha")
+        for i in range(3):
+            kb.create_task(conn, title=f"b{i}", assignee="beta")
+    with kb.connect_closing() as conn:
+        res = kb.dispatch_once(
+            conn, spawn_fn=_fake_spawn, dry_run=True,
+            max_in_progress_per_profile=2,
+        )
+    spawn_assignees = [s[1] for s in res.spawned]
+    capped_assignees = [c[1] for c in res.skipped_per_profile_capped]
+    assert spawn_assignees.count("alpha") == 2
+    assert spawn_assignees.count("beta") == 2
+    assert capped_assignees.count("alpha") == 3
+    assert capped_assignees.count("beta") == 1
+
+
+def test_pre_existing_running_counts_against_cap(isolated_kanban_home_with_profiles):
+    """A task already in 'running' status when dispatch_once starts counts
+    toward the per-profile cap. With 1 alpha pre-running and cap=1, NO new
+    alpha tasks should spawn; beta is independent so 1 beta spawns."""
+    kb = isolated_kanban_home_with_profiles
+    with kb.connect_closing() as conn:
+        kb.create_board(slug="default", name="Test")
+        running_alpha = kb.create_task(conn, title="running alpha", assignee="alpha")
+        with kb.write_txn(conn):
+            conn.execute(
+                "UPDATE tasks SET status = 'running', claim_lock = 'test:1' WHERE id = ?",
+                (running_alpha,),
+            )
+        for i in range(2):
+            kb.create_task(conn, title=f"a{i}", assignee="alpha")
+        for i in range(2):
+            kb.create_task(conn, title=f"b{i}", assignee="beta")
+    with kb.connect_closing() as conn:
+        res = kb.dispatch_once(
+            conn, spawn_fn=_fake_spawn, dry_run=True,
+            max_in_progress_per_profile=1,
+        )
+    spawn_assignees = [s[1] for s in res.spawned]
+    capped_assignees = [c[1] for c in res.skipped_per_profile_capped]
+    assert spawn_assignees.count("alpha") == 0
+    assert spawn_assignees.count("beta") == 1
+    assert capped_assignees.count("alpha") == 2
+    assert capped_assignees.count("beta") == 1
+
+
+@pytest.mark.parametrize("cap", [0, -1, "abc", None])
+def test_invalid_cap_treated_as_no_cap(isolated_kanban_home_with_profiles, cap):
+    """Cap values that don't represent a positive int should be treated as
+    'no cap' — silently falling through rather than crashing the dispatcher."""
+    kb = isolated_kanban_home_with_profiles
+    with kb.connect_closing() as conn:
+        kb.create_board(slug="default", name="Test")
+        for i in range(3):
+            kb.create_task(conn, title=f"a{i}", assignee="alpha")
+    with kb.connect_closing() as conn:
+        res = kb.dispatch_once(
+            conn, spawn_fn=_fake_spawn, dry_run=True,
+            max_in_progress_per_profile=cap,
+        )
+    assert not res.skipped_per_profile_capped
+    assert len(res.spawned) == 3
+
+
+def test_capped_tasks_dispatched_on_subsequent_tick(isolated_kanban_home_with_profiles):
+    """A task deferred this tick because its profile was at cap should be
+    eligible for dispatch on the next tick (after running tasks complete).
+    This verifies the cap is per-tick state, not a permanent block."""
+    kb = isolated_kanban_home_with_profiles
+    with kb.connect_closing() as conn:
+        kb.create_board(slug="default", name="Test")
+        ids = [kb.create_task(conn, title=f"a{i}", assignee="alpha") for i in range(3)]
+
+    # First tick: cap=1, only 1 alpha dispatched
+    with kb.connect_closing() as conn:
+        res1 = kb.dispatch_once(
+            conn, spawn_fn=_fake_spawn, dry_run=False,
+            max_in_progress_per_profile=1,
+        )
+    assert len(res1.spawned) == 1
+    assert len(res1.skipped_per_profile_capped) == 2
+
+    # Simulate the running task completing — set it back to done so the
+    # 'running' count drops
+    spawned_id = res1.spawned[0][0]
+    with kb.connect_closing() as conn:
+        with kb.write_txn(conn):
+            conn.execute(
+                "UPDATE tasks SET status = 'done', claim_lock = NULL WHERE id = ?",
+                (spawned_id,),
+            )
+
+    # Second tick: 1 more alpha should now dispatch
+    with kb.connect_closing() as conn:
+        res2 = kb.dispatch_once(
+            conn, spawn_fn=_fake_spawn, dry_run=False,
+            max_in_progress_per_profile=1,
+        )
+    assert len(res2.spawned) == 1
+    assert len(res2.skipped_per_profile_capped) == 1
+    assert res2.spawned[0][0] != spawned_id  # different task this time
+
+
+def test_dispatch_result_has_skipped_per_profile_capped_field():
+    """Schema-level invariant: DispatchResult exposes the
+    skipped_per_profile_capped field as a list of
+    (task_id, assignee, current_running) tuples."""
+    from hermes_cli.kanban_db import DispatchResult
+    r = DispatchResult()
+    assert hasattr(r, "skipped_per_profile_capped")
+    assert r.skipped_per_profile_capped == []
diff --git a/tests/hermes_cli/test_kanban_swarm.py b/tests/hermes_cli/test_kanban_swarm.py
index 358e41d4611..eeb6f917a3c 100644
--- a/tests/hermes_cli/test_kanban_swarm.py
+++ b/tests/hermes_cli/test_kanban_swarm.py
@@ -1,4 +1,3 @@
-import json
 
 from hermes_cli import kanban_db as kb
 from hermes_cli.kanban_swarm import (
diff --git a/tests/hermes_cli/test_kanban_worker_image_extraction.py b/tests/hermes_cli/test_kanban_worker_image_extraction.py
new file mode 100644
index 00000000000..c0724a2904d
--- /dev/null
+++ b/tests/hermes_cli/test_kanban_worker_image_extraction.py
@@ -0,0 +1,238 @@
+"""Worker-side image enrichment for kanban tasks.
+
+When a kanban task body contains a local image path or an ``http(s)://``
+image URL, the worker must surface that image to the model on its first
+user turn — matching the CLI/gateway behaviour for inbound images.
+
+The dispatcher spawns the worker as
+``hermes -p <profile> chat -q "work kanban task <id>"``. The task body
+itself never appears in argv; the worker has to read it from the kanban
+DB during startup. These tests cover the round-trip:
+
+  task body  →  kanban_db.get_task  →  extract_image_refs  →
+  build_native_content_parts  →  multimodal user turn
+"""
+from __future__ import annotations
+
+import base64
+from pathlib import Path
+
+import pytest
+
+from hermes_cli import kanban_db as kb
+from agent.image_routing import (
+    build_native_content_parts,
+    extract_image_refs,
+)
+
+
+# Tiny 1×1 transparent PNG used to back any path the tests stick into a
+# task body. extract_image_refs validates the path exists on disk, so the
+# byte content has to be a real readable file (any image bytes will do).
+_PNG = base64.b64decode(
+    "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGNgYGBgAAAABQABpfZFQAAAAABJRU5ErkJggg=="
+)
+
+
+@pytest.fixture
+def kanban_home(tmp_path: Path, monkeypatch):
+    """Isolated HERMES_HOME with a fresh kanban DB for each test."""
+    home = tmp_path / ".hermes"
+    home.mkdir()
+    monkeypatch.setenv("HERMES_HOME", str(home))
+    monkeypatch.setattr(Path, "home", lambda: tmp_path)
+    kb.init_db()
+    return home
+
+
+def _add_task_with_body(body: str, *, title: str = "Look at this") -> str:
+    conn = kb.connect()
+    try:
+        task_id = kb.create_task(
+            conn,
+            title=title,
+            body=body,
+            assignee="worker-a",
+            tenant=None,
+        )
+    finally:
+        conn.close()
+    return task_id
+
+
+def _read_body(task_id: str) -> str:
+    conn = kb.connect()
+    try:
+        task = kb.get_task(conn, task_id)
+        return (task.body if task is not None else "") or ""
+    finally:
+        conn.close()
+
+
+class TestExtractFromTaskBody:
+    """Read a real kanban task body and run it through extract_image_refs."""
+
+    def test_local_path_in_body_round_trips(self, kanban_home, tmp_path):
+        img = tmp_path / "screenshot.png"
+        img.write_bytes(_PNG)
+        tid = _add_task_with_body(
+            f"Please review the screenshot at {img} and confirm "
+            "the alignment is right."
+        )
+
+        body = _read_body(tid)
+        paths, urls = extract_image_refs(body)
+        assert paths == [str(img)]
+        assert urls == []
+
+    def test_url_in_body_round_trips(self, kanban_home):
+        tid = _add_task_with_body(
+            "The design lives at https://example.com/mock/v3.png — "
+            "make the implementation match it."
+        )
+
+        body = _read_body(tid)
+        paths, urls = extract_image_refs(body)
+        assert paths == []
+        assert urls == ["https://example.com/mock/v3.png"]
+
+    def test_mixed_path_and_url_in_body(self, kanban_home, tmp_path):
+        img = tmp_path / "current.png"
+        img.write_bytes(_PNG)
+        tid = _add_task_with_body(
+            f"Compare the current screenshot {img} against the design at "
+            "https://example.com/target.png and write a diff."
+        )
+
+        body = _read_body(tid)
+        paths, urls = extract_image_refs(body)
+        assert paths == [str(img)]
+        assert urls == ["https://example.com/target.png"]
+
+    def test_body_without_images_yields_nothing(self, kanban_home):
+        tid = _add_task_with_body(
+            "Refactor the auth module to use the new session helper."
+        )
+
+        body = _read_body(tid)
+        paths, urls = extract_image_refs(body)
+        assert paths == []
+        assert urls == []
+
+    def test_empty_body_is_safe(self, kanban_home):
+        tid = _add_task_with_body("")
+
+        body = _read_body(tid)
+        paths, urls = extract_image_refs(body)
+        assert paths == []
+        assert urls == []
+
+
+class TestBuildPartsFromTaskBody:
+    """Verify the full pipeline produces a multimodal user turn."""
+
+    def test_local_path_becomes_native_image_part(self, kanban_home, tmp_path):
+        img = tmp_path / "design.png"
+        img.write_bytes(_PNG)
+        tid = _add_task_with_body(f"Check out {img} — what's broken?")
+        body = _read_body(tid)
+        paths, urls = extract_image_refs(body)
+
+        # Mirrors the cli.py wiring: pass the worker's literal -q argument
+        # (the dispatcher uses ``"work kanban task <id>"``) plus the
+        # extracted refs through build_native_content_parts.
+        parts, skipped = build_native_content_parts(
+            f"work kanban task {tid}",
+            paths,
+            image_urls=urls or None,
+        )
+
+        assert skipped == []
+        # text part + one image_url part
+        assert len(parts) == 2
+        assert parts[0]["type"] == "text"
+        assert parts[0]["text"].startswith(f"work kanban task {tid}")
+        assert f"[Image attached at: {img}]" in parts[0]["text"]
+        assert parts[1]["type"] == "image_url"
+        assert parts[1]["image_url"]["url"].startswith("data:image/png;base64,")
+
+    def test_url_becomes_image_url_part(self, kanban_home):
+        tid = _add_task_with_body(
+            "Reference: https://example.com/target.jpg — match it."
+        )
+        body = _read_body(tid)
+        paths, urls = extract_image_refs(body)
+
+        parts, skipped = build_native_content_parts(
+            f"work kanban task {tid}",
+            paths,
+            image_urls=urls or None,
+        )
+
+        assert skipped == []
+        assert len(parts) == 2
+        assert parts[0]["type"] == "text"
+        assert "[Image attached: https://example.com/target.jpg]" in parts[0]["text"]
+        assert parts[1] == {
+            "type": "image_url",
+            "image_url": {"url": "https://example.com/target.jpg"},
+        }
+
+    def test_body_with_both_yields_two_image_parts(self, kanban_home, tmp_path):
+        img = tmp_path / "local.png"
+        img.write_bytes(_PNG)
+        tid = _add_task_with_body(
+            f"Diff {img} vs https://example.com/target.png — explain it."
+        )
+        body = _read_body(tid)
+        paths, urls = extract_image_refs(body)
+
+        parts, skipped = build_native_content_parts(
+            f"work kanban task {tid}",
+            paths,
+            image_urls=urls or None,
+        )
+
+        assert skipped == []
+        image_parts = [p for p in parts if p.get("type") == "image_url"]
+        assert len(image_parts) == 2
+        # Local file is embedded as a data URL; remote URL passes through.
+        assert image_parts[0]["image_url"]["url"].startswith("data:image/png;base64,")
+        assert image_parts[1]["image_url"]["url"] == "https://example.com/target.png"
+
+    def test_body_with_no_images_leaves_query_untouched(self, kanban_home):
+        tid = _add_task_with_body(
+            "Rewrite the README intro paragraph to focus on use cases."
+        )
+        body = _read_body(tid)
+        paths, urls = extract_image_refs(body)
+
+        parts, skipped = build_native_content_parts(
+            f"work kanban task {tid}",
+            paths,
+            image_urls=urls or None,
+        )
+
+        # No images → plain text-only return (single part, no list mutation).
+        assert skipped == []
+        assert len(parts) == 1
+        assert parts[0]["type"] == "text"
+        assert parts[0]["text"] == f"work kanban task {tid}"
+
+    def test_code_block_example_is_not_attached(self, kanban_home, tmp_path):
+        # Only the real image outside the fenced code block should attach.
+        real = tmp_path / "real.png"
+        real.write_bytes(_PNG)
+        tid = _add_task_with_body(
+            f"Real screenshot:\n{real}\n\n"
+            "Example we DON'T want attached:\n"
+            "```\n"
+            "image: /tmp/example_only.png\n"
+            "url: https://example.com/example.png\n"
+            "```\n"
+        )
+        body = _read_body(tid)
+        paths, urls = extract_image_refs(body)
+
+        assert paths == [str(real)]
+        assert urls == []
diff --git a/tests/hermes_cli/test_logs.py b/tests/hermes_cli/test_logs.py
index 398c7faf50d..52fa63e3ec9 100644
--- a/tests/hermes_cli/test_logs.py
+++ b/tests/hermes_cli/test_logs.py
@@ -1,10 +1,7 @@
 """Tests for hermes_cli.logs — log viewing and filtering."""
 
-import os
 from datetime import datetime, timedelta
-from pathlib import Path
 
-import pytest
 
 from hermes_cli.logs import (
     LOG_FILES,
diff --git a/tests/hermes_cli/test_mcp_catalog.py b/tests/hermes_cli/test_mcp_catalog.py
index 13dcf50653b..bb15c48ce8c 100644
--- a/tests/hermes_cli/test_mcp_catalog.py
+++ b/tests/hermes_cli/test_mcp_catalog.py
@@ -7,7 +7,6 @@ launch an MCP is mocked.
 
 from __future__ import annotations
 
-import os
 from pathlib import Path
 from unittest.mock import patch
 
@@ -207,7 +206,7 @@ class TestManifestParsing:
 class TestInstall:
     def test_install_simple_stdio_writes_config(self, catalog_dir):
         _write_manifest(catalog_dir, "demo", _basic_manifest())
-        from hermes_cli.mcp_catalog import install_entry, get_entry
+        from hermes_cli.mcp_catalog import install_entry
         from hermes_cli.config import load_config
 
         install_entry(_entry("demo"), enable=True)
@@ -240,7 +239,7 @@ class TestInstall:
         fake_clone.mkdir()
 
         from hermes_cli import mcp_catalog
-        from hermes_cli.mcp_catalog import install_entry, get_entry
+        from hermes_cli.mcp_catalog import install_entry
         from hermes_cli.config import load_config
 
         with patch.object(mcp_catalog, "_do_git_install", return_value=fake_clone):
@@ -263,7 +262,7 @@ class TestInstall:
 
         monkeypatch.setattr(mcp_catalog, "_prompt_input", lambda *a, **kw: "secret-val")
 
-        from hermes_cli.mcp_catalog import install_entry, get_entry
+        from hermes_cli.mcp_catalog import install_entry
         from hermes_cli.config import get_env_value, load_config
 
         install_entry(_entry("demo"), enable=True)
@@ -278,7 +277,7 @@ class TestInstall:
         )
         _write_manifest(catalog_dir, "demo", body)
 
-        from hermes_cli.mcp_catalog import install_entry, get_entry
+        from hermes_cli.mcp_catalog import install_entry
         from hermes_cli.config import load_config
 
         install_entry(_entry("demo"), enable=True)
@@ -297,7 +296,7 @@ class TestInstall:
         _write_manifest(catalog_dir, "demo", body)
 
         from hermes_cli import mcp_catalog
-        from hermes_cli.mcp_catalog import install_entry, get_entry, CatalogError
+        from hermes_cli.mcp_catalog import install_entry, CatalogError
 
         # User hits enter — empty input, no default
         monkeypatch.setattr(mcp_catalog, "_prompt_input", lambda *a, **kw: "")
@@ -314,7 +313,7 @@ class TestInstall:
 class TestUninstall:
     def test_uninstall_removes_server_block(self, catalog_dir):
         _write_manifest(catalog_dir, "demo", _basic_manifest())
-        from hermes_cli.mcp_catalog import install_entry, get_entry, uninstall_entry
+        from hermes_cli.mcp_catalog import install_entry, uninstall_entry
         from hermes_cli.config import load_config
 
         install_entry(_entry("demo"), enable=True)
diff --git a/tests/hermes_cli/test_mcp_config.py b/tests/hermes_cli/test_mcp_config.py
index e136f1b3c0f..d52d3ed0e14 100644
--- a/tests/hermes_cli/test_mcp_config.py
+++ b/tests/hermes_cli/test_mcp_config.py
@@ -6,12 +6,7 @@ any actual MCP servers or API keys.
 """
 
 import argparse
-import json
-import os
-import types
 from pathlib import Path
-from typing import Any, Dict, List
-from unittest.mock import MagicMock, patch, PropertyMock
 
 import pytest
 
@@ -600,3 +595,58 @@ class TestMcpLogin:
         out = capsys.readouterr().out
         assert "no URL" in out or "not an OAuth" in out
 
+    def test_login_false_success_no_token(self, tmp_path, capsys, monkeypatch):
+        """Probe lists tools without auth (Google Drive), but no token landed.
+
+        The server allows tools/list without auth (DCR 400'd), so the probe
+        succeeds yet no OAuth token exists. Login must NOT claim success — it
+        should warn and point the user at pre-registered client_id config.
+        """
+        _seed_config(tmp_path, {
+            "googledrive": {
+                "url": "https://drivemcp.googleapis.com/mcp/v1",
+                "auth": "oauth",
+            },
+        })
+        # Probe returns tools even though auth never completed.
+        monkeypatch.setattr(
+            "hermes_cli.mcp_config._probe_single_server",
+            lambda name, cfg: [("search_files", "d"), ("read_file_content", "d")],
+        )
+        # No token file is created → _oauth_tokens_present() returns False.
+        from hermes_cli.mcp_config import cmd_mcp_login
+
+        cmd_mcp_login(_make_args(name="googledrive"))
+        out = capsys.readouterr().out
+
+        assert "no OAuth token was obtained" in out
+        assert "Authenticated" not in out
+        assert "client_id" in out
+
+    def test_login_genuine_success_with_token(self, tmp_path, capsys, monkeypatch):
+        """Probe lists tools AND a token exists → report real success."""
+        _seed_config(tmp_path, {
+            "realserver": {"url": "https://mcp.example.com/mcp", "auth": "oauth"},
+        })
+        token_dir = tmp_path / "mcp-tokens"
+
+        # cmd_mcp_login wipes tokens before probing, then the real OAuth flow
+        # writes a fresh token during the probe. Simulate that: the mocked
+        # probe drops a token file, mirroring a successful authorization.
+        def mock_probe(name, cfg):
+            token_dir.mkdir(exist_ok=True)
+            (token_dir / "realserver.json").write_text('{"access_token": "x"}')
+            return [("a", "d"), ("b", "d"), ("c", "d")]
+
+        monkeypatch.setattr(
+            "hermes_cli.mcp_config._probe_single_server", mock_probe
+        )
+
+        from hermes_cli.mcp_config import cmd_mcp_login
+
+        cmd_mcp_login(_make_args(name="realserver"))
+        out = capsys.readouterr().out
+
+        assert "Authenticated — 3 tool(s) available" in out
+        assert "no OAuth token" not in out
+
diff --git a/tests/hermes_cli/test_mcp_reload_confirm_gate.py b/tests/hermes_cli/test_mcp_reload_confirm_gate.py
index 871f46fe7e1..a7d949e765b 100644
--- a/tests/hermes_cli/test_mcp_reload_confirm_gate.py
+++ b/tests/hermes_cli/test_mcp_reload_confirm_gate.py
@@ -10,7 +10,6 @@ run silently.
 
 from __future__ import annotations
 
-from copy import deepcopy
 
 from hermes_cli.config import DEFAULT_CONFIG
 
diff --git a/tests/hermes_cli/test_mcp_tools_config.py b/tests/hermes_cli/test_mcp_tools_config.py
index ada221a3ddc..e3b73231ca9 100644
--- a/tests/hermes_cli/test_mcp_tools_config.py
+++ b/tests/hermes_cli/test_mcp_tools_config.py
@@ -1,7 +1,6 @@
 """Tests for MCP tools interactive configuration in hermes_cli.tools_config."""
 
-from types import SimpleNamespace
-from unittest.mock import MagicMock, patch
+from unittest.mock import patch
 
 from hermes_cli.tools_config import _configure_mcp_tools_interactive
 
diff --git a/tests/hermes_cli/test_memory_reset.py b/tests/hermes_cli/test_memory_reset.py
index 48f1cfda6a7..34b7f53caa1 100644
--- a/tests/hermes_cli/test_memory_reset.py
+++ b/tests/hermes_cli/test_memory_reset.py
@@ -8,10 +8,7 @@ Covers:
 - Profile-scoped reset (uses HERMES_HOME)
 """
 
-import os
 import pytest
-from argparse import Namespace
-from pathlib import Path
 
 
 @pytest.fixture
@@ -39,7 +36,7 @@ def _run_memory_reset(target="all", yes=False, monkeypatch=None, confirm_input="
 
     Simulates what happens when `hermes memory reset` is run.
     """
-    from hermes_constants import get_hermes_home, display_hermes_home
+    from hermes_constants import get_hermes_home
 
     mem_dir = get_hermes_home() / "memories"
     files_to_reset = []
diff --git a/tests/hermes_cli/test_model_catalog.py b/tests/hermes_cli/test_model_catalog.py
index d4a4b7237a8..43ad6e42cad 100644
--- a/tests/hermes_cli/test_model_catalog.py
+++ b/tests/hermes_cli/test_model_catalog.py
@@ -172,6 +172,90 @@ class TestFetchFailure:
         assert result == manifest
 
 
+class TestFallbackChain:
+    """``_fetch_manifest_with_fallback`` walks ``DEFAULT_CATALOG_FALLBACK_URLS``
+    when the primary URL fails. Regression: the Docusaurus site behind Vercel
+    occasionally returns HTTP 403 + x-vercel-mitigated: challenge for urllib;
+    without a fallback URL the user's disk cache freezes and new model
+    releases (opus 4.8, etc.) never reach the picker.
+    """
+
+    PRIMARY = "https://hermes-agent.nousresearch.com/docs/api/model-catalog.json"
+    FALLBACK = (
+        "https://raw.githubusercontent.com/NousResearch/hermes-agent"
+        "/main/website/static/api/model-catalog.json"
+    )
+
+    def test_uses_primary_when_it_succeeds(self, isolated_home):
+        from hermes_cli import model_catalog
+        calls: list[str] = []
+
+        def fake_fetch(url, timeout):
+            calls.append(url)
+            return _valid_manifest()
+
+        with patch.object(model_catalog, "_fetch_manifest", side_effect=fake_fetch):
+            result = model_catalog._fetch_manifest_with_fallback(self.PRIMARY, 5.0)
+
+        assert result is not None
+        assert calls == [self.PRIMARY], "fallback URLs must not be touched on primary success"
+
+    def test_falls_through_to_raw_github_on_primary_failure(self, isolated_home):
+        from hermes_cli import model_catalog
+        calls: list[str] = []
+
+        def fake_fetch(url, timeout):
+            calls.append(url)
+            if url == self.PRIMARY:
+                return None  # simulate Vercel 403
+            return _valid_manifest()
+
+        with patch.object(model_catalog, "_fetch_manifest", side_effect=fake_fetch):
+            result = model_catalog._fetch_manifest_with_fallback(self.PRIMARY, 5.0)
+
+        assert result is not None
+        assert calls == [self.PRIMARY, self.FALLBACK]
+
+    def test_returns_none_when_all_urls_fail(self, isolated_home):
+        from hermes_cli import model_catalog
+
+        with patch.object(model_catalog, "_fetch_manifest", return_value=None) as fetch:
+            result = model_catalog._fetch_manifest_with_fallback(self.PRIMARY, 5.0)
+
+        assert result is None
+        # Primary + every fallback URL was attempted exactly once.
+        assert fetch.call_count == 1 + len(model_catalog.DEFAULT_CATALOG_FALLBACK_URLS)
+
+    def test_dedupes_when_primary_equals_fallback(self, isolated_home):
+        """Operator who configured ``model_catalog.url`` to the raw GitHub URL
+        should not get a duplicate fetch from the fallback list."""
+        from hermes_cli import model_catalog
+
+        with patch.object(model_catalog, "_fetch_manifest", return_value=None) as fetch:
+            model_catalog._fetch_manifest_with_fallback(self.FALLBACK, 5.0)
+
+        assert fetch.call_count == 1, f"expected 1 call, got {fetch.call_count}"
+
+    def test_get_catalog_uses_fallback_chain(self, isolated_home):
+        """End-to-end: ``get_catalog`` routes through the fallback helper so
+        a primary URL failure transparently produces a working catalog."""
+        from hermes_cli import model_catalog
+        manifest = _valid_manifest()
+        calls: list[str] = []
+
+        def fake_fetch(url, timeout):
+            calls.append(url)
+            if url == self.PRIMARY:
+                return None
+            return manifest
+
+        with patch.object(model_catalog, "_fetch_manifest", side_effect=fake_fetch):
+            result = model_catalog.get_catalog(force_refresh=True)
+
+        assert result == manifest
+        assert self.FALLBACK in calls
+
+
 class TestCuratedAccessors:
     def test_openrouter_returns_tuples(self, isolated_home):
         from hermes_cli import model_catalog
diff --git a/tests/hermes_cli/test_model_normalize.py b/tests/hermes_cli/test_model_normalize.py
index f2a4bf3d684..7e4a6d22e87 100644
--- a/tests/hermes_cli/test_model_normalize.py
+++ b/tests/hermes_cli/test_model_normalize.py
@@ -8,7 +8,6 @@ import pytest
 from hermes_cli.model_normalize import (
     normalize_model_for_provider,
     _DOT_TO_HYPHEN_PROVIDERS,
-    _AGGREGATOR_PROVIDERS,
     _normalize_for_deepseek,
     detect_vendor,
 )
diff --git a/tests/hermes_cli/test_model_provider_persistence.py b/tests/hermes_cli/test_model_provider_persistence.py
index 0b350ba9adb..aef758f099e 100644
--- a/tests/hermes_cli/test_model_provider_persistence.py
+++ b/tests/hermes_cli/test_model_provider_persistence.py
@@ -6,7 +6,6 @@ isinstance(model, dict)) to silently fail — leaving the provider unset and
 falling back to auto-detection.
 """
 
-import os
 from unittest.mock import patch, MagicMock
 
 import pytest
@@ -194,7 +193,6 @@ class TestProviderPersistsAfterModelSave:
         # Patch fetch_api_models so the named custom flow returns one model;
         # patch simple_term_menu to force the input() fallback; patch input to
         # auto-select the first model from the fallback prompt.
-        from unittest.mock import MagicMock
         fake_menu_module = MagicMock()
         fake_menu_module.TerminalMenu.side_effect = OSError("no tty in test")
         with patch("hermes_cli.auth._save_model_choice"), \
diff --git a/tests/hermes_cli/test_model_switch_custom_providers.py b/tests/hermes_cli/test_model_switch_custom_providers.py
index 4d88942b3fd..8ef865ee33f 100644
--- a/tests/hermes_cli/test_model_switch_custom_providers.py
+++ b/tests/hermes_cli/test_model_switch_custom_providers.py
@@ -403,6 +403,44 @@ def test_list_authenticated_providers_same_url_different_keys_disambiguated(monk
     assert models["custom:openai-2"] == ["gpt-4.6"]
 
 
+def test_list_authenticated_providers_same_url_different_key_env_and_api_mode_stay_separate(monkeypatch):
+    """Same gateway host but different key_env/api_mode entries are distinct providers."""
+    monkeypatch.setattr("agent.models_dev.fetch_models_dev", lambda: {})
+    monkeypatch.setattr(providers_mod, "HERMES_OVERLAYS", {})
+
+    providers = list_authenticated_providers(
+        current_provider="custom:gpt",
+        current_base_url="https://gateway.example.com",
+        user_providers={},
+        custom_providers=[
+            {
+                "name": "gpt",
+                "base_url": "https://gateway.example.com",
+                "key_env": "GPT_KEY",
+                "api_mode": "codex_responses",
+                "model": "gpt-5.5",
+            },
+            {
+                "name": "claude",
+                "base_url": "https://gateway.example.com",
+                "key_env": "CLAUDE_KEY",
+                "api_mode": "anthropic_messages",
+                "model": "claude-opus-4-8",
+            },
+        ],
+        max_models=50,
+    )
+
+    custom = [p for p in providers if p.get("is_user_defined")]
+    by_slug = {p["slug"]: p for p in custom}
+
+    assert set(by_slug) == {"custom:gpt", "custom:claude"}
+    assert by_slug["custom:gpt"]["models"] == ["gpt-5.5"]
+    assert by_slug["custom:claude"]["models"] == ["claude-opus-4-8"]
+    assert by_slug["custom:gpt"]["is_current"] is True
+    assert by_slug["custom:claude"]["is_current"] is False
+
+
 def test_list_authenticated_providers_total_models_reflects_grouped_count(monkeypatch):
     """After grouping six entries into one row, total_models must reflect
     the full count, and every grouped model appears in the list."""
diff --git a/tests/hermes_cli/test_models.py b/tests/hermes_cli/test_models.py
index f4edcaf2af6..db96a6558d7 100644
--- a/tests/hermes_cli/test_models.py
+++ b/tests/hermes_cli/test_models.py
@@ -2,6 +2,7 @@
 
 from unittest.mock import patch, MagicMock
 
+from hermes_cli.nous_account import NousPortalAccountInfo
 from hermes_cli.models import (
     OPENROUTER_MODELS, fetch_openrouter_models, model_ids, detect_provider_for_model,
     is_nous_free_tier, partition_nous_models_by_tier,
@@ -308,6 +309,15 @@ class TestDetectProviderForModel:
 class TestIsNousFreeTier:
     """Tests for is_nous_free_tier — account tier detection."""
 
+    def test_paid_service_access_allowed_true_is_not_free(self):
+        assert is_nous_free_tier({"paid_service_access": {"allowed": True}}) is False
+
+    def test_paid_service_access_allowed_false_is_free(self):
+        assert is_nous_free_tier({"paid_service_access": {"allowed": False}}) is True
+
+    def test_paid_service_access_paid_access_fallback(self):
+        assert is_nous_free_tier({"paid_service_access": {"paid_access": False}}) is True
+
     def test_paid_plus_tier(self):
         assert is_nous_free_tier({"subscription": {"plan": "Plus", "tier": 2, "monthly_charge": 20}}) is False
 
@@ -657,39 +667,58 @@ class TestCheckNousFreeTierCache:
     def teardown_method(self):
         _models_mod._free_tier_cache = None
 
-    @patch("hermes_cli.models.fetch_nous_account_tier")
-    @patch("hermes_cli.models.is_nous_free_tier", return_value=True)
-    def test_result_is_cached(self, mock_is_free, mock_fetch):
-        """Second call within TTL returns cached result without API call."""
-        mock_fetch.return_value = {"subscription": {"monthly_charge": 0}}
-        with patch("hermes_cli.auth.get_provider_auth_state", return_value={"access_token": "tok"}), \
-             patch("hermes_cli.auth.resolve_nous_runtime_credentials"):
-            result1 = check_nous_free_tier()
-            result2 = check_nous_free_tier()
+    @patch("hermes_cli.nous_account.get_nous_portal_account_info")
+    def test_result_is_cached(self, mock_account):
+        """Second call within TTL returns cached result without account lookup."""
+        mock_account.return_value = NousPortalAccountInfo(
+            logged_in=True,
+            source="jwt",
+            fresh=False,
+            paid_service_access=False,
+        )
+        result1 = check_nous_free_tier()
+        result2 = check_nous_free_tier()
 
         assert result1 is True
         assert result2 is True
-        assert mock_fetch.call_count == 1
+        assert mock_account.call_count == 1
 
-    @patch("hermes_cli.models.fetch_nous_account_tier")
-    @patch("hermes_cli.models.is_nous_free_tier", return_value=False)
-    def test_cache_expires_after_ttl(self, mock_is_free, mock_fetch):
-        """After TTL expires, the API is called again."""
-        mock_fetch.return_value = {"subscription": {"monthly_charge": 20}}
-        with patch("hermes_cli.auth.get_provider_auth_state", return_value={"access_token": "tok"}), \
-             patch("hermes_cli.auth.resolve_nous_runtime_credentials"):
-            result1 = check_nous_free_tier()
-            assert mock_fetch.call_count == 1
+    @patch("hermes_cli.nous_account.get_nous_portal_account_info")
+    def test_cache_expires_after_ttl(self, mock_account):
+        """After TTL expires, account info is resolved again."""
+        mock_account.return_value = NousPortalAccountInfo(
+            logged_in=True,
+            source="jwt",
+            fresh=False,
+            paid_service_access=True,
+        )
+        result1 = check_nous_free_tier()
+        assert mock_account.call_count == 1
 
-            cached_result, cached_at = _models_mod._free_tier_cache
-            _models_mod._free_tier_cache = (cached_result, cached_at - _FREE_TIER_CACHE_TTL - 1)
+        cached_result, cached_at = _models_mod._free_tier_cache
+        _models_mod._free_tier_cache = (cached_result, cached_at - _FREE_TIER_CACHE_TTL - 1)
 
-            result2 = check_nous_free_tier()
-            assert mock_fetch.call_count == 2
+        result2 = check_nous_free_tier()
+        assert mock_account.call_count == 2
 
         assert result1 is False
         assert result2 is False
 
+    @patch("hermes_cli.nous_account.get_nous_portal_account_info")
+    def test_force_fresh_bypasses_cache(self, mock_account):
+        mock_account.return_value = NousPortalAccountInfo(
+            logged_in=True,
+            source="account_api",
+            fresh=True,
+            paid_service_access=True,
+        )
+
+        assert check_nous_free_tier() is False
+        assert check_nous_free_tier(force_fresh=True) is False
+
+        assert mock_account.call_count == 2
+        mock_account.assert_called_with(force_fresh=True)
+
     def test_cache_ttl_is_short(self):
         """TTL should be short enough to catch upgrades quickly (<=5 min)."""
         assert _FREE_TIER_CACHE_TTL <= 300
diff --git a/tests/hermes_cli/test_models_dev_preferred_merge.py b/tests/hermes_cli/test_models_dev_preferred_merge.py
index 0345643f368..c760f0da39f 100644
--- a/tests/hermes_cli/test_models_dev_preferred_merge.py
+++ b/tests/hermes_cli/test_models_dev_preferred_merge.py
@@ -17,10 +17,8 @@ Merging is what lets new models (e.g. ``mimo-v2.5-pro`` on opencode-go)
 appear in ``/model`` without a Hermes release.
 """
 
-import os
 from unittest.mock import patch
 
-import pytest
 
 from hermes_cli.models import (
     _MODELS_DEV_PREFERRED,
diff --git a/tests/hermes_cli/test_non_ascii_credential.py b/tests/hermes_cli/test_non_ascii_credential.py
index caac425c2b6..6f079442681 100644
--- a/tests/hermes_cli/test_non_ascii_credential.py
+++ b/tests/hermes_cli/test_non_ascii_credential.py
@@ -6,10 +6,7 @@ httpx tries to encode the Authorization header as ASCII.
 """
 
 import os
-import sys
-import tempfile
 
-import pytest
 
 from hermes_cli.config import _check_non_ascii_credential
 
diff --git a/tests/hermes_cli/test_nous_account.py b/tests/hermes_cli/test_nous_account.py
new file mode 100644
index 00000000000..9610f7a6b6a
--- /dev/null
+++ b/tests/hermes_cli/test_nous_account.py
@@ -0,0 +1,547 @@
+"""Tests for normalized Nous Portal account entitlement helpers."""
+
+from __future__ import annotations
+
+import base64
+import json
+import time
+from typing import Any
+
+import pytest
+
+from hermes_cli.nous_account import (
+    NousPaidServiceAccessInfo,
+    NousPortalAccountInfo,
+    format_nous_portal_entitlement_message,
+    get_nous_portal_account_info,
+    reset_nous_portal_account_info_cache,
+)
+
+
+def _jwt(claims: dict[str, Any]) -> str:
+    def _part(payload: dict[str, Any]) -> str:
+        raw = json.dumps(payload, separators=(",", ":")).encode()
+        return base64.urlsafe_b64encode(raw).decode().rstrip("=")
+
+    return f"{_part({'alg': 'none', 'typ': 'JWT'})}.{_part(claims)}.sig"
+
+
+def _state(token: str) -> dict[str, Any]:
+    return {
+        "access_token": token,
+        "portal_base_url": "https://portal.example.test",
+        "client_id": "hermes-cli",
+    }
+
+
+def _account_payload(
+    *,
+    allowed: bool,
+    subscription: dict[str, Any] | None,
+    subscription_credits: float,
+    purchased_credits: float,
+) -> dict[str, Any]:
+    return {
+        "user": {
+            "email": "alice@example.test",
+            "privy_did": "did:privy:alice",
+        },
+        "organisation": {
+            "id": "org_123",
+        },
+        "subscription": subscription,
+        "purchased_credits_remaining": purchased_credits,
+        "paid_service_access": {
+            "allowed": allowed,
+            "paid_access": allowed,
+            "reason": "usable_credits" if allowed else "no_usable_credits",
+            "organisation_id": "org_123",
+            "effective_at_ms": 123456789,
+            "has_active_subscription": subscription is not None,
+            "active_subscription_is_paid": bool(
+                subscription and subscription.get("monthly_charge", 0) > 0
+            ),
+            "subscription_tier": subscription.get("tier") if subscription else None,
+            "subscription_monthly_charge": (
+                subscription.get("monthly_charge") if subscription else None
+            ),
+            "subscription_credits_remaining": subscription_credits,
+            "purchased_credits_remaining": purchased_credits,
+            "total_usable_credits": subscription_credits + purchased_credits,
+        },
+    }
+
+
+@pytest.fixture(autouse=True)
+def _reset_cache():
+    reset_nous_portal_account_info_cache()
+    yield
+    reset_nous_portal_account_info_cache()
+
+
+def test_valid_jwt_with_paid_access_true(monkeypatch):
+    token = _jwt(
+        {
+            "sub": "user_123",
+            "org_id": "org_123",
+            "client_id": "hermes-cli",
+            "product_id": "nous-hermes-agent",
+            "nous_client": "hermes-agent",
+            "exp": int(time.time()) + 900,
+            "paid_access": True,
+            "subscription_tier": 2,
+        }
+    )
+    monkeypatch.setattr("hermes_cli.auth.get_provider_auth_state", lambda provider: _state(token))
+
+    info = get_nous_portal_account_info()
+
+    assert info.source == "jwt"
+    assert info.fresh is False
+    assert info.logged_in is True
+    assert info.user_id == "user_123"
+    assert info.org_id == "org_123"
+    assert info.product_id == "nous-hermes-agent"
+    assert info.paid_service_access is True
+    assert info.is_paid is True
+    assert info.is_free_tier is False
+
+
+def test_valid_jwt_with_paid_access_false(monkeypatch):
+    token = _jwt(
+        {
+            "sub": "user_123",
+            "org_id": "org_123",
+            "exp": int(time.time()) + 900,
+            "paid_access": False,
+        }
+    )
+    monkeypatch.setattr("hermes_cli.auth.get_provider_auth_state", lambda provider: _state(token))
+
+    info = get_nous_portal_account_info()
+
+    assert info.source == "jwt"
+    assert info.paid_service_access is False
+    assert info.is_paid is False
+    assert info.is_free_tier is True
+
+
+def test_valid_jwt_missing_paid_access_is_unknown_not_paid(monkeypatch):
+    token = _jwt(
+        {
+            "sub": "user_123",
+            "org_id": "org_123",
+            "exp": int(time.time()) + 900,
+        }
+    )
+    monkeypatch.setattr("hermes_cli.auth.get_provider_auth_state", lambda provider: _state(token))
+
+    info = get_nous_portal_account_info()
+
+    assert info.source == "jwt"
+    assert info.paid_service_access is None
+    assert info.is_paid is False
+    assert info.is_free_tier is False
+
+
+def test_expired_jwt_falls_back_to_fresh_account(monkeypatch):
+    token = _jwt(
+        {
+            "sub": "user_123",
+            "org_id": "org_123",
+            "exp": int(time.time()) - 60,
+            "paid_access": False,
+        }
+    )
+    payload = _account_payload(
+        allowed=True,
+        subscription={
+            "plan": "Tier 2",
+            "tier": 2,
+            "monthly_charge": 20,
+            "current_period_end": "2026-05-01T00:00:00.000Z",
+            "credits_remaining": 12.25,
+            "rollover_credits": 3.5,
+        },
+        subscription_credits=12.25,
+        purchased_credits=7.75,
+    )
+    monkeypatch.setattr("hermes_cli.auth.get_provider_auth_state", lambda provider: _state(token))
+    monkeypatch.setattr("hermes_cli.auth.resolve_nous_access_token", lambda: "fresh-token")
+    monkeypatch.setattr("hermes_cli.nous_account._fetch_nous_account_info", lambda *a, **kw: payload)
+
+    info = get_nous_portal_account_info()
+
+    assert info.source == "account_api"
+    assert info.fresh is True
+    assert info.paid_service_access is True
+    assert info.subscription is not None
+    assert info.subscription.monthly_charge == 20
+    assert info.paid_service_access_info is not None
+    assert info.paid_service_access_info.total_usable_credits == 20
+
+
+@pytest.mark.parametrize(
+    ("payload", "expected_paid"),
+    [
+        (
+            _account_payload(
+                allowed=True,
+                subscription={
+                    "plan": "Tier 2",
+                    "tier": 2,
+                    "monthly_charge": 20,
+                    "current_period_end": "2026-05-01T00:00:00.000Z",
+                    "credits_remaining": 12.25,
+                    "rollover_credits": 3.5,
+                },
+                subscription_credits=12.25,
+                purchased_credits=7.75,
+            ),
+            True,
+        ),
+        (
+            _account_payload(
+                allowed=False,
+                subscription={
+                    "plan": "Tier 2",
+                    "tier": 2,
+                    "monthly_charge": 20,
+                    "current_period_end": "2026-05-01T00:00:00.000Z",
+                    "credits_remaining": 0,
+                    "rollover_credits": 0,
+                },
+                subscription_credits=0,
+                purchased_credits=0,
+            ),
+            False,
+        ),
+        (
+            _account_payload(
+                allowed=True,
+                subscription=None,
+                subscription_credits=0,
+                purchased_credits=7.75,
+            ),
+            True,
+        ),
+        (
+            _account_payload(
+                allowed=False,
+                subscription=None,
+                subscription_credits=0,
+                purchased_credits=0,
+            ),
+            False,
+        ),
+    ],
+)
+def test_fresh_account_payload_normalization(monkeypatch, payload, expected_paid):
+    token = _jwt({"sub": "user_123", "org_id": "org_123", "exp": int(time.time()) + 900})
+    monkeypatch.setattr("hermes_cli.auth.get_provider_auth_state", lambda provider: _state(token))
+    monkeypatch.setattr("hermes_cli.auth.resolve_nous_access_token", lambda: "fresh-token")
+    monkeypatch.setattr("hermes_cli.nous_account._fetch_nous_account_info", lambda *a, **kw: payload)
+
+    info = get_nous_portal_account_info(force_fresh=True)
+
+    assert isinstance(info, NousPortalAccountInfo)
+    assert info.source == "account_api"
+    assert info.fresh is True
+    assert info.email == "alice@example.test"
+    assert info.privy_did == "did:privy:alice"
+    assert info.org_id == "org_123"
+    assert info.paid_service_access is expected_paid
+    assert info.is_paid is expected_paid
+    assert info.is_free_tier is (not expected_paid)
+
+
+def test_force_fresh_uses_account_api_even_when_jwt_is_valid(monkeypatch):
+    token = _jwt(
+        {
+            "sub": "user_123",
+            "org_id": "org_123",
+            "exp": int(time.time()) + 900,
+            "paid_access": False,
+        }
+    )
+    payload = _account_payload(
+        allowed=True,
+        subscription=None,
+        subscription_credits=0,
+        purchased_credits=5,
+    )
+    monkeypatch.setattr("hermes_cli.auth.get_provider_auth_state", lambda provider: _state(token))
+    monkeypatch.setattr("hermes_cli.auth.resolve_nous_access_token", lambda: "fresh-token")
+    monkeypatch.setattr("hermes_cli.nous_account._fetch_nous_account_info", lambda *a, **kw: payload)
+
+    info = get_nous_portal_account_info(force_fresh=True)
+
+    assert info.source == "account_api"
+    assert info.paid_service_access is True
+
+
+def test_no_oauth_token_reports_inference_key_present(monkeypatch):
+    monkeypatch.setattr("hermes_cli.auth.get_provider_auth_state", lambda provider: {})
+
+    class _Entry:
+        label = "manual-nous"
+        access_token = ""
+        agent_key = "opaque-runtime-key"
+        agent_key_expires_at = "2099-01-01T00:00:00+00:00"
+        expires_at = None
+        inference_base_url = "https://inference.example.test/v1"
+        base_url = "https://inference.example.test/v1"
+        priority = 0
+
+        @property
+        def runtime_api_key(self):
+            return self.agent_key
+
+        @property
+        def runtime_base_url(self):
+            return self.inference_base_url
+
+    class _Pool:
+        def has_credentials(self):
+            return True
+
+        def entries(self):
+            return [_Entry()]
+
+    monkeypatch.setattr("agent.credential_pool.load_pool", lambda provider: _Pool())
+
+    info = get_nous_portal_account_info()
+
+    assert info.logged_in is False
+    assert info.source == "inference_key"
+    assert info.inference_credential_present is True
+    assert info.credential_source == "pool:manual-nous"
+    assert info.paid_service_access is None
+
+
+def test_pool_oauth_entry_uses_jwt_snapshot(monkeypatch):
+    token = _jwt(
+        {
+            "sub": "user_123",
+            "org_id": "org_123",
+            "client_id": "hermes-cli",
+            "exp": int(time.time()) + 900,
+            "paid_access": True,
+        }
+    )
+    monkeypatch.setattr("hermes_cli.auth.get_provider_auth_state", lambda provider: {})
+
+    class _Entry:
+        label = "dashboard device_code"
+        auth_type = "oauth"
+        access_token = token
+        refresh_token = "refresh-token"
+        agent_key = "opaque-runtime-key"
+        agent_key_expires_at = "2099-01-01T00:00:00+00:00"
+        expires_at = "2099-01-01T00:00:00+00:00"
+        portal_base_url = "https://portal.example.test"
+        inference_base_url = "https://inference.example.test/v1"
+        base_url = "https://inference.example.test/v1"
+        priority = 0
+
+        @property
+        def runtime_api_key(self):
+            return self.agent_key
+
+        @property
+        def runtime_base_url(self):
+            return self.inference_base_url
+
+    class _Pool:
+        def has_credentials(self):
+            return True
+
+        def entries(self):
+            return [_Entry()]
+
+    monkeypatch.setattr("agent.credential_pool.load_pool", lambda provider: _Pool())
+
+    info = get_nous_portal_account_info()
+
+    assert info.logged_in is True
+    assert info.source == "jwt"
+    assert info.paid_service_access is True
+    assert info.credential_source == "pool:dashboard device_code"
+
+
+def test_pool_oauth_entry_force_fresh_uses_account_api(monkeypatch):
+    token = _jwt(
+        {
+            "sub": "user_123",
+            "org_id": "org_123",
+            "exp": int(time.time()) + 900,
+            "paid_access": False,
+        }
+    )
+    payload = _account_payload(
+        allowed=True,
+        subscription=None,
+        subscription_credits=0,
+        purchased_credits=3,
+    )
+    monkeypatch.setattr("hermes_cli.auth.get_provider_auth_state", lambda provider: {})
+    monkeypatch.setattr("hermes_cli.nous_account._fetch_nous_account_info", lambda *a, **kw: payload)
+
+    class _Entry:
+        label = "dashboard device_code"
+        auth_type = "oauth"
+        access_token = token
+        refresh_token = "refresh-token"
+        agent_key = "opaque-runtime-key"
+        agent_key_expires_at = "2099-01-01T00:00:00+00:00"
+        expires_at = "2099-01-01T00:00:00+00:00"
+        portal_base_url = "https://portal.example.test"
+        inference_base_url = "https://inference.example.test/v1"
+        base_url = "https://inference.example.test/v1"
+        priority = 0
+
+        @property
+        def runtime_api_key(self):
+            return self.agent_key
+
+        @property
+        def runtime_base_url(self):
+            return self.inference_base_url
+
+    class _Pool:
+        def has_credentials(self):
+            return True
+
+        def entries(self):
+            return [_Entry()]
+
+    monkeypatch.setattr("agent.credential_pool.load_pool", lambda provider: _Pool())
+
+    info = get_nous_portal_account_info(force_fresh=True)
+
+    assert info.logged_in is True
+    assert info.source == "account_api"
+    assert info.fresh is True
+    assert info.paid_service_access is True
+    assert info.credential_source == "pool:dashboard device_code"
+
+
+def test_entitlement_message_returns_none_for_paid_access():
+    info = NousPortalAccountInfo(
+        logged_in=True,
+        source="account_api",
+        fresh=True,
+        paid_service_access=True,
+        portal_base_url="https://portal.example.test",
+    )
+
+    assert format_nous_portal_entitlement_message(info, capability="paid models") is None
+
+
+def test_entitlement_message_for_inference_key_without_portal_login():
+    info = NousPortalAccountInfo(
+        logged_in=False,
+        source="inference_key",
+        fresh=False,
+        inference_credential_present=True,
+        portal_base_url="https://portal.example.test",
+    )
+
+    message = format_nous_portal_entitlement_message(
+        info,
+        capability="managed tools",
+    )
+
+    assert message is not None
+    assert "Nous inference credentials are configured" in message
+    assert "cannot verify your Nous Portal paid access" in message
+    assert "Log in with `hermes model`" in message
+
+
+def test_entitlement_message_for_active_paid_subscription_with_no_credits():
+    info = NousPortalAccountInfo(
+        logged_in=True,
+        source="account_api",
+        fresh=True,
+        paid_service_access=False,
+        portal_base_url="https://portal.example.test",
+        paid_service_access_info=NousPaidServiceAccessInfo(
+            allowed=False,
+            reason="no_usable_credits",
+            has_active_subscription=True,
+            active_subscription_is_paid=True,
+            subscription_credits_remaining=0,
+            purchased_credits_remaining=0,
+            total_usable_credits=0,
+        ),
+    )
+
+    message = format_nous_portal_entitlement_message(
+        info,
+        capability="managed tools",
+    )
+
+    assert message is not None
+    assert "credits are exhausted" in message
+    assert "managed tools" in message
+    assert "https://portal.example.test/billing" in message
+
+
+def test_entitlement_message_for_no_subscription_or_credits():
+    info = NousPortalAccountInfo(
+        logged_in=True,
+        source="account_api",
+        fresh=True,
+        paid_service_access=False,
+        portal_base_url="https://portal.example.test",
+        paid_service_access_info=NousPaidServiceAccessInfo(
+            allowed=False,
+            reason="no_usable_credits",
+            has_active_subscription=False,
+            subscription_credits_remaining=0,
+            purchased_credits_remaining=0,
+            total_usable_credits=0,
+        ),
+    )
+
+    message = format_nous_portal_entitlement_message(info, capability="paid models")
+
+    assert message is not None
+    assert "no active subscription or usable credits" in message
+    assert "Subscribe or add credits" in message
+
+
+def test_entitlement_message_for_unknown_entitlement_is_explicit():
+    info = NousPortalAccountInfo(
+        logged_in=True,
+        source="error",
+        fresh=False,
+        paid_service_access=None,
+        portal_base_url="https://portal.example.test",
+        error="account_api_timeout",
+    )
+
+    message = format_nous_portal_entitlement_message(info, capability="Tool Gateway")
+
+    assert message is not None
+    assert "could not verify" in message
+    assert "account_api_timeout" in message
+    assert "Run `hermes model`" in message
+
+
+def test_entitlement_message_for_account_missing():
+    info = NousPortalAccountInfo(
+        logged_in=True,
+        source="account_api",
+        fresh=True,
+        paid_service_access=False,
+        paid_service_access_info=NousPaidServiceAccessInfo(
+            allowed=False,
+            reason="account_missing",
+        ),
+    )
+
+    message = format_nous_portal_entitlement_message(info, capability="Tool Gateway")
+
+    assert message is not None
+    assert "could not find a Nous Portal account or organisation" in message
diff --git a/tests/hermes_cli/test_nous_inference_url_validation.py b/tests/hermes_cli/test_nous_inference_url_validation.py
index 4e688a59a74..e4c70786bf6 100644
--- a/tests/hermes_cli/test_nous_inference_url_validation.py
+++ b/tests/hermes_cli/test_nous_inference_url_validation.py
@@ -1,8 +1,8 @@
 """Regression tests for Nous Portal inference_base_url host-allowlist validation.
 
-A poisoned ``inference_base_url`` from the Portal refresh / agent-key-mint
-response (network MITM, malicious response injection) would otherwise be
-persisted to auth.json and forwarded the user's legitimate agent_key
+A poisoned ``inference_base_url`` from a Portal refresh response (network
+MITM, malicious response injection) would otherwise be persisted to
+auth.json and forwarded with the user's legitimate invoke JWT
 bearer on every subsequent proxy request, exfiltrating their inference
 budget and opening a response-injection channel into the IDE / chat
 client. ``_validate_nous_inference_url_from_network()`` blocks any URL
@@ -11,7 +11,7 @@ outside the allowlist at the source.
 These tests verify:
 
 1. The validator's host + scheme rules.
-2. Each of the five NETWORK call sites in ``auth.py`` calls the validator
+2. Each of the two NETWORK call sites in ``auth.py`` calls the validator
    rather than the unrestricted ``_optional_base_url`` helper.
 3. The proxy adapter applies the validator as belt-and-suspenders.
 4. The env-var override path (``NOUS_INFERENCE_BASE_URL``) is NOT
@@ -22,7 +22,6 @@ These tests verify:
 from __future__ import annotations
 
 import logging
-import pytest
 
 from hermes_cli.auth import (
     DEFAULT_NOUS_INFERENCE_URL,
@@ -125,7 +124,7 @@ class TestValidatorRules:
 
 
 class TestCallSiteWiring:
-    """Verify the validator is actually wired into all 5 NETWORK call sites.
+    """Verify the validator is actually wired into all auth.py NETWORK call sites.
 
     These are not behaviour-end-to-end tests (the surrounding code is
     several hundred lines per site with extensive HTTP mocking
@@ -162,7 +161,7 @@ class TestCallSiteWiring:
             )
 
     def test_validator_wired_at_all_known_call_sites(self):
-        """All 5 known NETWORK sites use the validator. If this count
+        """All 2 known auth.py NETWORK sites use the validator. If this count
         drops, someone removed protection; if it grows, audit the new
         site to be sure validation is appropriate."""
         source = self._read_auth_source()
@@ -172,8 +171,8 @@ class TestCallSiteWiring:
         mint_count = source.count(
             '_validate_nous_inference_url_from_network(mint_payload.get("inference_base_url"))'
         )
-        assert refresh_count == 3, f"expected 3 refresh sites, found {refresh_count}"
-        assert mint_count == 2, f"expected 2 mint sites, found {mint_count}"
+        assert refresh_count == 2, f"expected 2 refresh sites, found {refresh_count}"
+        assert mint_count == 0, f"expected 0 mint sites, found {mint_count}"
 
     def test_proxy_adapter_also_validates(self):
         """The Nous proxy adapter applies the validator as defense-in-depth
diff --git a/tests/hermes_cli/test_nous_subscription.py b/tests/hermes_cli/test_nous_subscription.py
index c1deaf77070..2c89d245301 100644
--- a/tests/hermes_cli/test_nous_subscription.py
+++ b/tests/hermes_cli/test_nous_subscription.py
@@ -1,14 +1,25 @@
 """Tests for Nous subscription feature detection."""
 
+from hermes_cli.nous_account import NousPortalAccountInfo
 from hermes_cli import nous_subscription as ns
 
 
+def _account(*, logged_in: bool, paid: bool | None = None) -> NousPortalAccountInfo:
+    return NousPortalAccountInfo(
+        logged_in=logged_in,
+        source="jwt" if logged_in else "none",
+        fresh=False,
+        paid_service_access=paid,
+    )
+
+
 def test_get_nous_subscription_features_recognizes_direct_exa_backend(monkeypatch):
     env = {"EXA_API_KEY": "exa-test"}
 
     monkeypatch.setattr(ns, "get_env_value", lambda name: env.get(name, ""))
-    monkeypatch.setattr(ns, "get_nous_auth_status", lambda: {})
-    monkeypatch.setattr(ns, "managed_nous_tools_enabled", lambda: False)
+    monkeypatch.setattr(
+        ns, "get_nous_portal_account_info", lambda: _account(logged_in=False)
+    )
     monkeypatch.setattr(ns, "_toolset_enabled", lambda config, key: key == "web")
     monkeypatch.setattr(ns, "_has_agent_browser", lambda: False)
     monkeypatch.setattr(ns, "resolve_openai_audio_api_key", lambda: "")
@@ -23,11 +34,34 @@ def test_get_nous_subscription_features_recognizes_direct_exa_backend(monkeypatc
     assert features.web.current_provider == "exa"
 
 
+def test_get_nous_subscription_features_force_fresh_forwards_account_request(monkeypatch):
+    calls = []
+
+    def fake_account_info(*, force_fresh=False):
+        calls.append(force_fresh)
+        return _account(logged_in=True, paid=True)
+
+    monkeypatch.setattr(ns, "get_env_value", lambda name: "")
+    monkeypatch.setattr(ns, "get_nous_portal_account_info", fake_account_info)
+    monkeypatch.setattr(ns, "_toolset_enabled", lambda config, key: False)
+    monkeypatch.setattr(ns, "_has_agent_browser", lambda: False)
+    monkeypatch.setattr(ns, "resolve_openai_audio_api_key", lambda: "")
+    monkeypatch.setattr(ns, "has_direct_modal_credentials", lambda: False)
+    monkeypatch.setattr(ns, "is_managed_tool_gateway_ready", lambda vendor: False)
+
+    features = ns.get_nous_subscription_features({}, force_fresh=True)
+
+    assert features.account_info is not None
+    assert features.account_info.paid_service_access is True
+    assert calls == [True]
+
+
 def test_get_nous_subscription_features_prefers_managed_modal_in_auto_mode(monkeypatch):
     monkeypatch.setattr("tools.tool_backend_helpers.managed_nous_tools_enabled", lambda: True)
     monkeypatch.setattr(ns, "get_env_value", lambda name: "")
-    monkeypatch.setattr(ns, "get_nous_auth_status", lambda: {"logged_in": True})
-    monkeypatch.setattr(ns, "managed_nous_tools_enabled", lambda: True)
+    monkeypatch.setattr(
+        ns, "get_nous_portal_account_info", lambda: _account(logged_in=True, paid=True)
+    )
     monkeypatch.setattr(ns, "_toolset_enabled", lambda config, key: key == "terminal")
     monkeypatch.setattr(ns, "_has_agent_browser", lambda: False)
     monkeypatch.setattr(ns, "resolve_openai_audio_api_key", lambda: "")
@@ -46,8 +80,9 @@ def test_get_nous_subscription_features_prefers_managed_modal_in_auto_mode(monke
 
 def test_get_nous_subscription_features_marks_browser_use_as_managed_when_gateway_ready(monkeypatch):
     monkeypatch.setattr(ns, "get_env_value", lambda name: "")
-    monkeypatch.setattr(ns, "get_nous_auth_status", lambda: {"logged_in": True})
-    monkeypatch.setattr(ns, "managed_nous_tools_enabled", lambda: True)
+    monkeypatch.setattr(
+        ns, "get_nous_portal_account_info", lambda: _account(logged_in=True, paid=True)
+    )
     monkeypatch.setattr(ns, "_toolset_enabled", lambda config, key: key == "browser")
     monkeypatch.setattr(ns, "_has_agent_browser", lambda: True)
     monkeypatch.setattr(ns, "resolve_openai_audio_api_key", lambda: "")
@@ -78,8 +113,9 @@ def test_get_nous_subscription_features_uses_direct_browserbase_when_no_managed_
     }
 
     monkeypatch.setattr(ns, "get_env_value", lambda name: env.get(name, ""))
-    monkeypatch.setattr(ns, "get_nous_auth_status", lambda: {"logged_in": True})
-    monkeypatch.setattr(ns, "managed_nous_tools_enabled", lambda: True)
+    monkeypatch.setattr(
+        ns, "get_nous_portal_account_info", lambda: _account(logged_in=True, paid=True)
+    )
     monkeypatch.setattr(ns, "_toolset_enabled", lambda config, key: key == "browser")
     monkeypatch.setattr(ns, "_has_agent_browser", lambda: True)
     monkeypatch.setattr(ns, "resolve_openai_audio_api_key", lambda: "")
@@ -103,8 +139,9 @@ def test_get_nous_subscription_features_prefers_camofox_over_managed_browser_use
     env = {"CAMOFOX_URL": "http://localhost:9377"}
 
     monkeypatch.setattr(ns, "get_env_value", lambda name: env.get(name, ""))
-    monkeypatch.setattr(ns, "get_nous_auth_status", lambda: {"logged_in": True})
-    monkeypatch.setattr(ns, "managed_nous_tools_enabled", lambda: True)
+    monkeypatch.setattr(
+        ns, "get_nous_portal_account_info", lambda: _account(logged_in=True, paid=True)
+    )
     monkeypatch.setattr(ns, "_toolset_enabled", lambda config, key: key == "browser")
     monkeypatch.setattr(ns, "_has_agent_browser", lambda: False)
     monkeypatch.setattr(ns, "resolve_openai_audio_api_key", lambda: "")
@@ -133,8 +170,9 @@ def test_get_nous_subscription_features_requires_agent_browser_for_browserbase(m
     }
 
     monkeypatch.setattr(ns, "get_env_value", lambda name: env.get(name, ""))
-    monkeypatch.setattr(ns, "get_nous_auth_status", lambda: {})
-    monkeypatch.setattr(ns, "managed_nous_tools_enabled", lambda: False)
+    monkeypatch.setattr(
+        ns, "get_nous_portal_account_info", lambda: _account(logged_in=False)
+    )
     monkeypatch.setattr(ns, "_toolset_enabled", lambda config, key: key == "browser")
     monkeypatch.setattr(ns, "_has_agent_browser", lambda: False)
     monkeypatch.setattr(ns, "resolve_openai_audio_api_key", lambda: "")
@@ -155,8 +193,9 @@ def test_get_nous_subscription_features_does_not_treat_quoted_false_as_gateway_o
     env = {"EXA_API_KEY": "exa-test"}
 
     monkeypatch.setattr(ns, "get_env_value", lambda name: env.get(name, ""))
-    monkeypatch.setattr(ns, "get_nous_auth_status", lambda: {"logged_in": True})
-    monkeypatch.setattr(ns, "managed_nous_tools_enabled", lambda: True)
+    monkeypatch.setattr(
+        ns, "get_nous_portal_account_info", lambda: _account(logged_in=True, paid=True)
+    )
     monkeypatch.setattr(ns, "_toolset_enabled", lambda config, key: key == "web")
     monkeypatch.setattr(ns, "_has_agent_browser", lambda: False)
     monkeypatch.setattr(ns, "resolve_openai_audio_api_key", lambda: "")
@@ -179,7 +218,7 @@ def test_get_gateway_eligible_tools_ignores_quoted_false_opt_in(monkeypatch):
     monkeypatch.setattr(
         ns,
         "_get_gateway_direct_credentials",
-        lambda: {"web": True, "image_gen": False, "tts": False, "browser": False},
+        lambda: {"web": True, "image_gen": False, "video_gen": False, "tts": False, "browser": False},
     )
 
     unconfigured, has_direct, already_managed = ns.get_gateway_eligible_tools(
@@ -191,4 +230,4 @@ def test_get_gateway_eligible_tools_ignores_quoted_false_opt_in(monkeypatch):
 
     assert "web" in has_direct
     assert "web" not in already_managed
-    assert set(unconfigured) == {"image_gen", "tts", "browser"}
+    assert set(unconfigured) == {"image_gen", "video_gen", "tts", "browser"}
diff --git a/tests/hermes_cli/test_ollama_cloud_auth.py b/tests/hermes_cli/test_ollama_cloud_auth.py
index 760832523cd..7e2dc5ff078 100644
--- a/tests/hermes_cli/test_ollama_cloud_auth.py
+++ b/tests/hermes_cli/test_ollama_cloud_auth.py
@@ -10,8 +10,6 @@ Covers:
 """
 
 import os
-import pytest
-from unittest.mock import patch, MagicMock
 
 
 # ---------------------------------------------------------------------------
diff --git a/tests/hermes_cli/test_ollama_cloud_provider.py b/tests/hermes_cli/test_ollama_cloud_provider.py
index e40ba8ccc86..e62aa899ff8 100644
--- a/tests/hermes_cli/test_ollama_cloud_provider.py
+++ b/tests/hermes_cli/test_ollama_cloud_provider.py
@@ -1,6 +1,5 @@
 """Tests for Ollama Cloud provider integration."""
 
-import os
 import pytest
 from unittest.mock import patch, MagicMock
 
diff --git a/tests/hermes_cli/test_overlay_slug_resolution.py b/tests/hermes_cli/test_overlay_slug_resolution.py
index c87c891f97e..2b03926250f 100644
--- a/tests/hermes_cli/test_overlay_slug_resolution.py
+++ b/tests/hermes_cli/test_overlay_slug_resolution.py
@@ -7,11 +7,9 @@ resolution in list_authenticated_providers() Section 2 must bridge this gap.
 Covers: #5223, #6492
 """
 
-import json
 import os
 from unittest.mock import patch
 
-import pytest
 
 from hermes_cli.model_switch import list_authenticated_providers
 
diff --git a/tests/hermes_cli/test_pip_install_detection.py b/tests/hermes_cli/test_pip_install_detection.py
index da3dd35e329..49df74f626e 100644
--- a/tests/hermes_cli/test_pip_install_detection.py
+++ b/tests/hermes_cli/test_pip_install_detection.py
@@ -1,4 +1,3 @@
-from pathlib import Path
 from unittest.mock import patch
 
 
@@ -60,3 +59,53 @@ def test_docker_detected_via_dockerenv(tmp_path):
 def test_recommended_update_command_docker():
     from hermes_cli.config import recommended_update_command_for_method
     assert "docker pull" in recommended_update_command_for_method("docker")
+
+
+def test_banner_warns_on_pip_install(tmp_path):
+    """The welcome banner surfaces a warning when the install method is pip."""
+    import io
+    from rich.console import Console
+    from hermes_cli import banner
+
+    hh = tmp_path / ".hermes"
+    hh.mkdir()
+    (hh / ".install_method").write_text("pip\n")
+
+    with patch("hermes_cli.config.get_hermes_home", return_value=hh), \
+         patch("hermes_constants.get_hermes_home", return_value=hh):
+        buf = io.StringIO()
+        # Wide console so the warning isn't wrapped across lines in the panel.
+        console = Console(file=buf, width=400, force_terminal=False, color_system=None)
+        banner.build_welcome_banner(
+            console, model="m", cwd="/tmp",
+            tools=[{"function": {"name": "terminal"}}],
+            enabled_toolsets=["terminal"],
+        )
+        out = buf.getvalue()
+
+    assert "officially" in out
+    assert "instability" in out
+
+
+def test_banner_no_pip_warning_on_git_install(tmp_path):
+    """Git installs must not show the pip-install warning."""
+    import io
+    from rich.console import Console
+    from hermes_cli import banner
+
+    hh = tmp_path / ".hermes"
+    hh.mkdir()
+    (hh / ".install_method").write_text("git\n")
+
+    with patch("hermes_cli.config.get_hermes_home", return_value=hh), \
+         patch("hermes_constants.get_hermes_home", return_value=hh):
+        buf = io.StringIO()
+        console = Console(file=buf, width=400, force_terminal=False, color_system=None)
+        banner.build_welcome_banner(
+            console, model="m", cwd="/tmp",
+            tools=[{"function": {"name": "terminal"}}],
+            enabled_toolsets=["terminal"],
+        )
+        out = buf.getvalue()
+
+    assert "officially" not in out
diff --git a/tests/hermes_cli/test_plugin_cli_registration.py b/tests/hermes_cli/test_plugin_cli_registration.py
index af923b96a0d..0deddc8506b 100644
--- a/tests/hermes_cli/test_plugin_cli_registration.py
+++ b/tests/hermes_cli/test_plugin_cli_registration.py
@@ -8,13 +8,9 @@ Covers:
   - Honcho register_cli() builds correct argparse tree
 """
 
-import argparse
-import os
 import sys
-from pathlib import Path
 from unittest.mock import MagicMock
 
-import pytest
 
 from hermes_cli.plugins import (
     PluginContext,
diff --git a/tests/hermes_cli/test_plugin_scanner_recursion.py b/tests/hermes_cli/test_plugin_scanner_recursion.py
index b6e26416811..7a2513e074a 100644
--- a/tests/hermes_cli/test_plugin_scanner_recursion.py
+++ b/tests/hermes_cli/test_plugin_scanner_recursion.py
@@ -14,7 +14,7 @@ from typing import Any, Dict
 import pytest
 import yaml
 
-from hermes_cli.plugins import PluginManager, PluginManifest
+from hermes_cli.plugins import PluginManager
 
 
 # ── Helpers ────────────────────────────────────────────────────────────────
diff --git a/tests/hermes_cli/test_plugins.py b/tests/hermes_cli/test_plugins.py
index 0c500297a2b..b78e8b2921d 100644
--- a/tests/hermes_cli/test_plugins.py
+++ b/tests/hermes_cli/test_plugins.py
@@ -1,7 +1,6 @@
 """Tests for the Hermes plugin system (hermes_cli.plugins)."""
 
 import logging
-import os
 import sys
 import types
 from pathlib import Path
@@ -13,17 +12,13 @@ import yaml
 from hermes_cli.plugins import (
     ENTRY_POINTS_GROUP,
     VALID_HOOKS,
-    LoadedPlugin,
     PluginContext,
     PluginManager,
     PluginManifest,
-    get_plugin_manager,
     get_plugin_command_handler,
     get_plugin_commands,
     get_pre_tool_call_block_message,
     resolve_plugin_command_result,
-    discover_plugins,
-    invoke_hook,
 )
 
 
@@ -1309,7 +1304,6 @@ class TestPluginCommandResultResolution:
         monkeypatch.setattr("hermes_cli.plugins.asyncio.get_running_loop", lambda: _Loop())
         monkeypatch.setattr("hermes_cli.plugins._PLUGIN_COMMAND_AWAIT_TIMEOUT_SECS", 0.1)
 
-        import pytest
         with pytest.raises(TimeoutError):
             resolve_plugin_command_result(_slow_handler())
 
diff --git a/tests/hermes_cli/test_plugins_cmd.py b/tests/hermes_cli/test_plugins_cmd.py
index e1948b00a17..8af16b8e18f 100644
--- a/tests/hermes_cli/test_plugins_cmd.py
+++ b/tests/hermes_cli/test_plugins_cmd.py
@@ -3,8 +3,6 @@
 from __future__ import annotations
 
 import logging
-import os
-import types
 from pathlib import Path
 from unittest.mock import MagicMock, patch
 
@@ -19,7 +17,6 @@ from hermes_cli.plugins_cmd import (
     _resolve_git_executable,
     _resolve_git_url,
     _sanitize_plugin_name,
-    plugins_command,
 )
 
 
@@ -259,7 +256,6 @@ class TestCmdInstall:
 
     def test_install_requires_identifier(self):
         from hermes_cli.plugins_cmd import cmd_install
-        import argparse
 
         with pytest.raises(SystemExit):
             cmd_install("")
@@ -433,7 +429,6 @@ class TestCopyExampleFiles:
     """Test example file copying."""
 
     def test_copies_example_files(self, tmp_path):
-        from hermes_cli.plugins_cmd import _copy_example_files
         from unittest.mock import MagicMock
 
         console = MagicMock()
@@ -449,7 +444,6 @@ class TestCopyExampleFiles:
         console.print.assert_called()
 
     def test_skips_existing_files(self, tmp_path):
-        from hermes_cli.plugins_cmd import _copy_example_files
         from unittest.mock import MagicMock
 
         console = MagicMock()
@@ -466,7 +460,6 @@ class TestCopyExampleFiles:
         assert real_file.read_text() == "existing: true"
 
     def test_handles_copy_error_gracefully(self, tmp_path):
-        from hermes_cli.plugins_cmd import _copy_example_files
         from unittest.mock import MagicMock, patch
 
         console = MagicMock()
diff --git a/tests/hermes_cli/test_plugins_cmd_list.py b/tests/hermes_cli/test_plugins_cmd_list.py
new file mode 100644
index 00000000000..1d9051c2822
--- /dev/null
+++ b/tests/hermes_cli/test_plugins_cmd_list.py
@@ -0,0 +1,88 @@
+import argparse
+import json
+
+from hermes_cli import plugins_cmd
+
+
+def _args(**kwargs):
+    defaults = {
+        "enabled": False,
+        "user": False,
+        "no_bundled": False,
+        "plain": False,
+        "json": False,
+    }
+    defaults.update(kwargs)
+    return argparse.Namespace(**defaults)
+
+
+def test_filter_plugin_entries_enabled_only():
+    entries = [
+        ("disk-cleanup", "2.0.0", "Bundled", "bundled", None),
+        ("web-search-plus", "2.2.0", "Search", "git", None),
+        ("old-plugin", "1.0.0", "Old", "user", None),
+    ]
+
+    filtered = plugins_cmd._filter_plugin_entries(
+        entries,
+        _args(enabled=True),
+        enabled={"disk-cleanup", "web-search-plus"},
+        disabled={"old-plugin"},
+    )
+
+    assert [entry[0] for entry in filtered] == ["disk-cleanup", "web-search-plus"]
+
+
+def test_filter_plugin_entries_no_bundled():
+    entries = [
+        ("disk-cleanup", "2.0.0", "Bundled", "bundled", None),
+        ("drawthings-grpc", "0.3.0", "Draw Things", "user", None),
+        ("web-search-plus", "2.2.0", "Search", "git", None),
+    ]
+
+    filtered = plugins_cmd._filter_plugin_entries(
+        entries,
+        _args(no_bundled=True),
+        enabled=set(),
+        disabled=set(),
+    )
+
+    assert [entry[0] for entry in filtered] == ["drawthings-grpc", "web-search-plus"]
+
+
+def test_cmd_list_plain_compact_output(monkeypatch, capsys):
+    entries = [
+        ("disk-cleanup", "2.0.0", "Bundled", "bundled", None),
+        ("web-search-plus", "2.2.0", "Search", "git", None),
+    ]
+    monkeypatch.setattr(plugins_cmd, "_discover_all_plugins", lambda: entries)
+    monkeypatch.setattr(plugins_cmd, "_get_enabled_set", lambda: {"web-search-plus"})
+    monkeypatch.setattr(plugins_cmd, "_get_disabled_set", lambda: set())
+
+    plugins_cmd.cmd_list(_args(plain=True, no_bundled=True))
+
+    out = capsys.readouterr().out
+    assert "web-search-plus" in out
+    assert "enabled" in out
+    assert "disk-cleanup" not in out
+    assert "Search" not in out  # plain mode stays compact, no descriptions
+
+
+def test_cmd_list_json_output(monkeypatch, capsys):
+    entries = [("web-search-plus", "2.2.0", "Search", "git", None)]
+    monkeypatch.setattr(plugins_cmd, "_discover_all_plugins", lambda: entries)
+    monkeypatch.setattr(plugins_cmd, "_get_enabled_set", lambda: {"web-search-plus"})
+    monkeypatch.setattr(plugins_cmd, "_get_disabled_set", lambda: set())
+
+    plugins_cmd.cmd_list(_args(json=True))
+
+    payload = json.loads(capsys.readouterr().out)
+    assert payload == [
+        {
+            "name": "web-search-plus",
+            "status": "enabled",
+            "version": "2.2.0",
+            "description": "Search",
+            "source": "git",
+        }
+    ]
diff --git a/tests/hermes_cli/test_profile_distribution.py b/tests/hermes_cli/test_profile_distribution.py
index cf27df91b69..235316bd843 100644
--- a/tests/hermes_cli/test_profile_distribution.py
+++ b/tests/hermes_cli/test_profile_distribution.py
@@ -10,7 +10,6 @@ mocking git would just test the mock.
 
 from __future__ import annotations
 
-import os
 from pathlib import Path
 
 import pytest
diff --git a/tests/hermes_cli/test_profile_export_credentials.py b/tests/hermes_cli/test_profile_export_credentials.py
index b26937e3512..f035f986f23 100644
--- a/tests/hermes_cli/test_profile_export_credentials.py
+++ b/tests/hermes_cli/test_profile_export_credentials.py
@@ -6,7 +6,6 @@ profiles; leaking credentials in the archive is a security issue.
 """
 
 import tarfile
-from pathlib import Path
 
 from hermes_cli.profiles import export_profile, _DEFAULT_EXPORT_EXCLUDE_ROOT
 
diff --git a/tests/hermes_cli/test_profiles.py b/tests/hermes_cli/test_profiles.py
index 4b521fa94da..22e36d42123 100644
--- a/tests/hermes_cli/test_profiles.py
+++ b/tests/hermes_cli/test_profiles.py
@@ -7,7 +7,6 @@ and shell completion generation.
 
 import json
 import io
-import os
 import tarfile
 from pathlib import Path
 from unittest.mock import patch, MagicMock
@@ -601,6 +600,114 @@ class TestAliasCollision:
         assert result is not None
         assert "reserved" in result.lower()
 
+    def test_uses_where_on_windows(self, profile_env, monkeypatch):
+        monkeypatch.setattr("sys.platform", "win32")
+        with patch("subprocess.run") as mock_run:
+            mock_run.return_value = MagicMock(returncode=1, stdout="")
+            check_alias_collision("mybot")
+        call_args = mock_run.call_args[0][0]
+        assert call_args[0] == "where"
+
+    def test_uses_which_on_posix(self, profile_env, monkeypatch):
+        monkeypatch.setattr("sys.platform", "darwin")
+        with patch("subprocess.run") as mock_run:
+            mock_run.return_value = MagicMock(returncode=1, stdout="")
+            check_alias_collision("mybot")
+        call_args = mock_run.call_args[0][0]
+        assert call_args[0] == "which"
+
+    def test_windows_checks_bat_extension(self, profile_env, monkeypatch):
+        monkeypatch.setattr("sys.platform", "win32")
+        wrapper_dir = profile_env / ".local" / "bin"
+        wrapper_dir.mkdir(parents=True, exist_ok=True)
+        bat_path = wrapper_dir / "mybot.bat"
+        bat_path.write_text("@echo off\r\nhermes -p mybot %*\r\n")
+        with patch("subprocess.run") as mock_run:
+            mock_run.return_value = MagicMock(
+                returncode=0, stdout=str(bat_path),
+            )
+            result = check_alias_collision("mybot")
+        assert result is None  # our own wrapper, safe to overwrite
+
+
+# ===================================================================
+# TestWrapperScript
+# ===================================================================
+
+class TestWrapperScript:
+    """Tests for create_wrapper_script() and remove_wrapper_script()."""
+
+    def test_creates_sh_on_posix(self, profile_env, monkeypatch):
+        monkeypatch.setattr("sys.platform", "darwin")
+        from hermes_cli.profiles import create_wrapper_script
+        wrapper = create_wrapper_script("mybot")
+        assert wrapper is not None
+        assert wrapper.name == "mybot"
+        content = wrapper.read_text()
+        assert content.startswith("#!/bin/sh")
+        assert "hermes -p mybot" in content
+
+    def test_creates_bat_on_windows(self, profile_env, monkeypatch):
+        monkeypatch.setattr("sys.platform", "win32")
+        from hermes_cli.profiles import create_wrapper_script
+        wrapper = create_wrapper_script("mybot")
+        assert wrapper is not None
+        assert wrapper.name == "mybot.bat"
+        content = wrapper.read_text()
+        assert "@echo off" in content
+        assert "hermes -p mybot" in content
+        assert "%*" in content
+
+    def test_remove_finds_bat_on_windows(self, profile_env, monkeypatch):
+        monkeypatch.setattr("sys.platform", "win32")
+        from hermes_cli.profiles import create_wrapper_script, remove_wrapper_script
+        wrapper = create_wrapper_script("mybot")
+        assert wrapper is not None
+        assert wrapper.exists()
+        removed = remove_wrapper_script("mybot")
+        assert removed is True
+        assert not wrapper.exists()
+
+    def test_remove_finds_sh_on_posix(self, profile_env, monkeypatch):
+        monkeypatch.setattr("sys.platform", "darwin")
+        from hermes_cli.profiles import create_wrapper_script, remove_wrapper_script
+        wrapper = create_wrapper_script("mybot")
+        assert wrapper is not None
+        assert wrapper.exists()
+        removed = remove_wrapper_script("mybot")
+        assert removed is True
+        assert not wrapper.exists()
+
+    def test_remove_returns_false_when_absent(self, profile_env):
+        from hermes_cli.profiles import remove_wrapper_script
+        assert remove_wrapper_script("nonexistent") is False
+
+    def test_custom_alias_target_on_posix(self, profile_env, monkeypatch):
+        # Custom alias name pointing at a differently-named profile: the file
+        # is named after the alias, the -p content references the profile.
+        monkeypatch.setattr("sys.platform", "darwin")
+        from hermes_cli.profiles import create_wrapper_script
+        wrapper = create_wrapper_script("rq", target="redqueen")
+        assert wrapper is not None
+        assert wrapper.name == "rq"
+        content = wrapper.read_text()
+        assert content.startswith("#!/bin/sh")
+        assert "hermes -p redqueen" in content
+
+    def test_custom_alias_target_on_windows(self, profile_env, monkeypatch):
+        # Regression: custom-name aliases must still produce an executable
+        # .bat (not a clobbered #!/bin/sh) on Windows.
+        monkeypatch.setattr("sys.platform", "win32")
+        from hermes_cli.profiles import create_wrapper_script
+        wrapper = create_wrapper_script("rq", target="redqueen")
+        assert wrapper is not None
+        assert wrapper.name == "rq.bat"
+        content = wrapper.read_text()
+        assert "@echo off" in content
+        assert "hermes -p redqueen" in content
+        assert "%*" in content
+        assert "#!/bin/sh" not in content
+
 
 # ===================================================================
 # TestRenameProfile
diff --git a/tests/hermes_cli/test_project_plugin_rce_bypass.py b/tests/hermes_cli/test_project_plugin_rce_bypass.py
index 7dc5ee803e2..1e12b47eb9d 100644
--- a/tests/hermes_cli/test_project_plugin_rce_bypass.py
+++ b/tests/hermes_cli/test_project_plugin_rce_bypass.py
@@ -31,7 +31,6 @@ These tests pin each layer of the new defence:
 from __future__ import annotations
 
 import json
-import os
 import sys
 from pathlib import Path
 from unittest.mock import patch
diff --git a/tests/hermes_cli/test_provider_config_validation.py b/tests/hermes_cli/test_provider_config_validation.py
index cbfffea7854..50cc283d90c 100644
--- a/tests/hermes_cli/test_provider_config_validation.py
+++ b/tests/hermes_cli/test_provider_config_validation.py
@@ -5,9 +5,7 @@ accepted as base_url, and unknown keys go unreported.
 """
 
 import logging
-from unittest.mock import patch
 
-import pytest
 
 from hermes_cli.config import _normalize_custom_provider_entry
 
diff --git a/tests/hermes_cli/test_proxy.py b/tests/hermes_cli/test_proxy.py
index 255610ae390..9559bb55573 100644
--- a/tests/hermes_cli/test_proxy.py
+++ b/tests/hermes_cli/test_proxy.py
@@ -4,7 +4,6 @@ from __future__ import annotations
 
 import asyncio
 import json
-import os
 import threading
 from pathlib import Path
 from typing import Any, Dict
@@ -106,7 +105,7 @@ def test_nous_adapter_authenticated_with_agent_key(tmp_path, monkeypatch):
 
 
 def test_nous_adapter_authenticated_with_refresh_token_only(tmp_path, monkeypatch):
-    """If access_token+refresh_token exist but no agent_key yet, we can still mint."""
+    """If access_token+refresh_token exist but no agent_key yet, we can still refresh."""
     monkeypatch.setenv("HERMES_HOME", str(tmp_path))
     _write_auth_store(tmp_path, {
         "access_token": "access-tok",
@@ -126,7 +125,7 @@ def test_nous_adapter_get_credential_uses_runtime_resolver(tmp_path, monkeypatch
     })
 
     refreshed_state = {
-        "api_key": "minted-bearer",
+        "api_key": "jwt-bearer",
         "base_url": "https://inference-api.nousresearch.com/v1",
         "expires_at": "2099-01-01T00:00:00Z",
     }
@@ -139,13 +138,13 @@ def test_nous_adapter_get_credential_uses_runtime_resolver(tmp_path, monkeypatch
         cred = adapter.get_credential()
 
     mock_resolve.assert_called_once()
-    assert cred.bearer == "minted-bearer"
+    assert cred.bearer == "jwt-bearer"
     assert cred.base_url == "https://inference-api.nousresearch.com/v1"
     assert cred.expires_at == "2099-01-01T00:00:00Z"
     assert cred.token_type == "Bearer"
 
 
-def test_nous_adapter_retry_credential_forces_legacy_mint(tmp_path, monkeypatch):
+def test_nous_adapter_retry_credential_force_refreshes_on_jwt_401(tmp_path, monkeypatch):
     monkeypatch.setenv("HERMES_HOME", str(tmp_path))
     _write_auth_store(tmp_path, {
         "access_token": "jwt-access",
@@ -155,9 +154,8 @@ def test_nous_adapter_retry_credential_forces_legacy_mint(tmp_path, monkeypatch)
         "inference_base_url": "https://inference-api.nousresearch.com/v1",
         "agent_key": "jwt-access",
     })
-
     refreshed_state = {
-        "api_key": "legacy-bearer",
+        "api_key": "fresh-jwt-bearer",
         "base_url": "https://inference-api.nousresearch.com/v1",
         "expires_at": "2099-01-01T00:00:00Z",
     }
@@ -176,11 +174,11 @@ def test_nous_adapter_retry_credential_forces_legacy_mint(tmp_path, monkeypatch)
         )
 
     assert cred is not None
-    assert cred.bearer == "legacy-bearer"
-    assert mock_resolve.call_args.kwargs["inference_auth_mode"] == "legacy"
+    assert cred.bearer == "fresh-jwt-bearer"
+    assert mock_resolve.call_args.kwargs["force_refresh"] is True
 
 
-def test_nous_adapter_retry_credential_skips_opaque_bearer(tmp_path, monkeypatch):
+def test_nous_adapter_retry_credential_skips_non_401(tmp_path, monkeypatch):
     monkeypatch.setenv("HERMES_HOME", str(tmp_path))
     _write_auth_store(tmp_path, {
         "access_token": "jwt-access",
@@ -197,7 +195,7 @@ def test_nous_adapter_retry_credential_skips_opaque_bearer(tmp_path, monkeypatch
                 bearer="opaque-bearer",
                 base_url="https://inference-api.nousresearch.com/v1",
             ),
-            status_code=401,
+            status_code=403,
         )
 
     assert cred is None
@@ -261,8 +259,8 @@ def test_nous_adapter_quarantines_terminal_refresh_failure(tmp_path, monkeypatch
     assert stored.get("credential_pool", {}).get("nous") == []
 
 
-def test_nous_adapter_get_credential_raises_when_no_agent_key_returned(tmp_path, monkeypatch):
-    """If the refresh helper succeeds but produces no agent_key, we surface a clear error."""
+def test_nous_adapter_get_credential_raises_when_no_jwt_returned(tmp_path, monkeypatch):
+    """If the refresh helper succeeds but produces no JWT, we surface a clear error."""
     monkeypatch.setenv("HERMES_HOME", str(tmp_path))
     _write_auth_store(tmp_path, {
         "access_token": "access-tok",
@@ -274,7 +272,7 @@ def test_nous_adapter_get_credential_raises_when_no_agent_key_returned(tmp_path,
         return_value={"access_token": "a", "refresh_token": "r"},
     ):
         adapter = NousPortalAdapter()
-        with pytest.raises(RuntimeError, match="did not return a usable agent_key"):
+        with pytest.raises(RuntimeError, match="did not return a usable inference JWT"):
             adapter.get_credential()
 
 
@@ -450,6 +448,122 @@ def test_xai_adapter_retry_refreshes_current_pool_entry(tmp_path, monkeypatch):
     assert retry.bearer == "new-access-token"
 
 
+def test_xai_adapter_retry_rotates_pool_entry_on_429(tmp_path, monkeypatch):
+    """429 from xAI must rotate to the next pool entry, not attempt refresh.
+
+    Pre-fix (#28932) ``get_retry_credential`` only fired on 401, so a 429
+    rate-limit response flowed back to the client unchanged AND the
+    rate-limited bearer stayed active for the next request — defeating
+    the whole point of pool rotation.
+
+    Post-fix: 429 lands on ``mark_exhausted_and_rotate`` (no refresh —
+    that's irrelevant for rate limits), stamps the 1-hour cooldown
+    via ``EXHAUSTED_TTL_429_SECONDS`` on the offending key, and
+    returns the next available credential.
+    """
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+
+    # Two pool entries so rotation has somewhere to go.
+    auth_path = tmp_path / "auth.json"
+    auth_path.write_text(json.dumps({
+        "version": 1,
+        "providers": {},
+        "credential_pool": {
+            "xai-oauth": [
+                {
+                    "id": "xai-first",
+                    "label": "xai-first",
+                    "auth_type": "oauth",
+                    "priority": 0,
+                    "source": "manual:xai_pkce",
+                    "access_token": "first-access-token",
+                    "refresh_token": "first-refresh-token",
+                    "base_url": "https://api.x.ai/v1",
+                },
+                {
+                    "id": "xai-second",
+                    "label": "xai-second",
+                    "auth_type": "oauth",
+                    "priority": 1,
+                    "source": "manual:xai_pkce",
+                    "access_token": "second-access-token",
+                    "refresh_token": "second-refresh-token",
+                    "base_url": "https://api.x.ai/v1",
+                },
+            ]
+        },
+    }))
+
+    # Refresh must NOT be called on the 429 path — guard against
+    # the fix accidentally trying to refresh-on-rate-limit.
+    def _refresh_must_not_run(*args, **kwargs):
+        raise AssertionError("refresh_xai_oauth_pure must not run on 429")
+
+    monkeypatch.setattr("hermes_cli.auth.refresh_xai_oauth_pure", _refresh_must_not_run)
+
+    adapter = XAIGrokAdapter()
+    failed = adapter.get_credential()
+    assert failed.bearer == "first-access-token", "starting bearer should be the first entry"
+
+    retry = adapter.get_retry_credential(
+        failed_credential=failed,
+        status_code=429,
+    )
+
+    assert retry is not None, "429 must rotate to next pool entry"
+    assert retry.bearer == "second-access-token", (
+        f"expected rotation to second entry, got {retry.bearer!r}"
+    )
+
+
+def test_xai_adapter_retry_returns_none_on_429_when_pool_exhausted(tmp_path, monkeypatch):
+    """Single-entry pool: 429 has nowhere to rotate to → return None
+    so the 429 flows back to the client unchanged (existing behavior
+    preserved)."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    _write_xai_pool_entry(tmp_path)  # single entry
+
+    def _refresh_must_not_run(*args, **kwargs):
+        raise AssertionError("refresh_xai_oauth_pure must not run on 429")
+
+    monkeypatch.setattr("hermes_cli.auth.refresh_xai_oauth_pure", _refresh_must_not_run)
+
+    adapter = XAIGrokAdapter()
+    failed = adapter.get_credential()
+    retry = adapter.get_retry_credential(
+        failed_credential=failed,
+        status_code=429,
+    )
+
+    assert retry is None, (
+        "single-entry pool: 429 must return None so the response "
+        "flows back to the client unchanged"
+    )
+
+
+def test_xai_adapter_retry_returns_none_for_unrelated_status(tmp_path, monkeypatch):
+    """Non-{401, 429} statuses must NOT trigger any retry — pool
+    untouched, no refresh attempted, return None immediately."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    _write_xai_pool_entry(tmp_path)
+
+    def _refresh_must_not_run(*args, **kwargs):
+        raise AssertionError("refresh_xai_oauth_pure must not run on non-retry status")
+
+    monkeypatch.setattr("hermes_cli.auth.refresh_xai_oauth_pure", _refresh_must_not_run)
+
+    adapter = XAIGrokAdapter()
+    failed = adapter.get_credential()
+    for status in (200, 400, 403, 500, 502, 503):
+        retry = adapter.get_retry_credential(
+            failed_credential=failed,
+            status_code=status,
+        )
+        assert retry is None, (
+            f"status {status} must not trigger retry, got {retry!r}"
+        )
+
+
 # ---------------------------------------------------------------------------
 # Server: path filtering + forwarding
 #
diff --git a/tests/hermes_cli/test_psutil_android_extract.py b/tests/hermes_cli/test_psutil_android_extract.py
new file mode 100644
index 00000000000..86477e427c9
--- /dev/null
+++ b/tests/hermes_cli/test_psutil_android_extract.py
@@ -0,0 +1,126 @@
+"""Regression tests for the Android psutil compatibility installer."""
+
+from __future__ import annotations
+
+import io
+import shutil
+import tarfile
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+
+from hermes_cli.psutil_android import (
+    MARKER,
+    REPLACEMENT,
+    PSUTIL_URL,
+    PsutilAndroidInstallError,
+    prepare_patched_psutil_sdist,
+)
+
+
+def _add_dir(tf: tarfile.TarFile, name: str) -> None:
+    info = tarfile.TarInfo(name)
+    info.type = tarfile.DIRTYPE
+    info.mode = 0o755
+    tf.addfile(info)
+
+
+def _add_file(tf: tarfile.TarFile, name: str, content: str) -> None:
+    payload = content.encode("utf-8")
+    info = tarfile.TarInfo(name)
+    info.size = len(payload)
+    info.mode = 0o644
+    tf.addfile(info, io.BytesIO(payload))
+
+
+def _build_psutil_archive(archive: Path, *, malicious_symlink: bool) -> None:
+    with tarfile.open(archive, "w:gz") as tf:
+        _add_dir(tf, "psutil-7.2.2")
+        if malicious_symlink:
+            link = tarfile.TarInfo("psutil-7.2.2/psutil")
+            link.type = tarfile.SYMTYPE
+            link.linkname = "../../outside"
+            tf.addfile(link)
+        else:
+            _add_dir(tf, "psutil-7.2.2/psutil")
+        _add_file(
+            tf,
+            "psutil-7.2.2/psutil/_common.py",
+            f"{MARKER}\n",
+        )
+
+
+def test_prepare_patched_psutil_sdist_rejects_symlink_member(tmp_path):
+    """A symlink member must be rejected before any file payload is written."""
+    archive = tmp_path / "evil.tar.gz"
+    _build_psutil_archive(archive, malicious_symlink=True)
+
+    destination = tmp_path / "extract"
+    with pytest.raises(PsutilAndroidInstallError, match="Unsupported archive member type"):
+        prepare_patched_psutil_sdist(archive, destination)
+
+    assert not (tmp_path / "outside" / "_common.py").exists()
+
+
+def test_install_psutil_android_compat_uses_patched_tree(tmp_path):
+    """Updater path should install from the patched temporary sdist tree."""
+    archive = tmp_path / "psutil.tar.gz"
+    _build_psutil_archive(archive, malicious_symlink=False)
+
+    from hermes_cli import main as hermes_main
+
+    captured: dict[str, object] = {}
+
+    def fake_urlretrieve(url: str, dest: Path):
+        assert url == PSUTIL_URL
+        shutil.copyfile(archive, dest)
+        return str(dest), None
+
+    def fake_run_install(cmd: list[str], *, env=None):
+        src_root = Path(cmd[-1])
+        captured["cmd"] = cmd
+        captured["env"] = env
+        captured["common_py"] = (src_root / "psutil" / "_common.py").read_text(
+            encoding="utf-8"
+        )
+
+    with patch("urllib.request.urlretrieve", side_effect=fake_urlretrieve), \
+         patch.object(hermes_main, "_run_install_with_heartbeat", side_effect=fake_run_install):
+        hermes_main._install_psutil_android_compat(
+            ["uv", "pip"],
+            env={"HERMES_TEST": "1"},
+        )
+
+    assert captured["cmd"][:4] == ["uv", "pip", "install", "--no-build-isolation"]
+    assert captured["env"] == {"HERMES_TEST": "1"}
+    assert REPLACEMENT in str(captured["common_py"])
+
+
+def test_install_psutil_android_script_uses_patched_tree(tmp_path, monkeypatch, capsys):
+    """Standalone installer script should reuse the same safe patched tree."""
+    archive = tmp_path / "psutil.tar.gz"
+    _build_psutil_archive(archive, malicious_symlink=False)
+
+    import scripts.install_psutil_android as installer
+
+    def fake_urlretrieve(url: str, dest: Path):
+        assert url == PSUTIL_URL
+        shutil.copyfile(archive, dest)
+        return str(dest), None
+
+    def fake_subprocess_run(cmd: list[str]):
+        src_root = Path(cmd[-1])
+        patched = (src_root / "psutil" / "_common.py").read_text(encoding="utf-8")
+        assert REPLACEMENT in patched
+        return type("RunResult", (), {"returncode": 0})()
+
+    monkeypatch.setattr(installer.sys, "argv", ["install_psutil_android.py"])
+    monkeypatch.setattr(installer, "_resolve_install_cmd", lambda *_args: ["python", "-m", "pip"])
+
+    with patch("urllib.request.urlretrieve", side_effect=fake_urlretrieve), \
+         patch.object(installer.subprocess, "run", side_effect=fake_subprocess_run):
+        assert installer.main() == 0
+
+    captured = capsys.readouterr()
+    assert "psutil installed via Android compatibility shim" in captured.out
diff --git a/tests/hermes_cli/test_regression_16767.py b/tests/hermes_cli/test_regression_16767.py
index 4aea5d64094..bbbae4df587 100644
--- a/tests/hermes_cli/test_regression_16767.py
+++ b/tests/hermes_cli/test_regression_16767.py
@@ -1,7 +1,4 @@
-import pytest
 import sys
-from unittest.mock import patch
-from pathlib import Path
 
 import hermes_cli.model_switch as ms
 from hermes_cli.model_switch import DirectAlias
diff --git a/tests/hermes_cli/test_run_with_idle_timeout.py b/tests/hermes_cli/test_run_with_idle_timeout.py
new file mode 100644
index 00000000000..37308f116a4
--- /dev/null
+++ b/tests/hermes_cli/test_run_with_idle_timeout.py
@@ -0,0 +1,67 @@
+"""Coverage for _run_with_idle_timeout — the streaming subprocess helper.
+
+Kept in a dedicated test file because the tests spawn real ``subprocess.Popen``
+instances; pytest-isolate runs each test file in its own worker process, so
+isolating these here prevents real-Popen state from racing with the
+``subprocess.run`` / ``_run_with_idle_timeout`` patches used by
+``test_web_ui_build.py``.
+
+Added for issue #33788: ``hermes update`` got stuck at "webui-build" because
+``npm run build`` ran with ``capture_output=True`` and no timeout. The helper
+fixes both halves — streams output AND idle-kills the process.
+"""
+
+import sys as _sys
+import time
+
+from hermes_cli.main import _run_with_idle_timeout
+
+
+def test_streams_output_and_returns_zero_on_success(tmp_path):
+    script = tmp_path / "ok.py"
+    script.write_text("print('line one'); print('line two')\n")
+    result = _run_with_idle_timeout(
+        [_sys.executable, str(script)], cwd=tmp_path, idle_timeout_seconds=10
+    )
+    assert result.returncode == 0
+    assert "line one" in result.stdout
+    assert "line two" in result.stdout
+
+
+def test_propagates_nonzero_exit(tmp_path):
+    script = tmp_path / "fail.py"
+    script.write_text("import sys; print('boom', file=sys.stderr); sys.exit(7)\n")
+    result = _run_with_idle_timeout(
+        [_sys.executable, str(script)], cwd=tmp_path, idle_timeout_seconds=10
+    )
+    assert result.returncode == 7
+    # stderr is merged into stdout in the helper.
+    assert "boom" in result.stdout
+
+
+def test_kills_process_on_idle_timeout(tmp_path):
+    # Sleeps without printing — exactly the failure mode users see when
+    # `npm run build` stalls. Idle timeout must terminate it.
+    script = tmp_path / "stall.py"
+    script.write_text("import time; time.sleep(30)\n")
+
+    start = time.monotonic()
+    result = _run_with_idle_timeout(
+        [_sys.executable, str(script)],
+        cwd=tmp_path,
+        idle_timeout_seconds=1,
+    )
+    elapsed = time.monotonic() - start
+    # Should have died well before the 30s sleep completes.
+    assert elapsed < 15
+    assert result.returncode != 0
+    assert "produced no output" in result.stdout
+
+
+def test_returns_127_when_binary_missing(tmp_path):
+    result = _run_with_idle_timeout(
+        ["/nonexistent/binary/does/not/exist"],
+        cwd=tmp_path,
+        idle_timeout_seconds=5,
+    )
+    assert result.returncode == 127
diff --git a/tests/hermes_cli/test_runtime_provider_resolution.py b/tests/hermes_cli/test_runtime_provider_resolution.py
index 129c21f04b2..2f89be93368 100644
--- a/tests/hermes_cli/test_runtime_provider_resolution.py
+++ b/tests/hermes_cli/test_runtime_provider_resolution.py
@@ -793,6 +793,54 @@ def test_named_custom_provider_uses_key_env_from_providers_dict(monkeypatch):
     assert resolved["model"] == "acme-large"
 
 
+def test_named_custom_provider_same_url_uses_matching_key_env_and_api_mode(monkeypatch):
+    """Named custom providers on one gateway must keep their own credentials and protocol."""
+    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
+    monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
+    monkeypatch.setenv("GPT_KEY", "gpt-secret")
+    monkeypatch.setenv("CLAUDE_KEY", "claude-secret")
+    monkeypatch.setattr(
+        rp,
+        "load_config",
+        lambda: {
+            "custom_providers": [
+                {
+                    "name": "gpt",
+                    "base_url": "https://gateway.example.com",
+                    "key_env": "GPT_KEY",
+                    "api_mode": "codex_responses",
+                    "model": "gpt-5.5",
+                },
+                {
+                    "name": "claude",
+                    "base_url": "https://gateway.example.com",
+                    "key_env": "CLAUDE_KEY",
+                    "api_mode": "anthropic_messages",
+                    "model": "claude-opus-4-8",
+                },
+            ],
+        },
+    )
+    monkeypatch.setattr(
+        rp,
+        "resolve_provider",
+        lambda *a, **k: (_ for _ in ()).throw(
+            AssertionError(
+                "resolve_provider should not be called for named custom providers"
+            )
+        ),
+    )
+
+    resolved = rp.resolve_runtime_provider(requested="custom:claude")
+
+    assert resolved["provider"] == "custom"
+    assert resolved["base_url"] == "https://gateway.example.com"
+    assert resolved["api_key"] == "claude-secret"
+    assert resolved["api_mode"] == "anthropic_messages"
+    assert resolved["requested_provider"] == "custom:claude"
+    assert resolved["model"] == "claude-opus-4-8"
+
+
 def test_named_custom_provider_falls_back_to_openai_api_key(monkeypatch):
     monkeypatch.setenv("OPENAI_API_KEY", "env-openai-key")
     monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
diff --git a/tests/hermes_cli/test_security_audit.py b/tests/hermes_cli/test_security_audit.py
index fe6abe7221c..0a8d70c1d7b 100644
--- a/tests/hermes_cli/test_security_audit.py
+++ b/tests/hermes_cli/test_security_audit.py
@@ -10,7 +10,6 @@ import json
 from pathlib import Path
 from unittest.mock import patch
 
-import pytest
 
 from hermes_cli import security_audit as sa
 
diff --git a/tests/hermes_cli/test_send_cmd.py b/tests/hermes_cli/test_send_cmd.py
index 802cff88c90..218227266b7 100644
--- a/tests/hermes_cli/test_send_cmd.py
+++ b/tests/hermes_cli/test_send_cmd.py
@@ -9,7 +9,6 @@ from __future__ import annotations
 
 import io
 import json
-from pathlib import Path
 
 import pytest
 
diff --git a/tests/hermes_cli/test_service_manager.py b/tests/hermes_cli/test_service_manager.py
index c627a9044fa..8c37c3878bc 100644
--- a/tests/hermes_cli/test_service_manager.py
+++ b/tests/hermes_cli/test_service_manager.py
@@ -361,7 +361,6 @@ def test_get_service_manager_returns_s6_instance(
 ) -> None:
     """The s6 backend ships in Phase 3 — the factory must return an
     S6ServiceManager when running inside a container."""
-    from hermes_cli.service_manager import S6ServiceManager
     monkeypatch.setattr(
         "hermes_cli.service_manager.detect_service_manager", lambda: "s6",
     )
@@ -406,7 +405,6 @@ def fake_subprocess_run(monkeypatch: pytest.MonkeyPatch):
 
 
 def test_s6_manager_kind_and_supports_registration() -> None:
-    from hermes_cli.service_manager import S6ServiceManager
     mgr = S6ServiceManager()
     assert mgr.kind == "s6"
     assert mgr.supports_runtime_registration() is True
@@ -524,7 +522,6 @@ def test_seed_supervise_skeleton_is_idempotent(tmp_path) -> None:
 def test_s6_register_creates_service_dir_and_triggers_scan(
     s6_scandir, fake_subprocess_run,
 ) -> None:
-    from hermes_cli.service_manager import S6ServiceManager
     mgr = S6ServiceManager(scandir=s6_scandir)
     mgr.register_profile_gateway("coder")
 
@@ -536,6 +533,7 @@ def test_s6_register_creates_service_dir_and_triggers_scan(
     assert run_path.is_file()
     assert run_path.stat().st_mode & 0o111  # executable
     run_text = run_path.read_text()
+    assert "export HOME=/opt/data" in run_text
     assert "hermes -p coder gateway run" in run_text
     assert "s6-setuidgid hermes" in run_text
     # Sentinel marking this as the supervised-child invocation. Without
@@ -555,6 +553,16 @@ def test_s6_register_creates_service_dir_and_triggers_scan(
     assert "/opt/data/logs/gateways/coder" not in log_text, (
         "log_dir was hard-coded; must use ${HERMES_HOME} at run time"
     )
+    # `1` action directive forwards lines to stdout BEFORE the file
+    # destination so the supervised gateway's stdout (including the
+    # rich-console banner and plain print() output) reaches docker
+    # logs, not just the rotated file. See _render_log_run's docstring
+    # for the full output-routing rationale.
+    assert "s6-log 1 " in log_text, (
+        "log/run must include the `1` action directive before the file "
+        "destination so supervised stdout reaches docker logs. Saw: "
+        f"{log_text!r}"
+    )
 
     # s6-svscanctl -a was invoked against the scandir
     assert any(
@@ -565,7 +573,6 @@ def test_s6_register_creates_service_dir_and_triggers_scan(
 
 
 def test_s6_register_extra_env_is_quoted(s6_scandir, fake_subprocess_run) -> None:
-    from hermes_cli.service_manager import S6ServiceManager
     mgr = S6ServiceManager(scandir=s6_scandir)
     mgr.register_profile_gateway(
         "x", extra_env={"FOO": "bar baz", "QUOTED": "a'b"},
@@ -576,15 +583,21 @@ def test_s6_register_extra_env_is_quoted(s6_scandir, fake_subprocess_run) -> Non
     assert "export QUOTED='a'\"'\"'b'" in run_text
 
 
+def test_render_run_script_resets_home_before_exec() -> None:
+
+    run_text = S6ServiceManager._render_run_script("coder", {})
+
+    assert "export HOME=/opt/data" in run_text
+    assert "exec s6-setuidgid hermes hermes -p coder gateway run" in run_text
+
+
 def test_s6_register_rejects_invalid_profile_name(s6_scandir) -> None:
-    from hermes_cli.service_manager import S6ServiceManager
     mgr = S6ServiceManager(scandir=s6_scandir)
     with pytest.raises(ValueError):
         mgr.register_profile_gateway("Bad/Name")
 
 
 def test_s6_register_rejects_duplicate(s6_scandir, fake_subprocess_run) -> None:
-    from hermes_cli.service_manager import S6ServiceManager
     mgr = S6ServiceManager(scandir=s6_scandir)
     (s6_scandir / "gateway-coder").mkdir(parents=True)
     with pytest.raises(ValueError, match="already registered"):
@@ -597,7 +610,6 @@ def test_s6_register_rolls_back_on_svscanctl_failure(
     """If s6-svscanctl fails the service dir must be cleaned up so the
     next register call doesn't see a stale duplicate."""
     import subprocess as _sp
-    from hermes_cli.service_manager import S6ServiceManager
 
     def _fail_scanctl(cmd, **kw):
         # Manager calls s6-svscanctl by absolute path; match on basename.
@@ -615,7 +627,6 @@ def test_s6_register_rolls_back_on_svscanctl_failure(
 def test_s6_unregister_removes_service_dir(
     s6_scandir, fake_subprocess_run,
 ) -> None:
-    from hermes_cli.service_manager import S6ServiceManager
     svc_dir = s6_scandir / "gateway-coder"
     svc_dir.mkdir(parents=True)
     (svc_dir / "type").write_text("longrun\n")
@@ -635,13 +646,11 @@ def test_s6_unregister_removes_service_dir(
 
 
 def test_s6_unregister_absent_profile_is_noop(s6_scandir) -> None:
-    from hermes_cli.service_manager import S6ServiceManager
     # Should NOT raise even though "ghost" doesn't exist
     S6ServiceManager(scandir=s6_scandir).unregister_profile_gateway("ghost")
 
 
 def test_s6_list_profile_gateways(s6_scandir) -> None:
-    from hermes_cli.service_manager import S6ServiceManager
     # Three gateway profiles + one unrelated service + one hidden dir
     (s6_scandir / "gateway-coder").mkdir()
     (s6_scandir / "gateway-assistant").mkdir()
@@ -654,7 +663,6 @@ def test_s6_list_profile_gateways(s6_scandir) -> None:
 
 
 def test_s6_list_profile_gateways_empty_when_scandir_missing(tmp_path) -> None:
-    from hermes_cli.service_manager import S6ServiceManager
     missing = tmp_path / "does-not-exist"
     assert S6ServiceManager(scandir=missing).list_profile_gateways() == []
 
@@ -662,7 +670,6 @@ def test_s6_list_profile_gateways_empty_when_scandir_missing(tmp_path) -> None:
 def test_s6_lifecycle_dispatches_to_s6_svc(
     s6_scandir, fake_subprocess_run,
 ) -> None:
-    from hermes_cli.service_manager import S6ServiceManager
     mgr = S6ServiceManager(scandir=s6_scandir)
     # _run_svc now verifies the slot exists before invoking s6-svc, so
     # we have to pre-seed the dir. In real use the slot is created by
@@ -690,7 +697,6 @@ def test_lifecycle_raises_gateway_not_registered_for_missing_slot(
     opaque CalledProcessError stacktrace."""
     from hermes_cli.service_manager import (
         GatewayNotRegisteredError,
-        S6ServiceManager,
     )
 
     mgr = S6ServiceManager(scandir=s6_scandir)
@@ -720,7 +726,6 @@ def test_all_lifecycle_methods_check_for_missing_slot(
     """start/stop/restart all check for missing slots the same way."""
     from hermes_cli.service_manager import (
         GatewayNotRegisteredError,
-        S6ServiceManager,
     )
 
     mgr = S6ServiceManager(scandir=s6_scandir)
@@ -735,7 +740,6 @@ def test_gateway_not_registered_unprefixed_service_name(s6_scandir) -> None:
     accidentally strip user-provided text."""
     from hermes_cli.service_manager import (
         GatewayNotRegisteredError,
-        S6ServiceManager,
     )
 
     mgr = S6ServiceManager(scandir=s6_scandir)
@@ -752,7 +756,7 @@ def test_lifecycle_raises_s6_command_error_on_subprocess_failure(
     CalledProcessError into a named S6CommandError carrying the
     return code and stderr."""
     import subprocess as _sp
-    from hermes_cli.service_manager import S6CommandError, S6ServiceManager
+    from hermes_cli.service_manager import S6CommandError
 
     # Pre-create the slot so we reach the s6-svc call.
     (s6_scandir / "gateway-coder").mkdir()
@@ -781,7 +785,6 @@ def test_s6_is_running_parses_svstat(
     s6_scandir, monkeypatch: pytest.MonkeyPatch,
 ) -> None:
     import subprocess as _sp
-    from hermes_cli.service_manager import S6ServiceManager
 
     def _svstat(cmd, **kw):
         if cmd[0].endswith("/s6-svstat"):
diff --git a/tests/hermes_cli/test_session_browse.py b/tests/hermes_cli/test_session_browse.py
index a9d7153c83a..833729973ae 100644
--- a/tests/hermes_cli/test_session_browse.py
+++ b/tests/hermes_cli/test_session_browse.py
@@ -6,11 +6,9 @@ Covers:
 - Argument parser registration
 """
 
-import os
 import time
-from unittest.mock import MagicMock, patch, call
+from unittest.mock import MagicMock, patch
 
-import pytest
 
 from hermes_cli.main import _session_browse_picker
 
@@ -249,7 +247,6 @@ class TestCursesBrowse:
 
     def _run_with_keys(self, sessions, key_sequence):
         """Simulate running the curses picker with a given key sequence."""
-        import curses
 
         # Build a mock stdscr that returns keys from the sequence
         mock_stdscr = MagicMock()
@@ -305,7 +302,6 @@ class TestCursesBrowse:
 
     def test_type_to_filter_then_enter(self):
         """Typing characters filters the list, Enter selects from filtered."""
-        import curses
         sessions = [
             {"id": "s1", "source": "cli", "title": "Alpha project", "preview": "", "last_active": time.time()},
             {"id": "s2", "source": "cli", "title": "Beta project", "preview": "", "last_active": time.time()},
@@ -325,7 +321,6 @@ class TestCursesBrowse:
 
     def test_backspace_removes_filter_char(self):
         """Backspace removes the last character from the filter."""
-        import curses
         sessions = [
             {"id": "s1", "source": "cli", "title": "Alpha", "preview": "", "last_active": time.time()},
             {"id": "s2", "source": "cli", "title": "Beta", "preview": "", "last_active": time.time()},
@@ -337,7 +332,6 @@ class TestCursesBrowse:
 
     def test_escape_clears_filter_first(self):
         """First Esc clears the search text, second Esc exits."""
-        import curses
         sessions = _make_sessions(3)
         # Type "ab" then Esc (clears filter) then Enter (selects first)
         keys = [ord('a'), ord('b'), 27, 10]
@@ -391,11 +385,9 @@ class TestSessionBrowseArgparse:
 
     def test_browse_subcommand_exists(self):
         """hermes sessions browse should be parseable."""
-        from hermes_cli.main import main as _main_entry
 
         # We can't run main(), but we can import and test the parser setup
         # by checking that argparse doesn't error on "sessions browse"
-        import argparse
         # Re-create the parser portion
         # Instead, let's just verify the import works and the function exists
         from hermes_cli.main import _session_browse_picker
diff --git a/tests/hermes_cli/test_session_recap.py b/tests/hermes_cli/test_session_recap.py
index 3998c06c61a..062988525f2 100644
--- a/tests/hermes_cli/test_session_recap.py
+++ b/tests/hermes_cli/test_session_recap.py
@@ -3,7 +3,6 @@ from __future__ import annotations
 
 import json
 
-import pytest
 
 from hermes_cli.session_recap import build_recap
 
diff --git a/tests/hermes_cli/test_set_config_value.py b/tests/hermes_cli/test_set_config_value.py
index 21516083c66..d404549cf52 100644
--- a/tests/hermes_cli/test_set_config_value.py
+++ b/tests/hermes_cli/test_set_config_value.py
@@ -2,8 +2,7 @@
 
 import argparse
 import os
-from pathlib import Path
-from unittest.mock import patch, call
+from unittest.mock import patch
 
 import pytest
 
diff --git a/tests/hermes_cli/test_setup.py b/tests/hermes_cli/test_setup.py
index 8f9a8494cdc..abd26a0a306 100644
--- a/tests/hermes_cli/test_setup.py
+++ b/tests/hermes_cli/test_setup.py
@@ -1,12 +1,8 @@
 """Tests for setup.py configuration flows."""
-import json
-import os
 import sys
 import types
 
-import pytest
 
-from hermes_cli.auth import get_active_provider
 from hermes_cli.config import load_config, save_config
 from hermes_cli import setup as setup_mod
 from hermes_cli.setup import setup_model_provider
diff --git a/tests/hermes_cli/test_setup_irc.py b/tests/hermes_cli/test_setup_irc.py
index 1e5baa5cc0f..31b263fec35 100644
--- a/tests/hermes_cli/test_setup_irc.py
+++ b/tests/hermes_cli/test_setup_irc.py
@@ -6,7 +6,6 @@ interactive setup menus.
 """
 
 import os
-import pytest
 
 from gateway.platform_registry import PlatformEntry, platform_registry
 
diff --git a/tests/hermes_cli/test_setup_matrix_e2ee.py b/tests/hermes_cli/test_setup_matrix_e2ee.py
index d965e354ac4..cde7618ed12 100644
--- a/tests/hermes_cli/test_setup_matrix_e2ee.py
+++ b/tests/hermes_cli/test_setup_matrix_e2ee.py
@@ -1,7 +1,6 @@
 """Test that setup.py has shutil available for Matrix E2EE auto-install."""
 import ast
 
-import pytest
 
 
 def _parse_setup_imports():
diff --git a/tests/hermes_cli/test_setup_model_provider.py b/tests/hermes_cli/test_setup_model_provider.py
index b79b33315d8..aa8a9c182ba 100644
--- a/tests/hermes_cli/test_setup_model_provider.py
+++ b/tests/hermes_cli/test_setup_model_provider.py
@@ -498,6 +498,7 @@ def test_setup_summary_shows_camofox_when_browser_feature_is_camofox(tmp_path, m
             features={
                 "web": NousFeatureState("web", "Web tools", True, False, False, False, False, True, ""),
                 "image_gen": NousFeatureState("image_gen", "Image generation", True, False, False, False, False, True, ""),
+                "video_gen": NousFeatureState("video_gen", "Video generation", False, False, False, False, False, False, ""),
                 "tts": NousFeatureState("tts", "OpenAI TTS", True, False, False, False, False, True, ""),
                 "browser": NousFeatureState("browser", "Browser automation", True, True, True, False, True, True, "Camofox"),
                 "modal": NousFeatureState("modal", "Modal execution", False, False, False, False, False, True, "local"),
@@ -525,6 +526,7 @@ def test_setup_summary_does_not_mark_incomplete_browserbase_as_available(tmp_pat
             features={
                 "web": NousFeatureState("web", "Web tools", True, False, False, False, False, True, ""),
                 "image_gen": NousFeatureState("image_gen", "Image generation", True, False, False, False, False, True, ""),
+                "video_gen": NousFeatureState("video_gen", "Video generation", False, False, False, False, False, False, ""),
                 "tts": NousFeatureState("tts", "OpenAI TTS", True, False, False, False, False, True, ""),
                 "browser": NousFeatureState("browser", "Browser automation", True, False, False, False, False, True, "Browserbase"),
                 "modal": NousFeatureState("modal", "Modal execution", False, False, False, False, False, True, "local"),
diff --git a/tests/hermes_cli/test_setup_noninteractive.py b/tests/hermes_cli/test_setup_noninteractive.py
index 68f6bd5a203..083b3069c93 100644
--- a/tests/hermes_cli/test_setup_noninteractive.py
+++ b/tests/hermes_cli/test_setup_noninteractive.py
@@ -1,7 +1,7 @@
 """Tests for non-interactive setup and first-run headless behavior."""
 
 from argparse import Namespace
-from unittest.mock import MagicMock, patch
+from unittest.mock import patch
 
 import pytest
 from hermes_cli.config import DEFAULT_CONFIG, load_config, save_config
diff --git a/tests/hermes_cli/test_setup_ollama_cloud_force_refresh.py b/tests/hermes_cli/test_setup_ollama_cloud_force_refresh.py
index b0ae2196d1d..60f6ea99341 100644
--- a/tests/hermes_cli/test_setup_ollama_cloud_force_refresh.py
+++ b/tests/hermes_cli/test_setup_ollama_cloud_force_refresh.py
@@ -5,7 +5,6 @@ serving a stale cache (models.dev only, no live API probe) for up to an hour.
 
 from __future__ import annotations
 
-from unittest.mock import patch
 
 
 def test_setup_ollama_cloud_passes_force_refresh(monkeypatch):
diff --git a/tests/hermes_cli/test_signal_handler_kanban_worker.py b/tests/hermes_cli/test_signal_handler_kanban_worker.py
new file mode 100644
index 00000000000..445e80e2f5f
--- /dev/null
+++ b/tests/hermes_cli/test_signal_handler_kanban_worker.py
@@ -0,0 +1,230 @@
+"""Regression test for #28181 — kanban worker SIGTERM must terminate the process.
+
+The single-query signal handler in cli.py (``_signal_handler_q``) raises
+``KeyboardInterrupt`` to unwind the main thread on SIGTERM/SIGHUP. That works
+for interactive ``hermes chat -q`` invocations, but kanban workers spawned by
+the dispatcher are likely to have a non-daemon thread alive (terminal_tool's
+``_wait_for_process``, custom plugin background workers, etc.). With
+``KeyboardInterrupt`` only the main thread unwinds; the non-daemon thread
+keeps the process alive after the gateway has already restarted, the kanban
+dispatcher's ``_pid_alive`` check returns True forever, and the task stays
+``running`` indefinitely.
+
+The fix: when the process is a dispatcher-spawned worker (``HERMES_KANBAN_TASK``
+env var set), flush logging + stdout/stderr and call ``os._exit(0)`` instead.
+The kernel reclaims the PID immediately, and ``detect_crashed_workers``
+reclaims the stale claim on the next dispatcher tick.
+
+These tests use a synthetic Python script that mirrors the cli.py signal
+handler shape so we can exercise the exit-path contract without booting the
+full CLI (which needs a real provider config).
+"""
+from __future__ import annotations
+
+import os
+import signal
+import subprocess
+import sys
+import textwrap
+import time
+
+import pytest
+
+
+def _synthetic_worker_script() -> str:
+    """A standalone script that mirrors cli.py's single-query SIGTERM handler.
+
+    Keeping the synthetic copy here means the test exercises the exact handler
+    shape without needing the full hermes_cli boot path (config, providers,
+    skills, etc.). If the production handler in cli.py drifts, the test
+    that loads the real handler (test_real_handler_uses_os_exit) will catch it.
+    """
+    return textwrap.dedent(
+        """
+        import os, signal, sys, threading, time
+
+        # Non-daemon thread that blocks forever — simulates the worker
+        # thread that would prevent orderly Python shutdown after
+        # KeyboardInterrupt unwinds main.
+        stuck = threading.Event()
+        threading.Thread(target=stuck.wait, daemon=False).start()
+
+        def handler(signum, frame):
+            # Mirrors cli.py:_signal_handler_q. Real handler sleeps 1.5s; the
+            # test uses a short grace so it runs fast.
+            try:
+                time.sleep(0.05)
+            except Exception:
+                pass
+            if os.environ.get("HERMES_KANBAN_TASK"):
+                try:
+                    if hasattr(signal, "SIGALRM"):
+                        signal.signal(signal.SIGALRM, lambda *_: os._exit(0))
+                        signal.alarm(2)
+                except Exception:
+                    pass
+                sys.stdout.flush()
+                sys.stderr.flush()
+                os._exit(0)
+            raise KeyboardInterrupt()
+
+        signal.signal(signal.SIGTERM, handler)
+        print("READY", flush=True)
+        try:
+            threading.Event().wait()
+        except KeyboardInterrupt:
+            sys.exit(0)
+        """
+    )
+
+
+def _is_alive_like_dispatcher(pid: int) -> bool:
+    """Mirrors hermes_cli/kanban_db.py:_pid_alive on Linux.
+
+    A zombie is treated as dead — the dispatcher's _pid_alive checks
+    /proc/<pid>/status for State: Z. We replicate that here so a clean
+    os._exit followed by zombie-state is correctly counted as dead.
+    """
+    if pid <= 0:
+        return False
+    try:
+        os.kill(pid, 0)
+    except ProcessLookupError:
+        return False
+    except PermissionError:
+        return True
+    if sys.platform == "linux":
+        try:
+            with open(f"/proc/{pid}/status") as f:
+                for line in f:
+                    if line.startswith("State:"):
+                        if "Z" in line.split(":", 1)[1]:
+                            return False
+                        break
+        except (FileNotFoundError, PermissionError, OSError):
+            pass
+    return True
+
+
+def _spawn_synthetic(env_overrides: dict) -> subprocess.Popen:
+    env = dict(os.environ)
+    env.update(env_overrides)
+    proc = subprocess.Popen(
+        [sys.executable, "-u", "-c", _synthetic_worker_script()],
+        env=env,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        start_new_session=True,
+    )
+    # Wait for "READY" so we know the signal handler is installed.
+    assert proc.stdout is not None
+    deadline = time.time() + 5.0
+    while time.time() < deadline:
+        line = proc.stdout.readline()
+        if line and line.startswith(b"READY"):
+            return proc
+    proc.kill()
+    raise RuntimeError("synthetic worker never signalled READY")
+
+
+def _cleanup(proc: subprocess.Popen) -> None:
+    try:
+        os.killpg(os.getpgid(proc.pid), signal.SIGKILL)
+    except (ProcessLookupError, PermissionError):
+        pass
+    try:
+        proc.communicate(timeout=2)
+    except subprocess.TimeoutExpired:
+        proc.kill()
+
+
+@pytest.mark.skipif(
+    sys.platform == "win32",
+    reason="SIGTERM semantics differ on Windows; kanban dispatcher is POSIX-only",
+)
+def test_sigterm_with_kanban_task_env_terminates_quickly():
+    """With HERMES_KANBAN_TASK set, SIGTERM should kill the process in <2s
+    even when a non-daemon thread is still alive."""
+    proc = _spawn_synthetic({"HERMES_KANBAN_TASK": "t_test_28181"})
+    try:
+        t0 = time.time()
+        os.kill(proc.pid, signal.SIGTERM)
+
+        # Should die in <2s. The handler sleeps ~50ms, then os._exit(0)
+        # is immediate. Give generous headroom for slow CI runners.
+        deadline = t0 + 2.0
+        while time.time() < deadline:
+            if not _is_alive_like_dispatcher(proc.pid):
+                elapsed = time.time() - t0
+                assert elapsed < 2.0
+                return
+            time.sleep(0.02)
+        pytest.fail(
+            f"process still alive 2s after SIGTERM with HERMES_KANBAN_TASK set "
+            f"(dispatcher would keep extending claim) — fix regressed"
+        )
+    finally:
+        _cleanup(proc)
+
+
+@pytest.mark.skipif(
+    sys.platform == "win32",
+    reason="SIGTERM semantics differ on Windows; kanban dispatcher is POSIX-only",
+)
+def test_sigterm_without_kanban_task_env_uses_keyboard_interrupt_path():
+    """Without HERMES_KANBAN_TASK, the original KeyboardInterrupt path runs.
+
+    This is the contrast case proving the fix is gated on the env var: in
+    interactive ``hermes chat -q`` (no env var), behavior is unchanged. The
+    process MAY hang under non-daemon threads, but that's not a kanban-worker
+    concern. We just verify the handler logs the KeyboardInterrupt branch
+    rather than os._exit'ing.
+    """
+    proc = _spawn_synthetic({})
+    try:
+        os.kill(proc.pid, signal.SIGTERM)
+        # Wait a moment for the handler to react.
+        time.sleep(0.5)
+        # The process may or may not be dead depending on whether the
+        # KeyboardInterrupt unwinds cleanly. The behavioral guarantee is
+        # only that the env-gated path didn't fire.
+        try:
+            # Drain stdout up to whatever's available.
+            if proc.stdout is not None:
+                proc.stdout.close()
+            if proc.stderr is not None:
+                proc.stderr.close()
+        except Exception:
+            pass
+    finally:
+        _cleanup(proc)
+
+
+def test_real_handler_uses_os_exit_for_kanban_workers():
+    """Source-level invariant: cli.py's _signal_handler_q must call
+    os._exit(0) when HERMES_KANBAN_TASK is set.
+
+    Catches the case where someone refactors the handler and accidentally
+    drops the env-gated exit, restoring the bug. Reading cli.py directly is
+    cheap and avoids the heavy CLI import.
+    """
+    import pathlib
+
+    cli_path = (
+        pathlib.Path(__file__).resolve().parent.parent.parent / "cli.py"
+    )
+    src = cli_path.read_text()
+    # Locate the handler body.
+    start = src.find("def _signal_handler_q(signum, frame):")
+    assert start != -1, "cli.py is missing _signal_handler_q"
+    # Look ahead for the env-gated os._exit call within ~80 lines.
+    body = src[start : start + 4000]
+    assert "HERMES_KANBAN_TASK" in body, (
+        "_signal_handler_q must gate its kanban-worker exit path on "
+        "HERMES_KANBAN_TASK — see #28181"
+    )
+    assert "os._exit(0)" in body, (
+        "_signal_handler_q must call os._exit(0) for kanban workers — "
+        "raising KeyboardInterrupt orphans the process when non-daemon "
+        "threads are alive (see #28181)"
+    )
diff --git a/tests/hermes_cli/test_skills_config.py b/tests/hermes_cli/test_skills_config.py
index 9742f0ac6f1..7e2170a3e94 100644
--- a/tests/hermes_cli/test_skills_config.py
+++ b/tests/hermes_cli/test_skills_config.py
@@ -1,6 +1,5 @@
 """Tests for hermes_cli/skills_config.py and skills_tool disabled filtering."""
-import pytest
-from unittest.mock import patch, MagicMock
+from unittest.mock import patch
 
 
 # ---------------------------------------------------------------------------
diff --git a/tests/hermes_cli/test_skills_hub.py b/tests/hermes_cli/test_skills_hub.py
index 25798dcd3eb..1e505cd758c 100644
--- a/tests/hermes_cli/test_skills_hub.py
+++ b/tests/hermes_cli/test_skills_hub.py
@@ -651,3 +651,95 @@ def test_browse_skills_dedup_uses_identifier_not_name(monkeypatch):
         "browse_skills() must not deduplicate browse-sh skills with the same name "
         "but different identifiers"
     )
+
+
+# ---------------------------------------------------------------------------
+# Regression: full identifier must be recoverable from `hermes skills search`
+# even when the slug is too long to fit the terminal width (issue #33674).
+# ---------------------------------------------------------------------------
+
+# A real browse-sh-style slug whose trailing -XXXXXX hash matters for install
+_LONG_SLUG = "browse-sh/weather.gov/get-forecast-1uezib"
+
+_LONG_RESULT = type("R", (), {
+    "name": "get-forecast",
+    "description": "Fetch the forecast",
+    "source": "browse-sh",
+    "trust_level": "community",
+    "identifier": _LONG_SLUG,
+})()
+
+
+def test_do_search_identifier_column_does_not_truncate_long_slug():
+    """The Identifier column must use overflow='fold', not the default ellipsis.
+
+    Renders into a deliberately narrow Console; the full slug (including the
+    trailing -1uezib hash) must still appear in the output. Before the fix,
+    Rich would render `browse-sh/weather…` and lose the hash.
+    """
+    from hermes_cli.skills_hub import do_search
+
+    sink = StringIO()
+    # Narrow width forces Rich to apply overflow rules — exactly the scenario
+    # the issue reports. width=40 is too small for the slug; we want the slug
+    # wrapped (not ellipsis-truncated).
+    console = Console(file=sink, force_terminal=False, color_system=None, width=40)
+
+    with patch("tools.skills_hub.unified_search", return_value=[_LONG_RESULT]), \
+         patch("tools.skills_hub.create_source_router", return_value={}), \
+         patch("tools.skills_hub.GitHubAuth"):
+        do_search("weather", console=console)
+
+    output = sink.getvalue()
+
+    # The fix is working when the Identifier column wraps the slug across
+    # multiple lines (folded chunks) rather than emitting ONE line with an
+    # ellipsis. Extract every chunk that appears in the rightmost cell of
+    # the table by walking lines that look like table rows ("│ ... │") and
+    # taking the last `│...│` cell. Concatenating those chunks must yield
+    # the full slug.
+    chunks = []
+    for line in output.splitlines():
+        # Table data rows start and end with the box-drawing vertical bar.
+        if not line.startswith("│") or not line.rstrip().endswith("│"):
+            continue
+        # Last `│ ... │` cell on the row is the Identifier column.
+        last_cell = line.rstrip().rsplit("│", 2)[-2].strip()
+        if last_cell:
+            chunks.append(last_cell)
+    reconstructed = "".join(chunks)
+    assert _LONG_SLUG in reconstructed, (
+        f"Expected full slug {_LONG_SLUG!r} to be recoverable from the "
+        f"folded Identifier column; got chunks {chunks!r}\n"
+        f"Full output:\n{output}"
+    )
+    # And the truncating ellipsis must NOT appear in the Identifier column.
+    # Rich uses U+2026 HORIZONTAL ELLIPSIS for the default overflow="ellipsis".
+    assert "\u2026" not in reconstructed, (
+        f"Identifier column still ellipsis-truncated: {reconstructed!r}"
+    )
+
+
+def test_do_search_json_flag_emits_full_identifiers(capsys):
+    """`--json` must print a parseable array with full identifiers and skip the table."""
+    from hermes_cli.skills_hub import do_search
+
+    sink = StringIO()
+    console = Console(file=sink, force_terminal=False, color_system=None, width=40)
+
+    with patch("tools.skills_hub.unified_search", return_value=[_LONG_RESULT]), \
+         patch("tools.skills_hub.create_source_router", return_value={}), \
+         patch("tools.skills_hub.GitHubAuth"):
+        do_search("weather", console=console, as_json=True)
+
+    # JSON goes to stdout via print(), not the Rich console sink.
+    captured = capsys.readouterr().out
+    import json as _json
+    payload = _json.loads(captured)
+    assert isinstance(payload, list) and len(payload) == 1
+    assert payload[0]["identifier"] == _LONG_SLUG
+    assert payload[0]["name"] == "get-forecast"
+    assert payload[0]["source"] == "browse-sh"
+    # Table render must be suppressed — sink should be empty (no "Searching for:" header).
+    assert "Searching for:" not in sink.getvalue()
+
diff --git a/tests/hermes_cli/test_skills_install_flags.py b/tests/hermes_cli/test_skills_install_flags.py
index b1608903fc6..bb8628c6700 100644
--- a/tests/hermes_cli/test_skills_install_flags.py
+++ b/tests/hermes_cli/test_skills_install_flags.py
@@ -8,7 +8,6 @@ Based on PR #1595 by 333Alden333 (salvaged).
 """
 
 import sys
-from types import SimpleNamespace
 
 
 def test_cli_skills_install_yes_sets_skip_confirm(monkeypatch):
diff --git a/tests/hermes_cli/test_skills_skip_confirm.py b/tests/hermes_cli/test_skills_skip_confirm.py
index fd430185f78..507d9115932 100644
--- a/tests/hermes_cli/test_skills_skip_confirm.py
+++ b/tests/hermes_cli/test_skills_skip_confirm.py
@@ -10,9 +10,8 @@ Based on PR #1595 by 333Alden333 (salvaged).
 Updated for PR #3586 (cache-aware install/uninstall).
 """
 
-from unittest.mock import patch, MagicMock
+from unittest.mock import patch
 
-import pytest
 
 
 class TestHandleSkillsSlashInstallFlags:
diff --git a/tests/hermes_cli/test_skills_subparser.py b/tests/hermes_cli/test_skills_subparser.py
index d2b89ed3eaa..853e422b9e0 100644
--- a/tests/hermes_cli/test_skills_subparser.py
+++ b/tests/hermes_cli/test_skills_subparser.py
@@ -17,7 +17,6 @@ def test_no_duplicate_skills_subparser():
     # Force fresh import of the module where parser is constructed
     # If there are duplicate 'skills' subparsers, this import will raise
     # argparse.ArgumentError at module load time
-    import importlib
     import sys
 
     # Remove cached module if present
diff --git a/tests/hermes_cli/test_skin_engine.py b/tests/hermes_cli/test_skin_engine.py
index 0de68b5150b..ba4d4c4ce16 100644
--- a/tests/hermes_cli/test_skin_engine.py
+++ b/tests/hermes_cli/test_skin_engine.py
@@ -1,10 +1,6 @@
 """Tests for hermes_cli.skin_engine — the data-driven skin/theme system."""
 
-import json
-import os
 import pytest
-from pathlib import Path
-from unittest.mock import patch
 
 
 @pytest.fixture(autouse=True)
@@ -185,7 +181,7 @@ class TestSkinManagement:
 
 class TestUserSkins:
     def test_load_user_skin_from_yaml(self, tmp_path, monkeypatch):
-        from hermes_cli.skin_engine import load_skin, _skins_dir
+        from hermes_cli.skin_engine import load_skin
         # Create a user skin YAML
         skins_dir = tmp_path / "skins"
         skins_dir.mkdir()
diff --git a/tests/hermes_cli/test_status.py b/tests/hermes_cli/test_status.py
index 0ce13ad3021..b3006d4bbc3 100644
--- a/tests/hermes_cli/test_status.py
+++ b/tests/hermes_cli/test_status.py
@@ -83,6 +83,56 @@ def test_show_status_reports_nous_auth_error(monkeypatch, capsys, tmp_path):
     assert "Key exp:" in output
 
 
+def test_show_status_reports_nous_inference_key_without_portal_login(monkeypatch, capsys, tmp_path):
+    from hermes_cli import status as status_mod
+    from hermes_cli.nous_account import NousPortalAccountInfo
+    import hermes_cli.auth as auth_mod
+    import hermes_cli.gateway as gateway_mod
+
+    monkeypatch.setattr(status_mod, "get_env_path", lambda: tmp_path / ".env", raising=False)
+    monkeypatch.setattr(status_mod, "get_hermes_home", lambda: tmp_path, raising=False)
+    monkeypatch.setattr(status_mod, "load_config", lambda: {"model": "gpt-5.4"}, raising=False)
+    monkeypatch.setattr(status_mod, "resolve_requested_provider", lambda requested=None: "openai-codex", raising=False)
+    monkeypatch.setattr(status_mod, "resolve_provider", lambda requested=None, **kwargs: "openai-codex", raising=False)
+    monkeypatch.setattr(status_mod, "provider_label", lambda provider: "OpenAI Codex", raising=False)
+    monkeypatch.setattr(
+        auth_mod,
+        "get_nous_auth_status",
+        lambda: {
+            "logged_in": False,
+            "inference_credential_present": True,
+            "credential_source": "pool:manual opaque key",
+            "inference_base_url": "https://inference.example.com/v1",
+            "agent_key_expires_at": "2099-01-01T00:00:00+00:00",
+        },
+        raising=False,
+    )
+    monkeypatch.setattr(
+        status_mod,
+        "get_nous_portal_account_info",
+        lambda: NousPortalAccountInfo(
+            logged_in=False,
+            source="inference_key",
+            fresh=False,
+            inference_credential_present=True,
+            inference_base_url="https://inference.example.com/v1",
+        ),
+        raising=False,
+    )
+    monkeypatch.setattr(status_mod, "managed_nous_tools_enabled", lambda: False, raising=False)
+    monkeypatch.setattr(auth_mod, "get_codex_auth_status", lambda: {}, raising=False)
+    monkeypatch.setattr(auth_mod, "get_qwen_auth_status", lambda: {}, raising=False)
+    monkeypatch.setattr(auth_mod, "get_xai_oauth_auth_status", lambda: {}, raising=False)
+    monkeypatch.setattr(gateway_mod, "find_gateway_pids", lambda exclude_pids=None: [], raising=False)
+
+    status_mod.show_status(SimpleNamespace(all=False, deep=False))
+
+    output = capsys.readouterr().out
+    assert "Nous Portal   ✗ not logged in (Nous inference key configured)" in output
+    assert "Inference:  https://inference.example.com/v1" in output
+    assert "Nous inference credentials are configured" in output
+
+
 # ---------------------------------------------------------------------------
 # Helpers shared by xAI OAuth status tests
 # ---------------------------------------------------------------------------
diff --git a/tests/hermes_cli/test_status_model_provider.py b/tests/hermes_cli/test_status_model_provider.py
index af6b90204ca..6608955d404 100644
--- a/tests/hermes_cli/test_status_model_provider.py
+++ b/tests/hermes_cli/test_status_model_provider.py
@@ -2,6 +2,7 @@
 
 from types import SimpleNamespace
 
+from hermes_cli.nous_account import NousPaidServiceAccessInfo, NousPortalAccountInfo
 from hermes_cli.nous_subscription import NousFeatureState, NousSubscriptionFeatures
 
 
@@ -87,6 +88,7 @@ def test_show_status_reports_managed_nous_features(monkeypatch, capsys, tmp_path
             features={
                 "web": NousFeatureState("web", "Web tools", True, True, True, True, False, True, "firecrawl"),
                 "image_gen": NousFeatureState("image_gen", "Image generation", True, True, True, True, False, True, "Nous Subscription"),
+                "video_gen": NousFeatureState("video_gen", "Video generation", False, False, False, False, False, False, ""),
                 "tts": NousFeatureState("tts", "OpenAI TTS", True, True, True, True, False, True, "OpenAI TTS"),
                 "browser": NousFeatureState("browser", "Browser automation", True, True, True, True, False, True, "Browser Use"),
                 "modal": NousFeatureState("modal", "Modal execution", False, True, False, False, False, True, "local"),
@@ -124,6 +126,59 @@ def test_show_status_hides_nous_subscription_section_when_feature_flag_is_off(mo
     assert "Nous Tool Gateway" not in out
 
 
+def test_show_status_reports_exhausted_nous_credits(monkeypatch, capsys, tmp_path):
+    monkeypatch.setattr("hermes_cli.status.managed_nous_tools_enabled", lambda: False)
+    from hermes_cli import status as status_mod
+    import hermes_cli.auth as auth_mod
+
+    _patch_common_status_deps(monkeypatch, status_mod, tmp_path)
+    monkeypatch.setattr(
+        auth_mod,
+        "get_nous_auth_status",
+        lambda: {
+            "logged_in": False,
+            "access_token": "jwt",
+            "portal_base_url": "https://portal.example.test",
+            "error": "credits exhausted",
+            "error_code": "insufficient_credits",
+        },
+        raising=False,
+    )
+    monkeypatch.setattr(
+        status_mod,
+        "get_nous_portal_account_info",
+        lambda: NousPortalAccountInfo(
+            logged_in=True,
+            source="account_api",
+            fresh=True,
+            paid_service_access=False,
+            portal_base_url="https://portal.example.test",
+            paid_service_access_info=NousPaidServiceAccessInfo(
+                allowed=False,
+                reason="no_usable_credits",
+                has_active_subscription=True,
+                active_subscription_is_paid=True,
+                subscription_credits_remaining=0,
+                purchased_credits_remaining=0,
+                total_usable_credits=0,
+            ),
+        ),
+        raising=False,
+    )
+    monkeypatch.setattr(status_mod, "load_config", lambda: {"model": {"provider": "nous"}}, raising=False)
+    monkeypatch.setattr(status_mod, "resolve_requested_provider", lambda requested=None: "nous", raising=False)
+    monkeypatch.setattr(status_mod, "resolve_provider", lambda requested=None, **kwargs: "nous", raising=False)
+    monkeypatch.setattr(status_mod, "provider_label", lambda provider: "Nous Portal", raising=False)
+
+    status_mod.show_status(SimpleNamespace(all=False, deep=False))
+
+    out = capsys.readouterr().out
+    assert "Nous Tool Gateway" in out
+    assert "credits are exhausted" in out
+    assert "https://portal.example.test/billing" in out
+    assert "free-tier Nous account" not in out
+
+
 def test_show_status_reports_empty_lmstudio_listing_as_reachable(monkeypatch, capsys, tmp_path):
     from hermes_cli import status as status_mod
 
diff --git a/tests/hermes_cli/test_subparser_routing_fallback.py b/tests/hermes_cli/test_subparser_routing_fallback.py
index 37b3509f134..29c9b6a4b14 100644
--- a/tests/hermes_cli/test_subparser_routing_fallback.py
+++ b/tests/hermes_cli/test_subparser_routing_fallback.py
@@ -13,7 +13,6 @@ import argparse
 import io
 import sys
 
-import pytest
 
 
 def _build_parser():
diff --git a/tests/hermes_cli/test_suppress_eio_on_interrupt.py b/tests/hermes_cli/test_suppress_eio_on_interrupt.py
index a60ebef565e..0617bf1b403 100644
--- a/tests/hermes_cli/test_suppress_eio_on_interrupt.py
+++ b/tests/hermes_cli/test_suppress_eio_on_interrupt.py
@@ -13,7 +13,6 @@ to prevent a hard crash.
 from __future__ import annotations
 
 import errno
-import os
 from unittest.mock import MagicMock
 
 import pytest
diff --git a/tests/hermes_cli/test_tencent_tokenhub_provider.py b/tests/hermes_cli/test_tencent_tokenhub_provider.py
index a673afc377e..55ab42244c8 100644
--- a/tests/hermes_cli/test_tencent_tokenhub_provider.py
+++ b/tests/hermes_cli/test_tencent_tokenhub_provider.py
@@ -10,7 +10,6 @@ from hermes_cli.auth import (
     resolve_provider,
     get_api_key_provider_status,
     resolve_api_key_provider_credentials,
-    AuthError,
 )
 
 
diff --git a/tests/hermes_cli/test_tips.py b/tests/hermes_cli/test_tips.py
index b0287df9647..31d0d116b26 100644
--- a/tests/hermes_cli/test_tips.py
+++ b/tests/hermes_cli/test_tips.py
@@ -1,6 +1,5 @@
 """Tests for hermes_cli/tips.py — random tip display at session start."""
 
-import pytest
 from hermes_cli.tips import TIPS, get_random_tip
 
 
diff --git a/tests/hermes_cli/test_tool_token_estimation.py b/tests/hermes_cli/test_tool_token_estimation.py
index 3e48980bf88..87db30e628d 100644
--- a/tests/hermes_cli/test_tool_token_estimation.py
+++ b/tests/hermes_cli/test_tool_token_estimation.py
@@ -1,6 +1,5 @@
 """Tests for tool token estimation and curses_ui status_fn support."""
 
-from unittest.mock import patch
 
 import pytest
 
@@ -20,7 +19,7 @@ _needs_tiktoken = pytest.mark.skipif(not _has_tiktoken, reason="tiktoken not ins
 @_needs_tiktoken
 def test_estimate_tool_tokens_returns_positive_counts():
     """_estimate_tool_tokens should return a non-empty dict with positive values."""
-    from hermes_cli.tools_config import _estimate_tool_tokens, _tool_token_cache
+    from hermes_cli.tools_config import _estimate_tool_tokens
 
     # Clear cache to force fresh computation
     import hermes_cli.tools_config as tc
diff --git a/tests/hermes_cli/test_tools_config.py b/tests/hermes_cli/test_tools_config.py
index 0cb42ba299a..cfef9c3b46a 100644
--- a/tests/hermes_cli/test_tools_config.py
+++ b/tests/hermes_cli/test_tools_config.py
@@ -1,9 +1,11 @@
 """Tests for hermes_cli.tools_config platform tool persistence."""
 
+from types import SimpleNamespace
 from unittest.mock import patch
 
 import pytest
 
+from hermes_cli.nous_account import NousPortalAccountInfo
 from hermes_cli.tools_config import (
     _DEFAULT_OFF_TOOLSETS,
     _apply_toolset_change,
@@ -79,6 +81,46 @@ def test_get_platform_tools_uses_default_when_platform_not_configured():
 def test_configurable_toolsets_include_messaging():
     assert any(ts_key == "messaging" for ts_key, _, _ in CONFIGURABLE_TOOLSETS)
 
+
+def test_configurable_toolsets_include_context_engine():
+    assert any(ts_key == "context_engine" for ts_key, _, _ in CONFIGURABLE_TOOLSETS)
+
+
+def test_get_platform_tools_active_context_engine_is_enabled_for_explicit_config():
+    config = {
+        "context": {"engine": "lcm"},
+        "platform_toolsets": {"cli": ["web", "terminal"]},
+    }
+
+    enabled = _get_platform_tools(config, "cli", include_default_mcp_servers=False)
+
+    assert "context_engine" in enabled
+    assert "web" in enabled
+    assert "terminal" in enabled
+
+
+def test_get_platform_tools_context_engine_not_added_for_default_compressor():
+    config = {
+        "context": {"engine": "compressor"},
+        "platform_toolsets": {"cli": ["web", "terminal"]},
+    }
+
+    enabled = _get_platform_tools(config, "cli", include_default_mcp_servers=False)
+
+    assert "context_engine" not in enabled
+
+
+def test_get_platform_tools_context_engine_respects_explicit_empty_selection():
+    config = {
+        "context": {"engine": "lcm"},
+        "platform_toolsets": {"cli": []},
+    }
+
+    enabled = _get_platform_tools(config, "cli", include_default_mcp_servers=False)
+
+    assert "context_engine" not in enabled
+
+
 def test_get_platform_tools_default_telegram_includes_messaging():
     enabled = _get_platform_tools({}, "telegram")
 
@@ -553,12 +595,16 @@ def test_save_platform_tools_still_preserves_mcp_with_platform_default_present()
 
 
 def test_visible_providers_include_nous_subscription_when_logged_in(monkeypatch):
-    monkeypatch.setattr("hermes_cli.tools_config.managed_nous_tools_enabled", lambda: True)
     config = {"model": {"provider": "nous"}}
 
     monkeypatch.setattr(
-        "hermes_cli.nous_subscription.get_nous_auth_status",
-        lambda: {"logged_in": True},
+        "hermes_cli.nous_subscription.get_nous_portal_account_info",
+        lambda: NousPortalAccountInfo(
+            logged_in=True,
+            source="jwt",
+            fresh=False,
+            paid_service_access=True,
+        ),
     )
 
     providers = _visible_providers(TOOL_CATEGORIES["browser"], config)
@@ -566,13 +612,48 @@ def test_visible_providers_include_nous_subscription_when_logged_in(monkeypatch)
     assert providers[0]["name"].startswith("Nous Subscription")
 
 
-def test_visible_providers_hide_nous_subscription_when_feature_flag_is_off(monkeypatch):
-    monkeypatch.setattr("hermes_cli.tools_config.managed_nous_tools_enabled", lambda: False)
+def test_visible_providers_force_fresh_shows_nous_subscription_after_upgrade(monkeypatch):
+    calls = []
+
+    def fake_subscription_features(config, *, force_fresh=False):
+        calls.append(("features", force_fresh))
+        return SimpleNamespace(
+            nous_auth_present=True,
+            account_info=NousPortalAccountInfo(
+                logged_in=True,
+                source="account_api" if force_fresh else "jwt",
+                fresh=force_fresh,
+                paid_service_access=True if force_fresh else False,
+            ),
+            features={},
+        )
+
+    monkeypatch.setattr(
+        "hermes_cli.tools_config.get_nous_subscription_features",
+        fake_subscription_features,
+    )
+
+    providers = _visible_providers(
+        TOOL_CATEGORIES["browser"],
+        {"model": {"provider": "nous"}},
+        force_fresh=True,
+    )
+
+    assert providers[0]["name"].startswith("Nous Subscription")
+    assert ("features", True) in calls
+
+
+def test_visible_providers_hide_nous_subscription_when_paid_access_is_false(monkeypatch):
     config = {"model": {"provider": "nous"}}
 
     monkeypatch.setattr(
-        "hermes_cli.nous_subscription.get_nous_auth_status",
-        lambda: {"logged_in": True},
+        "hermes_cli.nous_subscription.get_nous_portal_account_info",
+        lambda: NousPortalAccountInfo(
+                logged_in=True,
+                source="jwt",
+                fresh=False,
+                paid_service_access=False,
+            ),
     )
 
     providers = _visible_providers(TOOL_CATEGORIES["browser"], config)
@@ -601,7 +682,7 @@ def test_reconfigure_lists_enabled_web_without_existing_provider_config(monkeypa
 
     monkeypatch.setattr(
         "hermes_cli.tools_config._toolset_has_keys",
-        lambda ts_key, config=None: False,
+        lambda ts_key, config=None, **kwargs: False,
     )
 
     def fake_prompt_choice(question, choices, default=0):
@@ -611,7 +692,7 @@ def test_reconfigure_lists_enabled_web_without_existing_provider_config(monkeypa
     monkeypatch.setattr("hermes_cli.tools_config._prompt_choice", fake_prompt_choice)
     monkeypatch.setattr(
         "hermes_cli.tools_config._configure_tool_category_for_reconfig",
-        lambda ts_key, cat, config: configured.append(ts_key),
+        lambda ts_key, cat, config, **kwargs: configured.append(ts_key),
     )
     monkeypatch.setattr("hermes_cli.tools_config.save_config", lambda config: None)
 
@@ -622,7 +703,6 @@ def test_reconfigure_lists_enabled_web_without_existing_provider_config(monkeypa
 
 
 def test_first_install_nous_auto_configures_managed_defaults(monkeypatch):
-    monkeypatch.setattr("hermes_cli.tools_config.managed_nous_tools_enabled", lambda: True)
     monkeypatch.setattr("hermes_cli.nous_subscription.managed_nous_tools_enabled", lambda: True)
     config = {
         "model": {"provider": "nous"},
@@ -657,8 +737,13 @@ def test_first_install_nous_auto_configures_managed_defaults(monkeypatch):
         lambda: ["cli"],
     )
     monkeypatch.setattr(
-        "hermes_cli.nous_subscription.get_nous_auth_status",
-        lambda: {"logged_in": True},
+        "hermes_cli.nous_subscription.get_nous_portal_account_info",
+        lambda *args, **kwargs: NousPortalAccountInfo(
+            logged_in=True,
+            source="jwt",
+            fresh=False,
+            paid_service_access=True,
+        ),
     )
 
     configured = []
diff --git a/tests/hermes_cli/test_tui_bundled.py b/tests/hermes_cli/test_tui_bundled.py
index c49443a3f76..a5b97c2fa07 100644
--- a/tests/hermes_cli/test_tui_bundled.py
+++ b/tests/hermes_cli/test_tui_bundled.py
@@ -1,4 +1,3 @@
-from pathlib import Path
 
 
 def test_tui_finds_bundled_entry_js(tmp_path):
diff --git a/tests/hermes_cli/test_uninstall_node_symlinks.py b/tests/hermes_cli/test_uninstall_node_symlinks.py
new file mode 100644
index 00000000000..316e6d646bb
--- /dev/null
+++ b/tests/hermes_cli/test_uninstall_node_symlinks.py
@@ -0,0 +1,132 @@
+"""Tests for hermes_cli.uninstall.remove_node_symlinks.
+
+Regression for #34536: the POSIX installer drops node/npm/npx symlinks in
+~/.local/bin pointing into $HERMES_HOME/node and prepends ~/.local/bin to
+PATH, shadowing an existing nvm. Uninstall must remove those symlinks, but
+only when they still resolve into the Hermes-managed node dir.
+"""
+
+import os
+from pathlib import Path
+
+import pytest
+
+import hermes_cli.uninstall as uninstall
+
+
+@pytest.fixture
+def fake_home(tmp_path, monkeypatch):
+    """Redirect Path.home() at the home both the installer-symlink target and
+    the ~/.local/bin links live under the same temp dir."""
+    home = tmp_path / "home"
+    home.mkdir()
+    monkeypatch.setattr(Path, "home", classmethod(lambda cls: home))
+    (home / ".local" / "bin").mkdir(parents=True)
+    return home
+
+
+def _make_hermes_node(hermes_home: Path) -> Path:
+    """Create a fake $HERMES_HOME/node/bin/{node,npm,npx} tree."""
+    node_bin = hermes_home / "node" / "bin"
+    node_bin.mkdir(parents=True)
+    for name in ("node", "npm", "npx"):
+        (node_bin / name).write_text("#!/bin/sh\n")
+        (node_bin / name).chmod(0o755)
+    return node_bin
+
+
+def test_removes_symlinks_pointing_into_hermes_node(fake_home):
+    hermes_home = fake_home / ".hermes"
+    node_bin = _make_hermes_node(hermes_home)
+    local_bin = fake_home / ".local" / "bin"
+
+    for name in ("node", "npm", "npx"):
+        (local_bin / name).symlink_to(node_bin / name)
+
+    removed = uninstall.remove_node_symlinks(hermes_home)
+
+    assert sorted(p.name for p in removed) == ["node", "npm", "npx"]
+    for name in ("node", "npm", "npx"):
+        assert not (local_bin / name).exists()
+        assert not (local_bin / name).is_symlink()
+
+
+def test_leaves_unrelated_symlinks_untouched(fake_home):
+    """A node symlink the user repointed at nvm must survive uninstall."""
+    hermes_home = fake_home / ".hermes"
+    _make_hermes_node(hermes_home)
+    local_bin = fake_home / ".local" / "bin"
+
+    # Simulate nvm's node living elsewhere; user's ~/.local/bin/node -> nvm.
+    nvm_bin = fake_home / ".nvm" / "versions" / "node" / "v20.0.0" / "bin"
+    nvm_bin.mkdir(parents=True)
+    (nvm_bin / "node").write_text("#!/bin/sh\n")
+    (local_bin / "node").symlink_to(nvm_bin / "node")
+
+    removed = uninstall.remove_node_symlinks(hermes_home)
+
+    assert removed == []
+    assert (local_bin / "node").is_symlink()
+    assert (local_bin / "node").resolve() == (nvm_bin / "node").resolve()
+
+
+def test_leaves_real_binaries_untouched(fake_home):
+    """A real (non-symlink) binary in ~/.local/bin is never deleted."""
+    hermes_home = fake_home / ".hermes"
+    _make_hermes_node(hermes_home)
+    local_bin = fake_home / ".local" / "bin"
+
+    real_node = local_bin / "node"
+    real_node.write_text("#!/bin/sh\necho real\n")
+    real_node.chmod(0o755)
+
+    removed = uninstall.remove_node_symlinks(hermes_home)
+
+    assert removed == []
+    assert real_node.exists()
+    assert not real_node.is_symlink()
+
+
+def test_handles_missing_local_bin(fake_home):
+    """No symlinks present -> no-op, no error."""
+    hermes_home = fake_home / ".hermes"
+    _make_hermes_node(hermes_home)
+
+    assert uninstall.remove_node_symlinks(hermes_home) == []
+
+
+def test_removes_dangling_symlink_into_hermes_node(fake_home):
+    """A link into the Hermes node dir is removed even if the target file is
+    already gone (dangling) \u2014 the link still shadows PATH."""
+    hermes_home = fake_home / ".hermes"
+    node_bin = hermes_home / "node" / "bin"
+    node_bin.mkdir(parents=True)
+    local_bin = fake_home / ".local" / "bin"
+
+    # Create the symlink, then delete the target so it dangles.
+    (local_bin / "node").symlink_to(node_bin / "node")
+    assert (local_bin / "node").is_symlink()
+
+    removed = uninstall.remove_node_symlinks(hermes_home)
+
+    assert [p.name for p in removed] == ["node"]
+    assert not (local_bin / "node").is_symlink()
+
+
+def test_only_some_links_present(fake_home):
+    """Removes the Hermes links that exist; ignores the ones that don't."""
+    hermes_home = fake_home / ".hermes"
+    node_bin = _make_hermes_node(hermes_home)
+    local_bin = fake_home / ".local" / "bin"
+
+    # Only npm and npx are Hermes-managed; node is a real user binary.
+    (local_bin / "npm").symlink_to(node_bin / "npm")
+    (local_bin / "npx").symlink_to(node_bin / "npx")
+    (local_bin / "node").write_text("#!/bin/sh\n")
+
+    removed = uninstall.remove_node_symlinks(hermes_home)
+
+    assert sorted(p.name for p in removed) == ["npm", "npx"]
+    assert (local_bin / "node").exists()
+    assert not (local_bin / "npm").is_symlink()
+    assert not (local_bin / "npx").is_symlink()
diff --git a/tests/hermes_cli/test_update_check.py b/tests/hermes_cli/test_update_check.py
index 8a68d6a178d..c7cc1c86724 100644
--- a/tests/hermes_cli/test_update_check.py
+++ b/tests/hermes_cli/test_update_check.py
@@ -19,6 +19,7 @@ def test_version_string_no_v_prefix():
 def test_check_for_updates_uses_cache(tmp_path, monkeypatch):
     """When cache is fresh, check_for_updates should return cached value without calling git."""
     from hermes_cli.banner import check_for_updates
+    from hermes_cli import __version__
 
     # Create a fake git repo and fresh cache
     repo_dir = tmp_path / "hermes-agent"
@@ -26,7 +27,7 @@ def test_check_for_updates_uses_cache(tmp_path, monkeypatch):
     (repo_dir / ".git").mkdir()
 
     cache_file = tmp_path / ".update_check"
-    cache_file.write_text(json.dumps({"ts": time.time(), "behind": 3}))
+    cache_file.write_text(json.dumps({"ts": time.time(), "behind": 3, "ver": __version__}))
 
     monkeypatch.setenv("HERMES_HOME", str(tmp_path))
     with patch("hermes_cli.banner.subprocess.run") as mock_run:
@@ -36,6 +37,43 @@ def test_check_for_updates_uses_cache(tmp_path, monkeypatch):
     mock_run.assert_not_called()
 
 
+def test_check_for_updates_invalidates_on_version_change(tmp_path, monkeypatch):
+    """A fresh cache from a different installed version must be re-checked, not reused.
+
+    Regression for #34491: after `pip install --upgrade`, VERSION changes but the
+    cache's 6h TTL hadn't expired and rev was unchanged (both None), so the stale
+    'behind' count survived the upgrade. The version guard forces a recheck.
+    """
+    import hermes_cli.banner as banner
+
+    # No local git checkout -> the PyPI path is exercised (pip-install class).
+    fake_banner = tmp_path / "hermes_cli" / "banner.py"
+    fake_banner.parent.mkdir(parents=True, exist_ok=True)
+    fake_banner.touch()
+    monkeypatch.setattr(banner, "__file__", str(fake_banner))
+
+    # Fresh (within TTL) cache that says "behind", but stamped with an OLD version.
+    cache_file = tmp_path / ".update_check"
+    cache_file.write_text(
+        json.dumps({"ts": time.time(), "behind": 1, "rev": None, "ver": "0.0.1-old"})
+    )
+
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    monkeypatch.delenv("HERMES_REVISION", raising=False)
+    with patch("hermes_cli.banner.subprocess.run") as mock_run, \
+         patch("hermes_cli.banner.check_via_pypi", return_value=0) as mock_pypi:
+        result = banner.check_for_updates()
+
+    # Stale-version cache rejected -> fresh check ran -> up-to-date result.
+    assert result == 0
+    mock_pypi.assert_called_once()
+    mock_run.assert_not_called()
+
+    # Cache rewritten with the current installed version.
+    written = json.loads(cache_file.read_text())
+    assert written["ver"] == banner.VERSION
+
+
 def test_check_for_updates_expired_cache(tmp_path, monkeypatch):
     """When cache is expired, check_for_updates should call git fetch."""
     from hermes_cli.banner import check_for_updates
diff --git a/tests/hermes_cli/test_update_hangup_protection.py b/tests/hermes_cli/test_update_hangup_protection.py
index e5c81a45a01..5f91764b822 100644
--- a/tests/hermes_cli/test_update_hangup_protection.py
+++ b/tests/hermes_cli/test_update_hangup_protection.py
@@ -9,11 +9,8 @@ that ``hermes update`` survives a terminal disconnect mid-install
 from __future__ import annotations
 
 import io
-import os
 import signal
 import sys
-from pathlib import Path
-from unittest.mock import patch
 
 import pytest
 
diff --git a/tests/hermes_cli/test_update_stale_dashboard.py b/tests/hermes_cli/test_update_stale_dashboard.py
index e79caeb9dc6..8a04d9cbe8e 100644
--- a/tests/hermes_cli/test_update_stale_dashboard.py
+++ b/tests/hermes_cli/test_update_stale_dashboard.py
@@ -16,7 +16,7 @@ from __future__ import annotations
 import importlib
 import os
 import sys
-from unittest.mock import patch, MagicMock, call
+from unittest.mock import patch, MagicMock
 
 import pytest
 
diff --git a/tests/hermes_cli/test_update_zip_symlink_reject.py b/tests/hermes_cli/test_update_zip_symlink_reject.py
index 2585b53fa7a..4ee7f84549b 100644
--- a/tests/hermes_cli/test_update_zip_symlink_reject.py
+++ b/tests/hermes_cli/test_update_zip_symlink_reject.py
@@ -7,7 +7,6 @@ The Linux mode bits live in the upper 16 bits of ``ZipInfo.external_attr``;
 we explicitly reject any member whose type bits are S_IFLNK.
 """
 
-import io
 import os
 import stat
 import tempfile
diff --git a/tests/hermes_cli/test_web_oauth_dispatch.py b/tests/hermes_cli/test_web_oauth_dispatch.py
index b9ee20ccae8..0c6b902f7bc 100644
--- a/tests/hermes_cli/test_web_oauth_dispatch.py
+++ b/tests/hermes_cli/test_web_oauth_dispatch.py
@@ -25,6 +25,7 @@ from datetime import datetime, timezone
 from unittest.mock import patch
 
 import httpx
+import pytest
 from fastapi.testclient import TestClient
 
 from hermes_cli.web_server import _SESSION_TOKEN, app
@@ -99,7 +100,7 @@ def test_minimax_login_does_not_launch_anthropic_flow():
     assert body["expires_in"] == 600
 
 
-def test_nous_dashboard_device_flow_honors_legacy_scope_override(monkeypatch):
+def test_nous_dashboard_device_flow_ignores_legacy_scope_override(monkeypatch):
     from hermes_cli import auth as auth_mod
     from hermes_cli import web_server as ws
 
@@ -109,24 +110,24 @@ def test_nous_dashboard_device_flow_honors_legacy_scope_override(monkeypatch):
         requested_scopes.append(kwargs["scope"])
         return _fake_nous_device_data()
 
-    monkeypatch.setenv(auth_mod.NOUS_LEGACY_SESSION_KEYS_ENV, "true")
+    monkeypatch.setenv("HERMES_AGENT_USE_LEGACY_SESSION_KEYS", "true")
     monkeypatch.setattr(auth_mod, "_request_device_code", fake_request_device_code)
     monkeypatch.setattr(ws, "_nous_poller", lambda sid: None)
 
     result = asyncio.run(ws._start_device_code_flow("nous"))
     try:
-        assert requested_scopes == [auth_mod.NOUS_LEGACY_AGENT_KEY_SCOPE]
+        assert requested_scopes == [auth_mod.DEFAULT_NOUS_SCOPE]
         assert result["flow"] == "device_code"
         assert result["user_code"] == "NOUS-1234"
         assert (
             ws._oauth_sessions[result["session_id"]]["scope"]
-            == auth_mod.NOUS_LEGACY_AGENT_KEY_SCOPE
+            == auth_mod.DEFAULT_NOUS_SCOPE
         )
     finally:
         ws._oauth_sessions.pop(result["session_id"], None)
 
 
-def test_nous_dashboard_device_flow_retries_legacy_scope_on_invoke_refusal(monkeypatch):
+def test_nous_dashboard_device_flow_does_not_retry_legacy_scope_on_invoke_refusal(monkeypatch):
     from hermes_cli import auth as auth_mod
     from hermes_cli import web_server as ws
 
@@ -134,26 +135,15 @@ def test_nous_dashboard_device_flow_retries_legacy_scope_on_invoke_refusal(monke
 
     def fake_request_device_code(**kwargs):
         requested_scopes.append(kwargs["scope"])
-        if len(requested_scopes) == 1:
-            raise _invoke_scope_refusal()
-        return _fake_nous_device_data()
+        raise _invoke_scope_refusal()
 
-    monkeypatch.delenv(auth_mod.NOUS_LEGACY_SESSION_KEYS_ENV, raising=False)
+    monkeypatch.delenv("HERMES_AGENT_USE_LEGACY_SESSION_KEYS", raising=False)
     monkeypatch.setattr(auth_mod, "_request_device_code", fake_request_device_code)
     monkeypatch.setattr(ws, "_nous_poller", lambda sid: None)
 
-    result = asyncio.run(ws._start_device_code_flow("nous"))
-    try:
-        assert requested_scopes == [
-            auth_mod.DEFAULT_NOUS_SCOPE,
-            auth_mod.NOUS_LEGACY_AGENT_KEY_SCOPE,
-        ]
-        assert (
-            ws._oauth_sessions[result["session_id"]]["scope"]
-            == auth_mod.NOUS_LEGACY_AGENT_KEY_SCOPE
-        )
-    finally:
-        ws._oauth_sessions.pop(result["session_id"], None)
+    with pytest.raises(httpx.HTTPStatusError):
+        asyncio.run(ws._start_device_code_flow("nous"))
+    assert requested_scopes == [auth_mod.DEFAULT_NOUS_SCOPE]
 
 
 def test_nous_dashboard_poller_preserves_effective_scope_when_token_omits_scope(monkeypatch):
@@ -173,13 +163,13 @@ def test_nous_dashboard_poller_preserves_effective_scope_when_token_omits_scope(
         "device_code": "device-code",
         "interval": 5,
         "expires_at": time.time() + 600,
-        "scope": auth_mod.NOUS_LEGACY_AGENT_KEY_SCOPE,
+        "scope": auth_mod.DEFAULT_NOUS_SCOPE,
     }
     captured_state = {}
 
     def fake_refresh_nous_oauth_from_state(state, **kwargs):
         captured_state.update(state)
-        return {**state, "agent_key": "legacy-agent-key"}
+        return {**state, "agent_key": "jwt-agent-key"}
 
     monkeypatch.setattr(
         auth_mod,
@@ -200,7 +190,7 @@ def test_nous_dashboard_poller_preserves_effective_scope_when_token_omits_scope(
 
     try:
         ws._nous_poller(session_id)
-        assert captured_state["scope"] == auth_mod.NOUS_LEGACY_AGENT_KEY_SCOPE
+        assert captured_state["scope"] == auth_mod.DEFAULT_NOUS_SCOPE
         assert ws._oauth_sessions[session_id]["status"] == "approved"
     finally:
         ws._oauth_sessions.pop(session_id, None)
diff --git a/tests/hermes_cli/test_web_server.py b/tests/hermes_cli/test_web_server.py
index 16725115d67..04774113c63 100644
--- a/tests/hermes_cli/test_web_server.py
+++ b/tests/hermes_cli/test_web_server.py
@@ -2,17 +2,14 @@
 
 import os
 import json
-import tempfile
 from pathlib import Path
 from unittest.mock import patch, MagicMock
 
 import pytest
 
 from hermes_cli.config import (
-    DEFAULT_CONFIG,
     reload_env,
     redact_key,
-    _EXTRA_ENV_KEYS,
     OPTIONAL_ENV_VARS,
 )
 
@@ -2061,7 +2058,6 @@ class TestPluginAPIAuth:
         shared layer can't silently break the WS auth contract.
         """
         from starlette.websockets import WebSocketDisconnect
-        from hermes_cli.web_server import _SESSION_TOKEN
 
         # Without a token the WS endpoint must close the upgrade itself
         # (its own _check_ws_token), NOT 401 from the HTTP middleware.
@@ -2343,7 +2339,7 @@ class TestPtyWebSocket:
 
         winsize_script = (
             "import fcntl, struct, termios, time; "
-            "time.sleep(0.15); "
+            "time.sleep(0.5); "
             "rows, cols, *_ = struct.unpack('HHHH', "
             "fcntl.ioctl(0, termios.TIOCGWINSZ, b'\\0' * 8)); "
             "print(cols); print(rows)"
@@ -2365,7 +2361,14 @@ class TestPtyWebSocket:
 
             deadline = time.monotonic() + 5.0
             while time.monotonic() < deadline:
-                frame = conn.receive_bytes()
+                # receive_bytes() blocks; once the child prints its winsize and
+                # exits, the PTY closes and further reads raise. Without this
+                # guard a missed-marker run blocks until the 30s pytest-timeout
+                # (flaky failure) instead of failing fast on the assert below.
+                try:
+                    frame = conn.receive_bytes()
+                except Exception:
+                    break
                 if frame:
                     buf += frame
                 if b"99" in buf and b"41" in buf:
diff --git a/tests/hermes_cli/test_web_ui_build.py b/tests/hermes_cli/test_web_ui_build.py
index 3dde893d6e7..c8e28a02431 100644
--- a/tests/hermes_cli/test_web_ui_build.py
+++ b/tests/hermes_cli/test_web_ui_build.py
@@ -1,7 +1,7 @@
 """Tests for _web_ui_build_needed — staleness check for the web UI dist.
 
 Critical invariant: the dashboard Vite build outputs to hermes_cli/web_dist/
-(vite.config.ts: outDir: "../../hermes_cli/web_dist"), NOT apps/dashboard/dist/.
+(vite.config.ts: outDir: "../../hermes_cli/web_dist"), NOT web/dist/.
 The sentinel must be checked in the correct output directory or the
 freshness check is a no-op and the OOM rebuild always runs.
 """
@@ -11,7 +11,6 @@ import time
 from pathlib import Path
 from unittest.mock import patch
 
-import pytest
 
 from hermes_cli.main import _web_ui_build_needed, _build_web_ui, _run_npm_install_deterministic
 
@@ -26,7 +25,7 @@ def _touch(path: Path, offset: float = 0.0) -> None:
 
 def _make_web_dir(tmp_path: Path) -> tuple[Path, Path]:
     """Return (web_dir, dist_dir) matching real repo layout."""
-    web_dir = tmp_path / "apps" / "dashboard"
+    web_dir = tmp_path / "web"
     web_dir.mkdir(parents=True)
     (web_dir / "package.json").touch()
     dist_dir = tmp_path / "hermes_cli" / "web_dist"
@@ -58,10 +57,10 @@ class TestWebUIBuildNeeded:
         assert _web_ui_build_needed(web_dir) is False
 
     def test_web_dist_dir_not_web_dist_subdir(self, tmp_path):
-        """Regression: sentinel must be in hermes_cli/web_dist/, NOT apps/dashboard/dist/."""
+        """Regression: sentinel must be in hermes_cli/web_dist/, NOT web/dist/."""
         web_dir, dist_dir = _make_web_dir(tmp_path)
         _touch(web_dir / "src" / "App.tsx", offset=-10)
-        # Place manifest in wrong location (apps/dashboard/dist/) — should NOT count as fresh
+        # Place manifest in wrong location (web/dist/) — should NOT count as fresh
         wrong_dist = web_dir / "dist" / ".vite" / "manifest.json"
         _touch(wrong_dist)
         # Correct location is empty → still needs build
@@ -89,7 +88,7 @@ class TestWebUIBuildNeeded:
 
     def test_ignores_dist_subdir_under_web(self, tmp_path):
         web_dir, dist_dir = _make_web_dir(tmp_path)
-        # package.json older than manifest; only apps/dashboard/dist file is newer
+        # package.json older than manifest; only web/dist file is newer
         _touch(web_dir / "package.json", offset=-20)
         _touch(dist_dir / ".vite" / "manifest.json", offset=-10)
         _touch(web_dir / "dist" / "assets" / "index.js")
@@ -113,12 +112,17 @@ class TestBuildWebUISkipsWhenFresh:
         web_dir, _ = _make_web_dir(tmp_path)
 
         mock_cp = __import__("subprocess").CompletedProcess([], 0, stdout=b"", stderr=b"")
+        build_ok = __import__("subprocess").CompletedProcess([], 0, stdout="", stderr="")
         with patch("hermes_cli.main.shutil.which", return_value="/usr/bin/npm"), \
-             patch("hermes_cli.main.subprocess.run", return_value=mock_cp) as mock_run:
+             patch("hermes_cli.main.subprocess.run", return_value=mock_cp) as mock_run, \
+             patch("hermes_cli.main._run_with_idle_timeout", return_value=build_ok) as mock_idle:
             result = _build_web_ui(web_dir)
 
         assert result is True
-        assert mock_run.call_count == 2  # npm install + npm run build
+        # npm install goes through subprocess.run; npm run build goes through
+        # _run_with_idle_timeout (issue #33788).
+        assert mock_run.call_count == 1   # install only
+        assert mock_idle.call_count == 1  # build only
 
     def test_npm_install_uses_utf8_replace_output_decoding(self, tmp_path):
         web_dir, _ = _make_web_dir(tmp_path)
@@ -134,19 +138,29 @@ class TestBuildWebUISkipsWhenFresh:
         assert kwargs["encoding"] == "utf-8"
         assert kwargs["errors"] == "replace"
 
-    def test_web_build_uses_utf8_replace_output_decoding(self, tmp_path):
+    def test_web_build_uses_idle_timeout_helper(self, tmp_path):
+        """npm run build now goes through _run_with_idle_timeout (issue #33788).
+
+        The install step keeps its capture_output behavior (the existing
+        retry-on-EPERM contract depends on it); only the long-running build
+        step is streamed + idle-killed.
+        """
         web_dir, _ = _make_web_dir(tmp_path)
 
-        mock_cp = __import__("subprocess").CompletedProcess([], 0, stdout="", stderr="")
+        install_cp = __import__("subprocess").CompletedProcess([], 0, stdout="", stderr="")
+        build_cp = __import__("subprocess").CompletedProcess([], 0, stdout="", stderr="")
         with patch("hermes_cli.main.shutil.which", return_value="/usr/bin/npm"), \
-             patch("hermes_cli.main.subprocess.run", side_effect=[mock_cp, mock_cp]) as mock_run:
+             patch("hermes_cli.main.subprocess.run", return_value=install_cp), \
+             patch("hermes_cli.main._run_with_idle_timeout", return_value=build_cp) as mock_idle:
             result = _build_web_ui(web_dir)
 
         assert result is True
-        _, build_kwargs = mock_run.call_args_list[1]
-        assert build_kwargs["text"] is True
-        assert build_kwargs["encoding"] == "utf-8"
-        assert build_kwargs["errors"] == "replace"
+        # Build was invoked through the idle-timeout helper, not subprocess.run.
+        mock_idle.assert_called_once()
+        args, kwargs = mock_idle.call_args
+        # Positional: [npm, "run", "build"]; cwd passed as kwarg.
+        assert args[0] == ["/usr/bin/npm", "run", "build"]
+        assert kwargs["cwd"] == web_dir
 
 
 class TestBuildWebUIRetryAndStaleFallback:
@@ -155,18 +169,19 @@ class TestBuildWebUIRetryAndStaleFallback:
     def test_retries_build_once_on_failure(self, tmp_path):
         web_dir, _ = _make_web_dir(tmp_path)
         Subprocess = __import__("subprocess")
-        # install: success; build attempt 1: fail; build attempt 2: success
         install_ok = Subprocess.CompletedProcess([], 0, stdout="", stderr="")
-        build_fail = Subprocess.CompletedProcess([], 1, stdout="", stderr="EPERM")
+        # build attempt 1: fail; build attempt 2: success.
+        build_fail = Subprocess.CompletedProcess([], 1, stdout="EPERM", stderr="")
         build_ok = Subprocess.CompletedProcess([], 0, stdout="", stderr="")
         with patch("hermes_cli.main.shutil.which", return_value="/usr/bin/npm"), \
              patch("hermes_cli.main._time.sleep") as mock_sleep, \
-             patch("hermes_cli.main.subprocess.run",
-                   side_effect=[install_ok, build_fail, build_ok]) as mock_run:
+             patch("hermes_cli.main.subprocess.run", return_value=install_ok), \
+             patch("hermes_cli.main._run_with_idle_timeout",
+                   side_effect=[build_fail, build_ok]) as mock_idle:
             result = _build_web_ui(web_dir)
 
         assert result is True
-        assert mock_run.call_count == 3  # install + build + retry
+        assert mock_idle.call_count == 2  # build + retry
         mock_sleep.assert_called_once_with(3)
 
     def test_falls_back_to_stale_dist_when_retry_also_fails(self, tmp_path, capsys):
@@ -177,11 +192,12 @@ class TestBuildWebUIRetryAndStaleFallback:
 
         Subprocess = __import__("subprocess")
         install_ok = Subprocess.CompletedProcess([], 0, stdout="", stderr="")
-        build_fail = Subprocess.CompletedProcess([], 1, stdout="", stderr="vite ENOMEM")
+        build_fail = Subprocess.CompletedProcess([], 1, stdout="vite ENOMEM", stderr="")
         with patch("hermes_cli.main.shutil.which", return_value="/usr/bin/npm"), \
              patch("hermes_cli.main._time.sleep"), \
-             patch("hermes_cli.main.subprocess.run",
-                   side_effect=[install_ok, build_fail, build_fail]):
+             patch("hermes_cli.main.subprocess.run", return_value=install_ok), \
+             patch("hermes_cli.main._run_with_idle_timeout",
+                   side_effect=[build_fail, build_fail]):
             result = _build_web_ui(web_dir, fatal=True)
 
         # MUST return True (serve stale) — issue #23817 — even with fatal=True,
@@ -189,18 +205,19 @@ class TestBuildWebUIRetryAndStaleFallback:
         assert result is True
         out = capsys.readouterr().out
         assert "serving stale dist as fallback" in out
-        assert "vite ENOMEM" in out  # stderr surfaced to user
+        assert "vite ENOMEM" in out  # combined output surfaced to user
 
     def test_hard_fails_when_no_dist_to_fall_back_to(self, tmp_path, capsys):
         web_dir, _ = _make_web_dir(tmp_path)
 
         Subprocess = __import__("subprocess")
         install_ok = Subprocess.CompletedProcess([], 0, stdout="", stderr="")
-        build_fail = Subprocess.CompletedProcess([], 1, stdout="", stderr="vite ENOMEM")
+        build_fail = Subprocess.CompletedProcess([], 1, stdout="vite ENOMEM", stderr="")
         with patch("hermes_cli.main.shutil.which", return_value="/usr/bin/npm"), \
              patch("hermes_cli.main._time.sleep"), \
-             patch("hermes_cli.main.subprocess.run",
-                   side_effect=[install_ok, build_fail, build_fail]):
+             patch("hermes_cli.main.subprocess.run", return_value=install_ok), \
+             patch("hermes_cli.main._run_with_idle_timeout",
+                   side_effect=[build_fail, build_fail]):
             result = _build_web_ui(web_dir, fatal=True)
 
         assert result is False
diff --git a/tests/hermes_cli/test_webhook_cli.py b/tests/hermes_cli/test_webhook_cli.py
index 8d3880722bb..46f6da84980 100644
--- a/tests/hermes_cli/test_webhook_cli.py
+++ b/tests/hermes_cli/test_webhook_cli.py
@@ -5,14 +5,12 @@ import os
 import pytest
 import stat
 from argparse import Namespace
-from pathlib import Path
 
 from hermes_cli.webhook import (
     webhook_command,
     _load_subscriptions,
     _save_subscriptions,
     _subscriptions_path,
-    _is_webhook_enabled,
 )
 
 
diff --git a/tests/hermes_cli/test_whatsapp_setup_ordering.py b/tests/hermes_cli/test_whatsapp_setup_ordering.py
index 47952bcc796..0585bd0ff36 100644
--- a/tests/hermes_cli/test_whatsapp_setup_ordering.py
+++ b/tests/hermes_cli/test_whatsapp_setup_ordering.py
@@ -17,7 +17,7 @@ import io
 import os
 from contextlib import redirect_stdout
 from pathlib import Path
-from unittest.mock import MagicMock, patch
+from unittest.mock import MagicMock
 
 import pytest
 
diff --git a/tests/hermes_cli/test_xai_retirement.py b/tests/hermes_cli/test_xai_retirement.py
index c87214ff0f0..fd1884b0d08 100644
--- a/tests/hermes_cli/test_xai_retirement.py
+++ b/tests/hermes_cli/test_xai_retirement.py
@@ -1,7 +1,6 @@
 """Unit tests for hermes_cli.xai_retirement (May 15, 2026 model retirement)."""
 from __future__ import annotations
 
-import pytest
 
 from hermes_cli.xai_retirement import (
     MIGRATION_GUIDE_URL,
diff --git a/tests/hermes_cli/test_xiaomi_provider.py b/tests/hermes_cli/test_xiaomi_provider.py
index 776e42201f2..4a5a7724ad0 100644
--- a/tests/hermes_cli/test_xiaomi_provider.py
+++ b/tests/hermes_cli/test_xiaomi_provider.py
@@ -1,6 +1,5 @@
 """Tests for Xiaomi MiMo provider support."""
 
-import os
 
 import pytest
 
@@ -9,7 +8,6 @@ from hermes_cli.auth import (
     resolve_provider,
     get_api_key_provider_status,
     resolve_api_key_provider_credentials,
-    AuthError,
 )
 
 
diff --git a/tests/honcho_plugin/test_async_memory.py b/tests/honcho_plugin/test_async_memory.py
index 5df8d274540..97f4f7306d5 100644
--- a/tests/honcho_plugin/test_async_memory.py
+++ b/tests/honcho_plugin/test_async_memory.py
@@ -10,19 +10,14 @@ Covers:
 """
 
 import json
-import queue
-import threading
 import time
-from pathlib import Path
-from unittest.mock import MagicMock, patch, call
+from unittest.mock import MagicMock, patch
 
-import pytest
 
 from plugins.memory.honcho.client import HonchoClientConfig
 from plugins.memory.honcho.session import (
     HonchoSession,
     HonchoSessionManager,
-    _ASYNC_SHUTDOWN,
 )
 
 
diff --git a/tests/honcho_plugin/test_client.py b/tests/honcho_plugin/test_client.py
index b6530db9f84..a02e6937a34 100644
--- a/tests/honcho_plugin/test_client.py
+++ b/tests/honcho_plugin/test_client.py
@@ -17,7 +17,6 @@ from plugins.memory.honcho.client import (
     resolve_active_host,
     resolve_config_path,
     resolve_global_config_path,
-    HOST,
 )
 
 
diff --git a/tests/honcho_plugin/test_pin_peer_name.py b/tests/honcho_plugin/test_pin_peer_name.py
index d3d935f9a05..ef3a215f329 100644
--- a/tests/honcho_plugin/test_pin_peer_name.py
+++ b/tests/honcho_plugin/test_pin_peer_name.py
@@ -18,7 +18,6 @@ import hashlib
 import json
 from unittest.mock import MagicMock
 
-import pytest
 
 from plugins.memory.honcho.client import HonchoClientConfig
 from plugins.memory.honcho.session import HonchoSessionManager
diff --git a/tests/honcho_plugin/test_session.py b/tests/honcho_plugin/test_session.py
index cd9670af237..cf47f3a38bb 100644
--- a/tests/honcho_plugin/test_session.py
+++ b/tests/honcho_plugin/test_session.py
@@ -1237,7 +1237,6 @@ class TestDialecticCadenceAdvancesOnSuccess:
         return provider
 
     def test_empty_dialectic_result_does_not_advance_cadence(self):
-        import time as _time
         provider = self._make_provider()
         provider._session_key = "test"
         provider._manager.dialectic_query.return_value = ""  # silent failure
diff --git a/tests/integration/test_checkpoint_resumption.py b/tests/integration/test_checkpoint_resumption.py
index a5b1a2aa99f..739f0452fe3 100644
--- a/tests/integration/test_checkpoint_resumption.py
+++ b/tests/integration/test_checkpoint_resumption.py
@@ -22,7 +22,6 @@ import pytest
 pytestmark = pytest.mark.integration
 
 import json
-import os
 import shutil
 import sys
 import time
diff --git a/tests/integration/test_voice_channel_flow.py b/tests/integration/test_voice_channel_flow.py
index 420adcb0e73..4c6d5ae11cb 100644
--- a/tests/integration/test_voice_channel_flow.py
+++ b/tests/integration/test_voice_channel_flow.py
@@ -24,7 +24,6 @@ try:
         import ctypes.util
         opus_path = ctypes.util.find_library("opus")
         if not opus_path:
-            import sys
             for p in ("/opt/homebrew/lib/libopus.dylib", "/usr/local/lib/libopus.dylib"):
                 import os
                 if os.path.isfile(p):
diff --git a/tests/integration/test_web_tools.py b/tests/integration/test_web_tools.py
index 823be0392fa..f5281140066 100644
--- a/tests/integration/test_web_tools.py
+++ b/tests/integration/test_web_tools.py
@@ -30,7 +30,6 @@ from typing import List
 from tools.web_tools import (
     web_search_tool,
     web_extract_tool,
-    web_crawl_tool,
     check_firecrawl_api_key,
     check_web_api_key,
     check_auxiliary_model,
@@ -404,113 +403,6 @@ class WebToolsTester:
         except Exception as e:
             self.log_result("Extract (with LLM)", "failed", str(e))
     
-    async def test_web_crawl(self):
-        """Test web crawling functionality"""
-        print_section("Test 4: Web Crawl")
-        
-        test_sites = [
-            ("https://docs.firecrawl.dev", None, 2),  # Test docs site
-            ("https://firecrawl.dev", None, 3),  # Test main site
-        ]
-        
-        for url, instructions, expected_min_pages in test_sites:
-            try:
-                print(f"\n  Testing crawl of: {url}")
-                if instructions:
-                    print(f"  Instructions: {instructions}")
-                else:
-                    print(f"  No instructions (general crawl)")
-                print(f"  Expected minimum pages: {expected_min_pages}")
-                
-                # Show what's being called
-                if self.verbose:
-                    print(f"  Calling web_crawl_tool(url='{url}', instructions={instructions}, use_llm_processing=False)")
-                
-                result = await web_crawl_tool(
-                    url,
-                    instructions=instructions,
-                    use_llm_processing=False  # Disable LLM for faster testing
-                )
-                
-                # Check if result is valid JSON
-                try:
-                    data = json.loads(result)
-                except json.JSONDecodeError as e:
-                    self.log_result(f"Crawl: {url}", "failed", f"Invalid JSON response: {e}")
-                    if self.verbose:
-                        print(f"    Raw response (first 500 chars): {result[:500]}...")
-                    continue
-                
-                # Check for errors
-                if "error" in data:
-                    self.log_result(f"Crawl: {url}", "failed", f"API error: {data['error']}")
-                    continue
-                
-                # Get results
-                results = data.get("results", [])
-                
-                if not results:
-                    self.log_result(f"Crawl: {url}", "failed", "No pages in results array")
-                    if self.verbose:
-                        print(f"    Full response: {json.dumps(data, indent=2)[:1000]}...")
-                    continue
-                
-                # Analyze pages
-                valid_pages = 0
-                empty_pages = 0
-                total_content = 0
-                page_details = []
-                
-                for i, page in enumerate(results):
-                    content = page.get("content", "")
-                    title = page.get("title", "Untitled")
-                    error = page.get("error")
-                    
-                    if error:
-                        page_details.append(f"Page {i+1}: ERROR - {error}")
-                    elif content:
-                        valid_pages += 1
-                        content_len = len(content)
-                        total_content += content_len
-                        page_details.append(f"Page {i+1}: {title[:40]}... ({content_len} chars)")
-                    else:
-                        empty_pages += 1
-                        page_details.append(f"Page {i+1}: {title[:40]}... (EMPTY)")
-                
-                # Show detailed results if verbose
-                if self.verbose:
-                    print(f"\n  Crawl Results:")
-                    print(f"    Total pages returned: {len(results)}")
-                    print(f"    Valid pages (with content): {valid_pages}")
-                    print(f"    Empty pages: {empty_pages}")
-                    print(f"    Total content size: {total_content} characters")
-                    print(f"\n  Page Details:")
-                    for detail in page_details[:10]:  # Show first 10 pages
-                        print(f"    - {detail}")
-                    if len(page_details) > 10:
-                        print(f"    ... and {len(page_details) - 10} more pages")
-                
-                # Determine pass/fail
-                if valid_pages >= expected_min_pages:
-                    self.log_result(
-                        f"Crawl: {url}", 
-                        "passed", 
-                        f"{valid_pages}/{len(results)} valid pages, {total_content} chars total"
-                    )
-                else:
-                    self.log_result(
-                        f"Crawl: {url}", 
-                        "failed", 
-                        f"Only {valid_pages} valid pages (expected >= {expected_min_pages}), {empty_pages} empty, {len(results)} total"
-                    )
-                    
-            except Exception as e:
-                self.log_result(f"Crawl: {url}", "failed", f"Exception: {type(e).__name__}: {str(e)}")
-                if self.verbose:
-                    import traceback
-                    print(f"    Traceback:")
-                    print("    " + "\n    ".join(traceback.format_exc().split("\n")))
-    
     async def run_all_tests(self):
         """Run all tests"""
         self.start_time = datetime.now()
@@ -533,9 +425,6 @@ class WebToolsTester:
         if self.test_llm:
             await self.test_web_extract_with_llm(urls if urls else None)
         
-        # Test crawling
-        await self.test_web_crawl()
-        
         # Print summary
         self.end_time = datetime.now()
         duration = (self.end_time - self.start_time).total_seconds()
diff --git a/tests/plugins/image_gen/test_fal_provider.py b/tests/plugins/image_gen/test_fal_provider.py
index 8b3e65e0bae..a75c3da547e 100644
--- a/tests/plugins/image_gen/test_fal_provider.py
+++ b/tests/plugins/image_gen/test_fal_provider.py
@@ -16,7 +16,6 @@ from __future__ import annotations
 import json
 from unittest.mock import MagicMock
 
-import pytest
 
 
 # ---------------------------------------------------------------------------
diff --git a/tests/plugins/image_gen/test_openai_provider.py b/tests/plugins/image_gen/test_openai_provider.py
index 6411996130e..8a6a4985014 100644
--- a/tests/plugins/image_gen/test_openai_provider.py
+++ b/tests/plugins/image_gen/test_openai_provider.py
@@ -137,7 +137,6 @@ class TestGenerate:
         assert result["error_type"] == "auth_required"
 
     def test_b64_saves_to_cache(self, provider, tmp_path):
-        import base64
         png_bytes = bytes.fromhex(_PNG_HEX)
         fake_client = MagicMock()
         fake_client.images.generate.return_value = _fake_response(b64=_b64_png())
diff --git a/tests/plugins/image_gen/test_xai_provider.py b/tests/plugins/image_gen/test_xai_provider.py
index f921fe2e291..d1e7beab15e 100644
--- a/tests/plugins/image_gen/test_xai_provider.py
+++ b/tests/plugins/image_gen/test_xai_provider.py
@@ -4,7 +4,6 @@
 from __future__ import annotations
 
 import json
-import os
 from pathlib import Path
 from unittest.mock import MagicMock, patch
 
diff --git a/tests/plugins/memory/test_hindsight_provider.py b/tests/plugins/memory/test_hindsight_provider.py
index fcda46e56b0..bc62b7f2c8f 100644
--- a/tests/plugins/memory/test_hindsight_provider.py
+++ b/tests/plugins/memory/test_hindsight_provider.py
@@ -197,10 +197,32 @@ class TestConfig:
         assert provider._recall_max_input_chars == 800
         assert provider._tags is None
         assert provider._recall_tags is None
+        # Default recall narrowed to observation-only; world/experience are
+        # aggregate facts that often crowd out concrete-event signal during
+        # auto-recall. Users opt back in via the recall_types config key.
+        assert provider._recall_types == ["observation"]
         assert provider._bank_mission == ""
         assert provider._bank_retain_mission is None
         assert provider._retain_context == "conversation between Hermes Agent and the User"
 
+    def test_recall_types_default_is_observation_only(self, provider):
+        """Auto-recall must filter to observation by default."""
+        assert provider._recall_types == ["observation"]
+
+    def test_recall_types_explicit_list_overrides_default(self, provider_with_config):
+        p = provider_with_config(recall_types=["world", "experience", "observation"])
+        assert p._recall_types == ["world", "experience", "observation"]
+
+    def test_recall_types_csv_string_accepted(self, provider_with_config):
+        """For parity with recall_tags, comma-separated strings work too."""
+        p = provider_with_config(recall_types="observation, world")
+        assert p._recall_types == ["observation", "world"]
+
+    def test_recall_types_empty_list_falls_back_to_default(self, provider_with_config):
+        """An empty list shouldn't disable the filter (would be wider than default)."""
+        p = provider_with_config(recall_types=[])
+        assert p._recall_types == ["observation"]
+
     def test_custom_config_values(self, provider_with_config):
         p = provider_with_config(
             retain_tags=["tag1", "tag2"],
@@ -994,7 +1016,6 @@ class TestSessionSwitchBufferFlush:
         old session to settle before clearing _prefetch_result, otherwise
         the thread can race and re-populate the field after the clear."""
         import threading
-        import time as _time
 
         gate = threading.Event()
         finished = threading.Event()
diff --git a/tests/plugins/memory/test_mem0_v2.py b/tests/plugins/memory/test_mem0_v2.py
index 6f60771f5c4..1ef85499b54 100644
--- a/tests/plugins/memory/test_mem0_v2.py
+++ b/tests/plugins/memory/test_mem0_v2.py
@@ -4,7 +4,6 @@ Salvaged from PRs #5301 (qaqcvc) and #5117 (vvvanguards).
 """
 
 import json
-import pytest
 
 from plugins.memory.mem0 import Mem0MemoryProvider
 
diff --git a/tests/plugins/test_disk_cleanup_plugin.py b/tests/plugins/test_disk_cleanup_plugin.py
index e1463bced7a..4f7f66e028a 100644
--- a/tests/plugins/test_disk_cleanup_plugin.py
+++ b/tests/plugins/test_disk_cleanup_plugin.py
@@ -129,12 +129,40 @@ class TestGuessCategory:
 
     def test_cron_subtree_categorised(self, _isolate_env):
         dg = _load_lib()
-        cron_dir = _isolate_env / "cron"
-        cron_dir.mkdir()
-        p = cron_dir / "job_output.md"
+        # Only files under ``cron/output/`` are disposable run artifacts.
+        output_dir = _isolate_env / "cron" / "output" / "job_123"
+        output_dir.mkdir(parents=True)
+        p = output_dir / "run.md"
         p.write_text("x")
         assert dg.guess_category(p) == "cron-output"
 
+    def test_cron_jobs_json_not_tracked(self, _isolate_env):
+        """Regression for #32164: the cron registry must never be tracked."""
+        dg = _load_lib()
+        cron_dir = _isolate_env / "cron"
+        cron_dir.mkdir()
+        p = cron_dir / "jobs.json"
+        p.write_text("[]")
+        assert dg.guess_category(p) is None
+
+    def test_cron_tick_lock_not_tracked(self, _isolate_env):
+        """Regression for #32164: cron tick-lock is control-plane state."""
+        dg = _load_lib()
+        cron_dir = _isolate_env / "cron"
+        cron_dir.mkdir()
+        p = cron_dir / ".tick.lock"
+        p.write_text("")
+        assert dg.guess_category(p) is None
+
+    def test_cronjobs_top_level_not_tracked(self, _isolate_env):
+        """The legacy ``cronjobs`` alias is also control-plane at the top."""
+        dg = _load_lib()
+        cron_dir = _isolate_env / "cronjobs"
+        cron_dir.mkdir()
+        p = cron_dir / "jobs.json"
+        p.write_text("[]")
+        assert dg.guess_category(p) is None
+
     def test_ordinary_file_returns_none(self, _isolate_env):
         dg = _load_lib()
         p = _isolate_env / "notes.md"
diff --git a/tests/plugins/test_google_meet_audio.py b/tests/plugins/test_google_meet_audio.py
index 9af0f76f81f..d5207518d8f 100644
--- a/tests/plugins/test_google_meet_audio.py
+++ b/tests/plugins/test_google_meet_audio.py
@@ -6,7 +6,6 @@ without actually invoking those tools on the host.
 
 from __future__ import annotations
 
-import subprocess
 from unittest.mock import MagicMock, patch
 
 import pytest
diff --git a/tests/plugins/test_google_meet_node.py b/tests/plugins/test_google_meet_node.py
index bee1a184366..0a5ebc9aba6 100644
--- a/tests/plugins/test_google_meet_node.py
+++ b/tests/plugins/test_google_meet_node.py
@@ -12,7 +12,6 @@ import argparse
 import asyncio
 import json
 from pathlib import Path
-from unittest.mock import MagicMock, patch
 
 import pytest
 
diff --git a/tests/plugins/test_google_meet_plugin.py b/tests/plugins/test_google_meet_plugin.py
index c8dacc81d24..92815553922 100644
--- a/tests/plugins/test_google_meet_plugin.py
+++ b/tests/plugins/test_google_meet_plugin.py
@@ -764,7 +764,6 @@ def test_cmd_install_refuses_windows(capsys):
 def test_cmd_install_runs_pip_and_playwright(capsys):
     """End-to-end wiring: pip + playwright install invoked, returncodes handled."""
     from plugins.google_meet.cli import _cmd_install
-    import subprocess as _sp
 
     calls = []
     class _FakeRes:
diff --git a/tests/plugins/test_google_meet_realtime.py b/tests/plugins/test_google_meet_realtime.py
index 71d02216937..1f3f0c9c033 100644
--- a/tests/plugins/test_google_meet_realtime.py
+++ b/tests/plugins/test_google_meet_realtime.py
@@ -8,10 +8,7 @@ from __future__ import annotations
 import base64
 import json
 import sys
-import threading
 import types
-from pathlib import Path
-from unittest.mock import patch
 
 import pytest
 
diff --git a/tests/plugins/test_kanban_dashboard_plugin.py b/tests/plugins/test_kanban_dashboard_plugin.py
index 5fa1881fa32..57ce67352e4 100644
--- a/tests/plugins/test_kanban_dashboard_plugin.py
+++ b/tests/plugins/test_kanban_dashboard_plugin.py
@@ -835,8 +835,6 @@ def test_ws_events_swallows_cancellation_on_shutdown(tmp_path, monkeypatch):
     the cancellation outcome deterministically.
     """
     import asyncio
-    import types
-    import sys as _sys
 
     home = tmp_path / ".hermes"
     home.mkdir()
diff --git a/tests/plugins/test_kanban_worker_runs.py b/tests/plugins/test_kanban_worker_runs.py
index ba84d9ea9a8..74758ff4e5f 100644
--- a/tests/plugins/test_kanban_worker_runs.py
+++ b/tests/plugins/test_kanban_worker_runs.py
@@ -4,6 +4,7 @@ Covers:
   GET /workers/active
   GET /runs/{run_id}
   GET /runs/{run_id}/inspect
+  POST /runs/{run_id}/terminate
 """
 
 from __future__ import annotations
@@ -13,7 +14,7 @@ import secrets
 import sys
 import time
 from pathlib import Path
-from unittest.mock import MagicMock, patch
+from unittest.mock import MagicMock
 
 import pytest
 from fastapi import FastAPI
@@ -299,3 +300,141 @@ def test_inspect_run_live_pid(client, monkeypatch):
     assert body["memory_rss_bytes"] == fake_mem.rss
     assert body["num_threads"] == 4
     assert body["status"] == "sleeping"
+
+
+# ---------------------------------------------------------------------------
+# POST /runs/{run_id}/terminate
+# ---------------------------------------------------------------------------
+
+def _setup_running_task_with_run(conn, *, title, assignee, worker_pid):
+    """Create a task in 'running' state with a matching open task_runs row.
+
+    Mirrors what dispatcher_claim does: stamps tasks.status='running',
+    tasks.claim_lock, tasks.worker_pid; inserts task_runs row with the
+    same claim_lock so reclaim_task's preconditions are satisfied.
+    """
+    task_id = kb.create_task(conn, title=title, assignee=assignee)
+    lock = secrets.token_hex(8)
+    future = int(time.time()) + 3600
+    conn.execute(
+        "UPDATE tasks SET status='running', claim_lock=?, "
+        "claim_expires=?, worker_pid=? WHERE id=?",
+        (lock, future, worker_pid, task_id),
+    )
+    cur = conn.execute(
+        "INSERT INTO task_runs "
+        "(task_id, status, claim_lock, claim_expires, worker_pid, started_at) "
+        "VALUES (?, 'running', ?, ?, ?, ?)",
+        (task_id, lock, future, worker_pid, int(time.time())),
+    )
+    conn.commit()
+    return task_id, cur.lastrowid
+
+
+def test_terminate_run_404_unknown_id(client):
+    """POST to unknown run_id returns 404."""
+    r = client.post(
+        "/api/plugins/kanban/runs/777777/terminate",
+        json={"reason": "test"},
+    )
+    assert r.status_code == 404
+    assert "777777" in r.json()["detail"]
+
+
+def test_terminate_run_409_already_ended(client):
+    """POST against a run with ended_at set returns 409."""
+    conn = kb.connect()
+    try:
+        task_id = kb.create_task(conn, title="ended-terminate", assignee="ivy")
+        run_id = _insert_run(
+            conn, task_id, worker_pid=22222, ended_at=int(time.time()) - 30,
+        )
+    finally:
+        conn.close()
+
+    r = client.post(
+        f"/api/plugins/kanban/runs/{run_id}/terminate",
+        json={"reason": "too late"},
+    )
+    assert r.status_code == 409
+    assert "already ended" in r.json()["detail"]
+
+
+def test_terminate_run_ok(client, monkeypatch):
+    """Happy path: live run is terminated, signal fn invoked, reason recorded."""
+    conn = kb.connect()
+    try:
+        task_id, run_id = _setup_running_task_with_run(
+            conn, title="kill-me", assignee="jane", worker_pid=33333,
+        )
+    finally:
+        conn.close()
+
+    # Capture signal calls so we don't actually SIGTERM a random PID.
+    sent = []
+
+    def _fake_terminate(pid, prev_lock, *, signal_fn=None):
+        sent.append((pid, prev_lock))
+        return {"signal": "SIGTERM", "delivered": True}
+
+    monkeypatch.setattr(kb, "_terminate_reclaimed_worker", _fake_terminate)
+
+    r = client.post(
+        f"/api/plugins/kanban/runs/{run_id}/terminate",
+        json={"reason": "operator abort"},
+    )
+    assert r.status_code == 200, r.text
+    body = r.json()
+    assert body == {"ok": True, "run_id": run_id, "task_id": task_id}
+    assert sent == [(33333, sent[0][1])]
+    assert sent[0][1] is not None  # claim_lock was non-null
+
+    # Task is back to ready, claim cleared.
+    conn = kb.connect()
+    try:
+        row = conn.execute(
+            "SELECT status, claim_lock, worker_pid FROM tasks WHERE id=?",
+            (task_id,),
+        ).fetchone()
+    finally:
+        conn.close()
+    assert row["status"] == "ready"
+    assert row["claim_lock"] is None
+    assert row["worker_pid"] is None
+
+
+def test_terminate_run_409_task_not_reclaimable(client, monkeypatch):
+    """Open run row whose task is no longer claimable returns 409."""
+    conn = kb.connect()
+    try:
+        task_id = kb.create_task(conn, title="ghost-run", assignee="ken")
+        # Task left in default 'ready' state with no claim_lock — task_run
+        # exists but reclaim_task will refuse because status != 'running'
+        # and claim_lock is NULL.
+        run_id = _insert_run(conn, task_id, worker_pid=44444)
+    finally:
+        conn.close()
+
+    # Make sure no signal is ever sent on this code path.
+    def _boom(*a, **k):
+        raise AssertionError("_terminate_reclaimed_worker should not be called")
+
+    monkeypatch.setattr(kb, "_terminate_reclaimed_worker", _boom)
+
+    r = client.post(
+        f"/api/plugins/kanban/runs/{run_id}/terminate",
+        json={"reason": "stale"},
+    )
+    assert r.status_code == 409
+    assert "reclaimable" in r.json()["detail"]
+
+
+def test_terminate_run_accepts_empty_body(client):
+    """Empty JSON body (no reason) is still accepted; falls through to 404."""
+    r = client.post(
+        "/api/plugins/kanban/runs/666666/terminate",
+        json={},
+    )
+    # 404 because run doesn't exist — what we're asserting here is that
+    # the endpoint doesn't 422 on a missing 'reason' field.
+    assert r.status_code == 404
diff --git a/tests/plugins/test_retaindb_plugin.py b/tests/plugins/test_retaindb_plugin.py
index 5d517bce776..11fba5b15cf 100644
--- a/tests/plugins/test_retaindb_plugin.py
+++ b/tests/plugins/test_retaindb_plugin.py
@@ -5,13 +5,10 @@ RetainDBMemoryProvider lifecycle/tools/prefetch, thread management, connection p
 """
 
 import json
-import os
 import sqlite3
-import tempfile
-import threading
 import time
 from pathlib import Path
-from unittest.mock import MagicMock, patch, PropertyMock
+from unittest.mock import MagicMock, patch
 
 import pytest
 
@@ -67,8 +64,6 @@ from plugins.memory.retaindb import (
     _WriteQueue,
     _build_overlay,
     RetainDBMemoryProvider,
-    _ASYNC_SHUTDOWN,
-    _DEFAULT_BASE_URL,
 )
 
 
diff --git a/tests/plugins/test_security_guidance_plugin.py b/tests/plugins/test_security_guidance_plugin.py
index c4f551fba2c..10efa1061b2 100644
--- a/tests/plugins/test_security_guidance_plugin.py
+++ b/tests/plugins/test_security_guidance_plugin.py
@@ -16,9 +16,7 @@ Covers ``plugins/security-guidance/``:
   * Bundled-plugin discovery via ``PluginManager.discover_and_load``.
 """
 
-import importlib
 import importlib.util
-import json
 import sys
 import types
 from pathlib import Path
diff --git a/tests/plugins/test_teams_pipeline_plugin.py b/tests/plugins/test_teams_pipeline_plugin.py
index 862b5399720..e0bc978cefa 100644
--- a/tests/plugins/test_teams_pipeline_plugin.py
+++ b/tests/plugins/test_teams_pipeline_plugin.py
@@ -5,7 +5,6 @@ from __future__ import annotations
 import asyncio
 from types import SimpleNamespace
 from pathlib import Path
-from unittest.mock import MagicMock
 
 import pytest
 
diff --git a/tests/plugins/video_gen/test_fal_plugin.py b/tests/plugins/video_gen/test_fal_plugin.py
index fdfa9a6ec44..7af1bd40971 100644
--- a/tests/plugins/video_gen/test_fal_plugin.py
+++ b/tests/plugins/video_gen/test_fal_plugin.py
@@ -85,44 +85,72 @@ def test_fal_list_models_advertises_both_modalities():
 
 def test_fal_unavailable_without_key(monkeypatch):
     from plugins.video_gen.fal import FALVideoGenProvider
+    from plugins.video_gen import fal as fal_plugin
 
     monkeypatch.delenv("FAL_KEY", raising=False)
+    # Also ensure managed gateway is unavailable
+    monkeypatch.setattr(fal_plugin, "_resolve_managed_fal_video_gateway", lambda: None)
     assert FALVideoGenProvider().is_available() is False
 
 
 def test_fal_generate_requires_fal_key(monkeypatch):
     from plugins.video_gen.fal import FALVideoGenProvider
+    from plugins.video_gen import fal as fal_plugin
 
     monkeypatch.delenv("FAL_KEY", raising=False)
+    # Also ensure managed gateway is unavailable
+    monkeypatch.setattr(fal_plugin, "_resolve_managed_fal_video_gateway", lambda: None)
     result = FALVideoGenProvider().generate("a happy dog")
     assert result["success"] is False
     assert result["error_type"] == "auth_required"
 
 
+def test_fal_available_via_gateway(monkeypatch):
+    from plugins.video_gen.fal import FALVideoGenProvider
+    from plugins.video_gen import fal as fal_plugin
+
+    monkeypatch.delenv("FAL_KEY", raising=False)
+    monkeypatch.setattr(
+        fal_plugin,
+        "_resolve_managed_fal_video_gateway",
+        lambda: object(),  # truthy sentinel — gateway is available
+    )
+    assert FALVideoGenProvider().is_available() is True
+
+
 class TestFamilyRouting:
     """The headline behavior: image_url presence picks the endpoint."""
 
     @pytest.fixture
     def with_fake_fal(self, monkeypatch):
-        """Stub fal_client.subscribe to capture which endpoint we hit."""
+        """Stub fal_client.submit to capture which endpoint we hit."""
         import sys
         import types
 
         captured = {"endpoint": None, "arguments": None}
 
+        class FakeHandle:
+            def get(self):
+                return {"video": {"url": "https://fake/out.mp4"}}
+
         fake = types.ModuleType("fal_client")
-        def _subscribe(endpoint, arguments=None, with_logs=False):
+        def _submit(endpoint, arguments=None, headers=None):
             captured["endpoint"] = endpoint
             captured["arguments"] = arguments
-            return {"video": {"url": "https://fake/out.mp4"}}
-        fake.subscribe = _subscribe  # type: ignore
+            return FakeHandle()
+        fake.submit = _submit  # type: ignore
         monkeypatch.setitem(sys.modules, "fal_client", fake)
 
         # Reset the lazy global so it picks up our stub
         from plugins.video_gen import fal as fal_plugin
         fal_plugin._fal_client = None
+        # Also reset the managed client cache
+        fal_plugin._managed_fal_video_client = None
+        fal_plugin._managed_fal_video_client_config = None
 
         monkeypatch.setenv("FAL_KEY", "test")
+        # Force direct mode — no managed gateway
+        monkeypatch.setattr(fal_plugin, "_resolve_managed_fal_video_gateway", lambda: None)
         return captured
 
     def test_text_to_video_routes_to_text_endpoint(self, with_fake_fal):
@@ -229,7 +257,7 @@ class TestPayloadBuilder:
             seed=42,
         )
         assert p["prompt"] == "x"
-        assert p["duration"] == "8"  # FAL queue API uses strings
+        assert p["duration"] == "8s"  # veo3.1 uses "Ns" format per FAL API
         assert p["aspect_ratio"] == "16:9"
         assert p["resolution"] == "720p"
         assert p["generate_audio"] is True
diff --git a/tests/plugins/web/test_web_search_provider_plugins.py b/tests/plugins/web/test_web_search_provider_plugins.py
index 47d7791977b..2177d875c4b 100644
--- a/tests/plugins/web/test_web_search_provider_plugins.py
+++ b/tests/plugins/web/test_web_search_provider_plugins.py
@@ -20,9 +20,6 @@ from __future__ import annotations
 
 import asyncio
 import inspect
-import os
-import sys
-from typing import Any, Dict, List
 
 import pytest
 
@@ -90,20 +87,17 @@ class TestBundledPluginsRegister:
         ]
 
     @pytest.mark.parametrize(
-        "plugin_name,expected_search,expected_extract,expected_crawl",
+        "plugin_name,expected_search,expected_extract",
         [
-            ("brave-free", True, False, False),
-            ("ddgs", True, False, False),
-            ("searxng", True, False, False),
-            ("exa", True, True, False),
-            ("parallel", True, True, False),
-            ("tavily", True, True, True),
-            # firecrawl: search + extract + crawl. Crawl was originally
-            # disabled in the migration (fell through to a legacy inline
-            # path); the follow-up commit enabled it natively.
-            ("firecrawl", True, True, True),
+            ("brave-free", True, False),
+            ("ddgs", True, False),
+            ("searxng", True, False),
+            ("exa", True, True),
+            ("parallel", True, True),
+            ("tavily", True, True),
+            ("firecrawl", True, True),
             # xai: search-only via Grok's agentic web_search tool.
-            ("xai", True, False, False),
+            ("xai", True, False),
         ],
     )
     def test_capability_flags_match_spec(
@@ -111,7 +105,6 @@ class TestBundledPluginsRegister:
         plugin_name: str,
         expected_search: bool,
         expected_extract: bool,
-        expected_crawl: bool,
     ) -> None:
         _ensure_plugins_loaded()
         from agent.web_search_registry import get_provider
@@ -120,7 +113,6 @@ class TestBundledPluginsRegister:
         assert provider is not None, f"plugin {plugin_name!r} not registered"
         assert provider.supports_search() is expected_search
         assert provider.supports_extract() is expected_extract
-        assert provider.supports_crawl() is expected_crawl
 
     @pytest.mark.parametrize(
         "plugin_name",
@@ -273,7 +265,7 @@ class TestRegistryResolution:
         surfaces a precise "FOO_API_KEY is not set" error instead.
         """
         _ensure_plugins_loaded()
-        from agent.web_search_registry import _resolve, get_provider
+        from agent.web_search_registry import _resolve
 
         # No BRAVE_SEARCH_API_KEY (fixture cleared it).
         result = _resolve("brave-free", capability="search")
@@ -457,37 +449,41 @@ class TestErrorResponseShapes:
         if result:  # if anything came back, it should be an error entry
             assert "error" in result[0]
 
-    def test_tavily_crawl_returns_error_dict_when_unconfigured(self) -> None:
-        _ensure_plugins_loaded()
-        from agent.web_search_registry import get_provider
+    def test_firecrawl_config_error_points_paid_users_to_nous_subscription(self, monkeypatch):
+        from plugins.web.firecrawl import provider as firecrawl_provider
 
-        p = get_provider("tavily")
-        assert p is not None
-        result = p.crawl("https://example.com")
-        assert isinstance(result, dict)
-        assert "results" in result
-        assert isinstance(result["results"], list)
-        if result["results"]:
-            assert "error" in result["results"][0]
+        monkeypatch.setattr(
+            "tools.web_tools.managed_nous_tools_enabled",
+            lambda: True,
+            raising=False,
+        )
 
-    def test_firecrawl_crawl_returns_error_dict_when_unconfigured(self):
-        """firecrawl crawl is async (wraps SDK in to_thread); error must be
-        surfaced via the per-page result shape, not raised."""
-        _ensure_plugins_loaded()
-        from agent.web_search_registry import get_provider
+        with pytest.raises(ValueError) as exc_info:
+            firecrawl_provider._raise_web_backend_configuration_error()
 
-        p = get_provider("firecrawl")
-        assert p is not None
-        assert inspect.iscoroutinefunction(p.crawl)
-        result = asyncio.run(p.crawl("https://example.com"))
-        assert isinstance(result, dict)
-        assert "results" in result
-        assert isinstance(result["results"], list)
-        # Without FIRECRAWL_API_KEY, the plugin's _get_firecrawl_client()
-        # raises ValueError which is caught and returned as a per-page error.
-        assert len(result["results"]) >= 1
-        assert "error" in result["results"][0]
-        assert result["results"][0]["url"] == "https://example.com"
+        message = str(exc_info.value)
+        assert "With your Nous subscription you can also use the Tool Gateway" in message
+        assert "select Nous Subscription as the web provider" in message
+        assert "managed Firecrawl web tools is unavailable" not in message
+
+    def test_firecrawl_config_error_uses_entitlement_message_when_not_paid(self, monkeypatch):
+        from plugins.web.firecrawl import provider as firecrawl_provider
+
+        monkeypatch.setattr(
+            "tools.web_tools.managed_nous_tools_enabled",
+            lambda: False,
+            raising=False,
+        )
+        monkeypatch.setattr(
+            "tools.web_tools.nous_tool_gateway_unavailable_message",
+            lambda capability: f"{capability} denied by test entitlement.",
+            raising=False,
+        )
+
+        with pytest.raises(ValueError) as exc_info:
+            firecrawl_provider._raise_web_backend_configuration_error()
+
+        assert "managed Firecrawl web tools denied by test entitlement" in str(exc_info.value)
 
     def test_xai_search_returns_error_dict_when_unconfigured(self) -> None:
         """xAI returns a typed error dict (no XAI_API_KEY)."""
diff --git a/tests/providers/test_plugin_discovery.py b/tests/providers/test_plugin_discovery.py
index be5c56122ea..fba5a02df11 100644
--- a/tests/providers/test_plugin_discovery.py
+++ b/tests/providers/test_plugin_discovery.py
@@ -8,11 +8,9 @@ Verifies that:
 
 from __future__ import annotations
 
-import importlib
 import sys
 from pathlib import Path
 
-import pytest
 
 
 REPO_ROOT = Path(__file__).resolve().parents[2]
diff --git a/tests/providers/test_provider_profiles.py b/tests/providers/test_provider_profiles.py
index df96a80fd80..c9e9daa623d 100644
--- a/tests/providers/test_provider_profiles.py
+++ b/tests/providers/test_provider_profiles.py
@@ -1,6 +1,5 @@
 """Tests for the provider module registry and profiles."""
 
-import pytest
 from providers import get_provider_profile, _REGISTRY
 from providers.base import ProviderProfile, OMIT_TEMPERATURE
 
@@ -98,6 +97,11 @@ class TestOpenRouterProfile:
         body = p.build_extra_body(provider_preferences={"allow": ["anthropic"]})
         assert body["provider"] == {"allow": ["anthropic"]}
 
+    def test_extra_body_session_id(self):
+        p = get_provider_profile("openrouter")
+        body = p.build_extra_body(session_id="test-session-123")
+        assert body["session_id"] == "test-session-123"
+
     def test_extra_body_no_prefs(self):
         p = get_provider_profile("openrouter")
         body = p.build_extra_body()
diff --git a/tests/run_agent/test_1630_context_overflow_loop.py b/tests/run_agent/test_1630_context_overflow_loop.py
index f69b01241bb..3e5e3d0cfdf 100644
--- a/tests/run_agent/test_1630_context_overflow_loop.py
+++ b/tests/run_agent/test_1630_context_overflow_loop.py
@@ -8,8 +8,6 @@ Verifies that:
 3. Context-overflow failures produce helpful error messages suggesting /compact.
 """
 
-import pytest
-from types import SimpleNamespace
 from unittest.mock import MagicMock, patch
 
 
diff --git a/tests/run_agent/test_18028_content_policy_blocked.py b/tests/run_agent/test_18028_content_policy_blocked.py
new file mode 100644
index 00000000000..1edf16b87ca
--- /dev/null
+++ b/tests/run_agent/test_18028_content_policy_blocked.py
@@ -0,0 +1,152 @@
+"""Regression guard for #18028: provider content-policy / safety-filter
+blocks must classify as ``content_policy_blocked``, be non-retryable, and
+trigger the ``is_client_error`` abort path so the loop jumps straight to a
+configured fallback or surfaces a clear policy-block message — instead of
+burning ``api_max_retries`` paid attempts on a deterministic refusal and
+delivering "API failed after 3 retries" to Telegram/cron with no provider
+context.
+
+Real-world symptom from the issue:
+    ``API call failed after 3 retries — This content was flagged for
+    possible cybersecurity risk... | provider=openai-codex model=gpt-5.5``
+repeating across cron jobs and gateway sessions, with the user unable to
+tell whether the gateway was broken, the model was down, or their prompt
+was the problem.
+"""
+from __future__ import annotations
+
+
+class TestContentPolicyBlockedClassification:
+    """Verify classify_api_error returns the right shape so downstream
+    recovery (fallback activation, final_response wording) fires correctly.
+    """
+
+    def test_openai_codex_cybersecurity_no_status(self):
+        """The reported #18028 case — SDK raises without a status code."""
+        from agent.error_classifier import classify_api_error, FailoverReason
+
+        e = Exception(
+            "This content was flagged for possible cybersecurity risk. "
+            "If this seems wrong, try rephrasing your request. To get "
+            "authorized for security work, join the Trusted Access for "
+            "Cyber program."
+        )
+        result = classify_api_error(e, provider="openai-codex", model="gpt-5.5")
+        # Must NOT fall into the retryable ``unknown`` bucket — that's what
+        # caused the 3x retry burn.
+        assert result.reason == FailoverReason.content_policy_blocked
+        assert result.retryable is False
+        # Recovery is fallback model, not credential rotation or compression.
+        assert result.should_fallback is True
+        assert result.should_compress is False
+        assert result.should_rotate_credential is False
+
+
+class TestContentPolicyTriggersClientErrorAbort:
+    """Mirror the ``is_client_error`` predicate in
+    ``agent/conversation_loop.py`` and verify
+    ``FailoverReason.content_policy_blocked`` resolves to True so the loop
+    aborts (after attempting fallback) instead of falling into the
+    retry-backoff path.
+    """
+
+    def _mirror_is_client_error(
+        self,
+        *,
+        classified_retryable: bool,
+        classified_reason,
+        classified_should_compress: bool = False,
+        is_local_validation_error: bool = False,
+        is_context_length_error: bool = False,
+    ) -> bool:
+        """Exact shape of conversation_loop.py's is_client_error check.
+
+        Kept in lock-step with the source. If you change one, change both.
+        """
+        from agent.error_classifier import FailoverReason
+
+        return (
+            is_local_validation_error
+            or (
+                not classified_retryable
+                and not classified_should_compress
+                and classified_reason not in {
+                    FailoverReason.rate_limit,
+                    FailoverReason.overloaded,
+                    FailoverReason.context_overflow,
+                    FailoverReason.payload_too_large,
+                    FailoverReason.long_context_tier,
+                    FailoverReason.thinking_signature,
+                }
+            )
+        ) and not is_context_length_error
+
+    def test_content_policy_blocked_triggers_abort(self):
+        """Safety-filter block must reach is_client_error → fallback/abort."""
+        from agent.error_classifier import FailoverReason
+
+        # What classify_api_error returns for a content-policy block:
+        #   reason=content_policy_blocked, retryable=False, should_compress=False
+        assert self._mirror_is_client_error(
+            classified_retryable=False,
+            classified_reason=FailoverReason.content_policy_blocked,
+        ), (
+            "FailoverReason.content_policy_blocked must trigger the "
+            "is_client_error path so fallback fires immediately instead of "
+            "burning api_max_retries paid attempts on a deterministic "
+            "safety refusal — see #18028."
+        )
+
+
+class TestContentPolicyPatternsAreNarrow:
+    """Defensive guard: the safety-filter patterns must not collide with
+    benign error wording from billing / format / generic 400 errors. If
+    these regress to ``content_policy_blocked``, recovery will route to
+    the wrong code path (fallback model instead of credential rotation).
+    """
+
+    def test_generic_400_format_error_not_misclassified(self):
+        from agent.error_classifier import classify_api_error, FailoverReason
+
+        class _Err(Exception):
+            def __init__(self, msg, status_code):
+                super().__init__(msg)
+                self.status_code = status_code
+
+        e = _Err("Invalid request: messages must be a non-empty list", status_code=400)
+        result = classify_api_error(e, provider="openai", model="gpt-4o")
+        assert result.reason != FailoverReason.content_policy_blocked
+
+    def test_billing_402_not_misclassified(self):
+        from agent.error_classifier import classify_api_error, FailoverReason
+
+        class _Err(Exception):
+            def __init__(self, msg, status_code):
+                super().__init__(msg)
+                self.status_code = status_code
+
+        e = _Err("Insufficient credits. Top up your balance.", status_code=402)
+        result = classify_api_error(e, provider="openrouter", model="anthropic/claude-opus")
+        assert result.reason == FailoverReason.billing
+
+    def test_openrouter_account_policy_block_stays_distinct(self):
+        """``provider_policy_blocked`` (OpenRouter account-level data
+        policy) must remain a separate classification from
+        ``content_policy_blocked`` (upstream model safety filter) — they
+        have different recovery strategies.
+        """
+        from agent.error_classifier import classify_api_error, FailoverReason
+
+        class _Err(Exception):
+            def __init__(self, msg, status_code):
+                super().__init__(msg)
+                self.status_code = status_code
+
+        e = _Err(
+            "No endpoints available matching your guardrail restrictions "
+            "and data policy",
+            status_code=404,
+        )
+        result = classify_api_error(e, provider="openrouter", model="anthropic/claude-opus")
+        assert result.reason == FailoverReason.provider_policy_blocked
+        assert result.reason != FailoverReason.content_policy_blocked
diff --git a/tests/run_agent/test_413_compression.py b/tests/run_agent/test_413_compression.py
index 82fc6b3e60d..6695d6c275e 100644
--- a/tests/run_agent/test_413_compression.py
+++ b/tests/run_agent/test_413_compression.py
@@ -11,11 +11,9 @@ import pytest
 
 
 
-import uuid
 from types import SimpleNamespace
 from unittest.mock import MagicMock, patch
 
-import pytest
 
 from agent.context_compressor import SUMMARY_PREFIX
 from run_agent import AIAgent
diff --git a/tests/run_agent/test_860_dedup.py b/tests/run_agent/test_860_dedup.py
index 070936af67b..39a7c0f3154 100644
--- a/tests/run_agent/test_860_dedup.py
+++ b/tests/run_agent/test_860_dedup.py
@@ -7,14 +7,11 @@ Verifies that:
 4. The gateway doesn't double-write messages the agent already persisted
 """
 
-import json
 import os
-import sqlite3
 import tempfile
 from pathlib import Path
-from unittest.mock import MagicMock, patch
+from unittest.mock import patch
 
-import pytest
 
 
 # ---------------------------------------------------------------------------
diff --git a/tests/run_agent/test_api_max_retries_config.py b/tests/run_agent/test_api_max_retries_config.py
index 44e859986ba..5ef7ee4b044 100644
--- a/tests/run_agent/test_api_max_retries_config.py
+++ b/tests/run_agent/test_api_max_retries_config.py
@@ -4,7 +4,7 @@ Closes #11616 — make the hardcoded ``max_retries = 3`` in the agent's API
 retry loop user-configurable so fallback-provider setups can fail over
 faster on flaky primaries instead of burning ~3x180s on the same stall.
 """
-from unittest.mock import MagicMock, patch
+from unittest.mock import patch
 
 from run_agent import AIAgent
 
diff --git a/tests/run_agent/test_async_httpx_del_neuter.py b/tests/run_agent/test_async_httpx_del_neuter.py
index e91102288c0..946d73dbdf1 100644
--- a/tests/run_agent/test_async_httpx_del_neuter.py
+++ b/tests/run_agent/test_async_httpx_del_neuter.py
@@ -13,8 +13,6 @@ The three-layer defence:
 """
 
 import asyncio
-import threading
-from types import SimpleNamespace
 from unittest.mock import MagicMock, patch
 
 import pytest
diff --git a/tests/run_agent/test_background_review.py b/tests/run_agent/test_background_review.py
index 89626f857d5..f4b0faff7f5 100644
--- a/tests/run_agent/test_background_review.py
+++ b/tests/run_agent/test_background_review.py
@@ -76,6 +76,78 @@ def test_background_review_shuts_down_memory_provider_before_close(monkeypatch):
     ]
 
 
+def test_background_review_summarizer_receives_captured_messages_after_close(monkeypatch):
+    """The action summarizer must see review messages even after close cleanup.
+
+    Regression for the bug where ``review_messages`` was snapshot AFTER
+    ``review_agent.close()``. close() is allowed to clean per-session state
+    (including ``_session_messages``), so the summarizer would receive an
+    empty list and the user-visible self-improvement summary would silently
+    disappear. The fix snapshots ``_session_messages`` before teardown.
+    """
+    import json
+    import agent.background_review as bg_review
+
+    review_tool_message = {
+        "role": "tool",
+        "tool_call_id": "call_bg",
+        "content": json.dumps(
+            {"success": True, "message": "Entry added", "target": "memory"}
+        ),
+    }
+    captured: dict = {}
+    events: list[str] = []
+
+    class FakeReviewAgent:
+        def __init__(self, **kwargs):
+            self._session_messages = []
+
+        def run_conversation(self, **kwargs):
+            events.append("run_conversation")
+            self._session_messages = [review_tool_message]
+
+        def shutdown_memory_provider(self):
+            events.append("shutdown_memory_provider")
+
+        def close(self):
+            events.append("close")
+            # close() is allowed to clean _session_messages — the fix
+            # must have snapshot them before this runs.
+            self._session_messages = []
+
+    def fake_summarize(review_messages, prior_snapshot):
+        events.append("summarize")
+        captured["review_messages"] = list(review_messages)
+        captured["prior_snapshot"] = list(prior_snapshot)
+        return []
+
+    monkeypatch.setattr(run_agent_module, "AIAgent", FakeReviewAgent)
+    monkeypatch.setattr(run_agent_module.threading, "Thread", ImmediateThread)
+    monkeypatch.setattr(
+        bg_review,
+        "summarize_background_review_actions",
+        fake_summarize,
+    )
+
+    messages_snapshot = [{"role": "user", "content": "hi"}]
+    agent = _bare_agent()
+
+    AIAgent._spawn_background_review(
+        agent,
+        messages_snapshot=messages_snapshot,
+        review_memory=True,
+    )
+
+    assert events == [
+        "run_conversation",
+        "shutdown_memory_provider",
+        "close",
+        "summarize",
+    ]
+    assert captured["review_messages"] == [review_tool_message]
+    assert captured["prior_snapshot"] == messages_snapshot
+
+
 def test_background_review_installs_auto_deny_approval_callback(monkeypatch):
     """Regression guard for #15216.
 
diff --git a/tests/run_agent/test_background_review_toolset_restriction.py b/tests/run_agent/test_background_review_toolset_restriction.py
index 9682014ee44..f94ef831ae9 100644
--- a/tests/run_agent/test_background_review_toolset_restriction.py
+++ b/tests/run_agent/test_background_review_toolset_restriction.py
@@ -13,7 +13,6 @@ runtime via a thread-local whitelist on the existing
 that caused the prefix-cache miss.
 """
 
-import threading
 from unittest.mock import patch
 
 
diff --git a/tests/run_agent/test_callable_api_key.py b/tests/run_agent/test_callable_api_key.py
index 2c685643b98..9bd14462827 100644
--- a/tests/run_agent/test_callable_api_key.py
+++ b/tests/run_agent/test_callable_api_key.py
@@ -26,9 +26,8 @@ Covered:
 from __future__ import annotations
 
 import json
-from types import SimpleNamespace
 from typing import cast
-from unittest.mock import MagicMock, patch
+from unittest.mock import MagicMock
 
 import pytest
 
diff --git a/tests/run_agent/test_codex_no_tools_nonetype.py b/tests/run_agent/test_codex_no_tools_nonetype.py
index d7980e8f02e..7c4aa43f613 100644
--- a/tests/run_agent/test_codex_no_tools_nonetype.py
+++ b/tests/run_agent/test_codex_no_tools_nonetype.py
@@ -29,7 +29,6 @@ from __future__ import annotations
 
 import sys
 import types
-from types import SimpleNamespace
 from typing import Any, Dict, List
 
 import pytest
diff --git a/tests/run_agent/test_codex_xai_oauth_recovery.py b/tests/run_agent/test_codex_xai_oauth_recovery.py
index 18d6c9c4c8e..8db6c262693 100644
--- a/tests/run_agent/test_codex_xai_oauth_recovery.py
+++ b/tests/run_agent/test_codex_xai_oauth_recovery.py
@@ -31,7 +31,7 @@ Three distinct failure modes the user community hit during rollout:
 """
 
 from types import SimpleNamespace
-from unittest.mock import MagicMock, patch
+from unittest.mock import MagicMock
 
 import pytest
 
@@ -537,7 +537,6 @@ def test_recover_with_credential_pool_skips_refresh_on_entitlement_403():
     the entitlement guard, recovery returns False so the error surfaces
     normally with the friendly hint from _summarize_api_error.
     """
-    from run_agent import AIAgent
     from agent.error_classifier import FailoverReason
 
     agent = _make_codex_agent()
@@ -627,7 +626,6 @@ def test_recover_with_credential_pool_skips_refresh_on_bare_403_for_xai_oauth():
 
 def test_recover_with_credential_pool_still_refreshes_genuine_auth_failure():
     """Regression guard: legitimate auth errors must still trigger refresh."""
-    from run_agent import AIAgent
     from agent.error_classifier import FailoverReason
 
     agent = _make_codex_agent()
@@ -809,7 +807,6 @@ def test_recover_with_credential_pool_refreshes_on_xai_bad_credentials_403():
     the very body that pre-fix tripped the entitlement classifier
     and short-circuited the refresh path.
     """
-    from run_agent import AIAgent
     from agent.error_classifier import FailoverReason
 
     agent = _make_codex_agent()
@@ -866,7 +863,6 @@ def test_recover_with_credential_pool_still_blocks_real_entitlement():
     survive the new disambiguator.  A real unsubscribed-account body
     has no WKE suffix and no OAuth2-validation phrase, so the
     classifier still classifies it as entitlement and short-circuits."""
-    from run_agent import AIAgent
     from agent.error_classifier import FailoverReason
 
     agent = _make_codex_agent()
diff --git a/tests/run_agent/test_compress_focus_plugin_fallback.py b/tests/run_agent/test_compress_focus_plugin_fallback.py
index f9c1b83dcc0..1a16f82abd4 100644
--- a/tests/run_agent/test_compress_focus_plugin_fallback.py
+++ b/tests/run_agent/test_compress_focus_plugin_fallback.py
@@ -9,7 +9,6 @@ on older plugins.
 
 from unittest.mock import MagicMock
 
-import pytest
 
 from run_agent import AIAgent
 
diff --git a/tests/run_agent/test_compression_boundary.py b/tests/run_agent/test_compression_boundary.py
index db7bb67b80f..ff9455d75c2 100644
--- a/tests/run_agent/test_compression_boundary.py
+++ b/tests/run_agent/test_compression_boundary.py
@@ -4,8 +4,7 @@ Verifies that _align_boundary_backward correctly handles tool result groups
 so that parallel tool calls are never split during compression.
 """
 
-import pytest
-from unittest.mock import patch, MagicMock
+from unittest.mock import patch
 
 from agent.context_compressor import ContextCompressor
 
diff --git a/tests/run_agent/test_compression_boundary_hook.py b/tests/run_agent/test_compression_boundary_hook.py
index ef06e97e369..fba465bb264 100644
--- a/tests/run_agent/test_compression_boundary_hook.py
+++ b/tests/run_agent/test_compression_boundary_hook.py
@@ -16,7 +16,6 @@ import tempfile
 from pathlib import Path
 from unittest.mock import MagicMock, patch
 
-import pytest
 
 
 class TestCompressionBoundaryHook:
diff --git a/tests/run_agent/test_compression_persistence.py b/tests/run_agent/test_compression_persistence.py
index 46ab963d420..e8b20487cd4 100644
--- a/tests/run_agent/test_compression_persistence.py
+++ b/tests/run_agent/test_compression_persistence.py
@@ -19,9 +19,8 @@ Bug scenario (pre-fix):
 import os
 import tempfile
 from pathlib import Path
-from unittest.mock import MagicMock, patch
+from unittest.mock import patch
 
-import pytest
 
 
 # ---------------------------------------------------------------------------
diff --git a/tests/run_agent/test_compression_trigger_excludes_reasoning.py b/tests/run_agent/test_compression_trigger_excludes_reasoning.py
index 24fe2868fcb..22fb37bf525 100644
--- a/tests/run_agent/test_compression_trigger_excludes_reasoning.py
+++ b/tests/run_agent/test_compression_trigger_excludes_reasoning.py
@@ -6,8 +6,6 @@ trigger must use only prompt_tokens so sessions aren't prematurely split.
 """
 
 import types
-import pytest
-from unittest.mock import MagicMock, patch
 
 
 def _make_agent_stub(prompt_tokens, completion_tokens, threshold_tokens):
diff --git a/tests/run_agent/test_concurrent_interrupt.py b/tests/run_agent/test_concurrent_interrupt.py
index 747ecb7ca2e..4cb35695c0b 100644
--- a/tests/run_agent/test_concurrent_interrupt.py
+++ b/tests/run_agent/test_concurrent_interrupt.py
@@ -1,9 +1,8 @@
 """Tests for interrupt handling in concurrent tool execution."""
 
-import concurrent.futures
 import threading
 import time
-from unittest.mock import MagicMock, patch
+from unittest.mock import MagicMock
 
 import pytest
 
diff --git a/tests/run_agent/test_create_openai_client_reuse.py b/tests/run_agent/test_create_openai_client_reuse.py
index 8b39711b3e4..a5e3fcb86dc 100644
--- a/tests/run_agent/test_create_openai_client_reuse.py
+++ b/tests/run_agent/test_create_openai_client_reuse.py
@@ -17,7 +17,7 @@ That is the exact scenario this test reproduces at object level without a
 network, so it runs in CI on every PR.
 """
 from types import SimpleNamespace
-from unittest.mock import MagicMock, patch
+from unittest.mock import patch
 
 from run_agent import AIAgent
 
diff --git a/tests/run_agent/test_credential_pool_interrupt.py b/tests/run_agent/test_credential_pool_interrupt.py
index 8484fa003e9..8dab8da949a 100644
--- a/tests/run_agent/test_credential_pool_interrupt.py
+++ b/tests/run_agent/test_credential_pool_interrupt.py
@@ -3,7 +3,6 @@
 When has_retried_429 is lost (user cancels between 429s), the pool should
 still rotate if the current credential is already marked exhausted.
 """
-import pytest
 from unittest.mock import MagicMock, patch
 
 from agent.credential_pool import PooledCredential, STATUS_EXHAUSTED
diff --git a/tests/run_agent/test_deepseek_reasoning_content_echo.py b/tests/run_agent/test_deepseek_reasoning_content_echo.py
index 0efdb2c5a18..c8c322191ff 100644
--- a/tests/run_agent/test_deepseek_reasoning_content_echo.py
+++ b/tests/run_agent/test_deepseek_reasoning_content_echo.py
@@ -481,3 +481,85 @@ class TestNeedsKimiToolReasoning:
         )
         # model name contains 'moonshot' but host is openrouter — should be False
         assert agent._needs_kimi_tool_reasoning() is False
+
+
+class TestReapplyReasoningEchoForProviderSwitch:
+    """Mid-conversation fallover to a require-side provider must re-pad.
+
+    ``api_messages`` is built once, before the retry loop, while the *primary*
+    provider is active. When a fallback then switches to DeepSeek/Kimi/MiMo,
+    assistant turns that were built under a non-require primary (e.g. Codex,
+    which uses encrypted reasoning, not ``reasoning_content``) go out bare and
+    the new provider 400s with "reasoning_content must be passed back".
+
+    ``reapply_reasoning_echo_for_provider`` re-applies the pad against the
+    *current* provider right before the request is built. It is idempotent and
+    a no-op unless the active provider enforces echo-back.
+    """
+
+    @staticmethod
+    def _codex_built_history() -> list[dict]:
+        """Assistant turns as built under a Codex primary: some carry a
+        reasoning summary (stored as reasoning_content), some are bare."""
+        return [
+            {"role": "system", "content": "sys"},
+            {"role": "user", "content": "do the thing"},
+            {  # turn that emitted a reasoning summary
+                "role": "assistant",
+                "content": "",
+                "reasoning_content": "summary from codex",
+                "tool_calls": [{"id": "c1", "function": {"name": "terminal"}}],
+            },
+            {"role": "tool", "tool_call_id": "c1", "content": "ok"},
+            {  # bare tool-call turn (Codex emitted no summary)
+                "role": "assistant",
+                "content": "",
+                "tool_calls": [{"id": "c2", "function": {"name": "terminal"}}],
+            },
+            {"role": "tool", "tool_call_id": "c2", "content": "ok"},
+        ]
+
+    def test_switch_to_deepseek_pads_bare_turns(self) -> None:
+        from agent.agent_runtime_helpers import reapply_reasoning_echo_for_provider
+
+        agent = _make_agent(provider="deepseek", model="deepseek-v4-pro")
+        msgs = self._codex_built_history()
+        padded = reapply_reasoning_echo_for_provider(agent, msgs)
+        assert padded == 1
+        bare = [m for m in msgs if m.get("role") == "assistant" and not m.get("reasoning_content")]
+        assert bare == []
+        # existing summary preserved verbatim, not clobbered with the pad
+        assert msgs[2]["reasoning_content"] == "summary from codex"
+        assert msgs[4]["reasoning_content"] == " "
+
+    def test_noop_under_non_require_provider(self) -> None:
+        from agent.agent_runtime_helpers import reapply_reasoning_echo_for_provider
+
+        agent = _make_agent(
+            provider="openai-codex",
+            model="gpt-5.5",
+            base_url="https://chatgpt.com/backend-api/codex",
+        )
+        msgs = self._codex_built_history()
+        padded = reapply_reasoning_echo_for_provider(agent, msgs)
+        assert padded == 0
+        # the bare turn stays bare — Codex doesn't want reasoning_content
+        assert "reasoning_content" not in msgs[4]
+
+    def test_idempotent(self) -> None:
+        from agent.agent_runtime_helpers import reapply_reasoning_echo_for_provider
+
+        agent = _make_agent(provider="deepseek", model="deepseek-v4-pro")
+        msgs = self._codex_built_history()
+        assert reapply_reasoning_echo_for_provider(agent, msgs) == 1
+        assert reapply_reasoning_echo_for_provider(agent, msgs) == 0
+
+    def test_non_assistant_messages_untouched(self) -> None:
+        from agent.agent_runtime_helpers import reapply_reasoning_echo_for_provider
+
+        agent = _make_agent(provider="deepseek", model="deepseek-v4-pro")
+        msgs = self._codex_built_history()
+        reapply_reasoning_echo_for_provider(agent, msgs)
+        assert "reasoning_content" not in msgs[0]  # system
+        assert "reasoning_content" not in msgs[1]  # user
+        assert "reasoning_content" not in msgs[3]  # tool
diff --git a/tests/run_agent/test_exit_cleanup_interrupt.py b/tests/run_agent/test_exit_cleanup_interrupt.py
index 1e5d8431c38..b8c6b661e02 100644
--- a/tests/run_agent/test_exit_cleanup_interrupt.py
+++ b/tests/run_agent/test_exit_cleanup_interrupt.py
@@ -6,9 +6,7 @@ abort remaining cleanup steps.  These tests exercise the actual production
 code paths — not a copy of the try/except pattern.
 """
 
-import atexit
-import weakref
-from unittest.mock import MagicMock, patch, call
+from unittest.mock import MagicMock, patch
 
 import pytest
 
diff --git a/tests/run_agent/test_fallback_credential_isolation.py b/tests/run_agent/test_fallback_credential_isolation.py
index a32eaa2a309..54e352b3b88 100644
--- a/tests/run_agent/test_fallback_credential_isolation.py
+++ b/tests/run_agent/test_fallback_credential_isolation.py
@@ -11,13 +11,9 @@ _swap_credential continue operating on the PRIMARY's credential pool during
 fallback calls, contaminating primary state with fallback-provider errors.
 """
 
-import logging
 import sys
-import types
-from dataclasses import dataclass, replace
-from unittest.mock import MagicMock, patch
+from unittest.mock import MagicMock
 
-import pytest
 
 
 # ── Helpers ──────────────────────────────────────────────────────────
diff --git a/tests/run_agent/test_image_shrink_recovery.py b/tests/run_agent/test_image_shrink_recovery.py
index 7435bb7a13c..c5114ffef04 100644
--- a/tests/run_agent/test_image_shrink_recovery.py
+++ b/tests/run_agent/test_image_shrink_recovery.py
@@ -18,9 +18,7 @@ payload rewriter.
 from __future__ import annotations
 
 import base64
-from pathlib import Path
 
-import pytest
 
 from agent.error_classifier import FailoverReason, classify_api_error
 
diff --git a/tests/run_agent/test_interactive_interrupt.py b/tests/run_agent/test_interactive_interrupt.py
index 762621f2203..27d3bff91c0 100644
--- a/tests/run_agent/test_interactive_interrupt.py
+++ b/tests/run_agent/test_interactive_interrupt.py
@@ -8,9 +8,6 @@ Logs every step to stderr (which isn't affected by redirect_stdout)
 so we can see exactly where the interrupt gets lost.
 """
 
-import contextlib
-import io
-import json
 import logging
 import queue
 import sys
@@ -27,7 +24,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspa
 
 from unittest.mock import MagicMock, patch
 from run_agent import AIAgent, IterationBudget
-from tools.interrupt import set_interrupt, is_interrupted
+from tools.interrupt import set_interrupt
 
 def make_slow_response(delay=2.0):
     """API response that takes a while."""
diff --git a/tests/run_agent/test_interrupt_propagation.py b/tests/run_agent/test_interrupt_propagation.py
index 9dd8ce327e6..7e3085f1d2c 100644
--- a/tests/run_agent/test_interrupt_propagation.py
+++ b/tests/run_agent/test_interrupt_propagation.py
@@ -4,13 +4,12 @@ Reproduces the CLI scenario: user sends a message while delegate_task is
 running, main thread calls parent.interrupt(), child should stop.
 """
 
-import json
 import threading
 import time
 import unittest
-from unittest.mock import MagicMock, patch, PropertyMock
+from unittest.mock import MagicMock
 
-from tools.interrupt import set_interrupt, is_interrupted, _interrupt_event
+from tools.interrupt import set_interrupt, is_interrupted
 
 
 class TestInterruptPropagationToChild(unittest.TestCase):
diff --git a/tests/run_agent/test_invalid_context_length_warning.py b/tests/run_agent/test_invalid_context_length_warning.py
index 14b2e0f2a15..a38980e9774 100644
--- a/tests/run_agent/test_invalid_context_length_warning.py
+++ b/tests/run_agent/test_invalid_context_length_warning.py
@@ -1,6 +1,6 @@
 """Tests that invalid context_length values in config produce visible warnings."""
 
-from unittest.mock import patch, MagicMock, call
+from unittest.mock import patch
 
 
 def _build_agent(model_cfg, custom_providers=None, model="anthropic/claude-opus-4.6"):
diff --git a/tests/run_agent/test_iteration_budget_race.py b/tests/run_agent/test_iteration_budget_race.py
index e8aa70fbf6f..9b75b1f4c5b 100644
--- a/tests/run_agent/test_iteration_budget_race.py
+++ b/tests/run_agent/test_iteration_budget_race.py
@@ -3,11 +3,8 @@
 The `used` property must acquire the lock before reading `_used` to prevent
 data races with concurrent `consume()` / `refund()` calls.
 """
-import threading
-import time
 from concurrent.futures import ThreadPoolExecutor
 
-import pytest
 
 
 def test_iteration_budget_used_is_thread_safe():
diff --git a/tests/run_agent/test_long_context_tier_429.py b/tests/run_agent/test_long_context_tier_429.py
index 07e569bed95..79185cfbb74 100644
--- a/tests/run_agent/test_long_context_tier_429.py
+++ b/tests/run_agent/test_long_context_tier_429.py
@@ -8,9 +8,7 @@ reduce context_length to 200k and compress instead of retrying.
 Only Sonnet is affected — Opus 1M is general access.
 """
 
-import pytest
 from types import SimpleNamespace
-from unittest.mock import MagicMock, patch
 
 
 # ---------------------------------------------------------------------------
diff --git a/tests/run_agent/test_memory_sync_interrupted.py b/tests/run_agent/test_memory_sync_interrupted.py
index feeb028927b..3a118002e2b 100644
--- a/tests/run_agent/test_memory_sync_interrupted.py
+++ b/tests/run_agent/test_memory_sync_interrupted.py
@@ -91,6 +91,45 @@ class TestSyncExternalMemoryForTurn:
             session_id="test_session_001",
         )
 
+    def test_completed_turn_syncs_messages_when_present(self):
+        agent = _bare_agent()
+        messages = [
+            {
+                "role": "assistant",
+                "content": None,
+                "tool_calls": [
+                    {
+                        "id": "call-1",
+                        "type": "function",
+                        "function": {
+                            "name": "terminal",
+                            "arguments": "{\"command\":\"pytest\"}",
+                        },
+                    }
+                ],
+            },
+            {
+                "role": "tool",
+                "name": "terminal",
+                "tool_call_id": "call-1",
+                "content": "final Hermes-processed output",
+            }
+        ]
+
+        agent._sync_external_memory_for_turn(
+            original_user_message="run tests",
+            final_response="tests passed",
+            interrupted=False,
+            messages=messages,
+        )
+
+        agent._memory_manager.sync_all.assert_called_once_with(
+            "run tests",
+            "tests passed",
+            session_id="test_session_001",
+            messages=messages,
+        )
+
     # --- Edge cases (pre-existing behaviour preserved) ------------------
 
     def test_no_final_response_skips(self):
diff --git a/tests/run_agent/test_multimodal_tool_content_recovery.py b/tests/run_agent/test_multimodal_tool_content_recovery.py
index 63ee49f97c0..0d9deef9394 100644
--- a/tests/run_agent/test_multimodal_tool_content_recovery.py
+++ b/tests/run_agent/test_multimodal_tool_content_recovery.py
@@ -25,7 +25,6 @@ See: https://github.com/NousResearch/hermes-agent/issues/27344
 
 from __future__ import annotations
 
-import pytest
 
 from agent.error_classifier import FailoverReason, classify_api_error
 
diff --git a/tests/run_agent/test_plugin_context_engine_init.py b/tests/run_agent/test_plugin_context_engine_init.py
index 83895ac6dce..7285cb1f625 100644
--- a/tests/run_agent/test_plugin_context_engine_init.py
+++ b/tests/run_agent/test_plugin_context_engine_init.py
@@ -26,6 +26,17 @@ class _StubEngine(ContextEngine):
         return messages
 
 
+class _ToolEngine(_StubEngine):
+    def get_tool_schemas(self):
+        return [
+            {
+                "name": "stub_recover",
+                "description": "Recover context from the stub engine.",
+                "parameters": {"type": "object", "properties": {}},
+            }
+        ]
+
+
 def test_plugin_engine_gets_context_length_on_init():
     """Plugin context engine should have context_length set during AIAgent init."""
     engine = _StubEngine()
@@ -56,6 +67,46 @@ def test_plugin_engine_gets_context_length_on_init():
     assert engine.threshold_tokens == int(204_800 * engine.threshold_percent)
 
 
+def test_active_context_engine_tools_survive_explicit_platform_toolsets():
+    """LCM-style recovery tools must survive saved `hermes tools` lists."""
+    engine = _ToolEngine()
+    cfg = {
+        "context": {"engine": "stub"},
+        "platform_toolsets": {"cli": ["web", "terminal"]},
+        "agent": {},
+    }
+
+    from hermes_cli.tools_config import _get_platform_tools
+
+    enabled_toolsets = _get_platform_tools(cfg, "cli", include_default_mcp_servers=False)
+    assert "context_engine" in enabled_toolsets
+
+    with (
+        patch("hermes_cli.config.load_config", return_value=cfg),
+        patch("plugins.context_engine.load_context_engine", return_value=engine),
+        patch("agent.model_metadata.get_model_context_length", return_value=204_800),
+        patch("run_agent.get_tool_definitions", return_value=[]),
+        patch("run_agent.check_toolset_requirements", return_value={}),
+        patch("run_agent.OpenAI"),
+    ):
+        from run_agent import AIAgent
+
+        agent = AIAgent(
+            api_key="test-key-1234567890",
+            base_url="https://openrouter.ai/api/v1",
+            enabled_toolsets=sorted(enabled_toolsets),
+            quiet_mode=True,
+            skip_context_files=True,
+            skip_memory=True,
+        )
+
+    assert "stub_recover" in getattr(agent, "valid_tool_names", set())
+    assert "stub_recover" in {
+        tool.get("function", {}).get("name")
+        for tool in getattr(agent, "tools", [])
+    }
+
+
 def test_plugin_engine_update_model_args():
     """Verify update_model() receives model, context_length, base_url, api_key, provider."""
     engine = _StubEngine()
diff --git a/tests/run_agent/test_primary_runtime_restore.py b/tests/run_agent/test_primary_runtime_restore.py
index d082f047f27..7aee1418782 100644
--- a/tests/run_agent/test_primary_runtime_restore.py
+++ b/tests/run_agent/test_primary_runtime_restore.py
@@ -10,10 +10,8 @@ Verifies that:
 """
 
 import time
-from types import SimpleNamespace
-from unittest.mock import MagicMock, patch, PropertyMock
+from unittest.mock import MagicMock, patch
 
-import pytest
 
 from run_agent import AIAgent
 
diff --git a/tests/run_agent/test_provider_parity.py b/tests/run_agent/test_provider_parity.py
index f0e1aadb51d..523c5b09d64 100644
--- a/tests/run_agent/test_provider_parity.py
+++ b/tests/run_agent/test_provider_parity.py
@@ -4,12 +4,12 @@ and handles responses properly for all supported providers.
 Ensures changes to one provider path don't silently break another.
 """
 
+import base64
 import json
-import os
 import sys
 import types
 from types import SimpleNamespace
-from unittest.mock import patch, MagicMock
+from unittest.mock import patch
 
 import pytest
 from agent.codex_responses_adapter import _chat_content_to_responses_parts, _chat_messages_to_responses_input, _normalize_codex_response, _preflight_codex_input_items
@@ -37,6 +37,17 @@ def _tool_defs(*names):
     ]
 
 
+def _fake_invoke_jwt() -> str:
+    def _part(payload):
+        raw = json.dumps(payload, separators=(",", ":")).encode("utf-8")
+        return base64.urlsafe_b64encode(raw).decode("ascii").rstrip("=")
+
+    return (
+        f"{_part({'alg': 'none', 'typ': 'JWT'})}."
+        f"{_part({'scope': 'inference:invoke', 'exp': 4102444800})}.sig"
+    )
+
+
 class _FakeOpenAI:
     def __init__(self, **kw):
         self.api_key = kw.get("api_key", "test")
@@ -927,7 +938,11 @@ class TestAuxiliaryClientProviderPriority:
     def test_nous_when_no_openrouter(self, monkeypatch):
         monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
         from agent.auxiliary_client import get_text_auxiliary_client
-        with patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "nous-tok"}), \
+        nous_auth = {
+            "access_token": _fake_invoke_jwt(),
+            "scope": "inference:invoke",
+        }
+        with patch("agent.auxiliary_client._read_nous_auth", return_value=nous_auth), \
              patch("agent.auxiliary_client.OpenAI") as mock, \
              patch("hermes_cli.models.get_nous_recommended_aux_model", return_value=None):
             client, model = get_text_auxiliary_client()
diff --git a/tests/run_agent/test_real_interrupt_subagent.py b/tests/run_agent/test_real_interrupt_subagent.py
index 39b4c58e2d4..a76fb3f84fb 100644
--- a/tests/run_agent/test_real_interrupt_subagent.py
+++ b/tests/run_agent/test_real_interrupt_subagent.py
@@ -4,14 +4,13 @@ This uses a real AIAgent with mocked HTTP responses to test the complete
 interrupt flow through _run_single_child → child.run_conversation().
 """
 
-import json
 import os
 import threading
 import time
 import unittest
-from unittest.mock import MagicMock, patch, PropertyMock
+from unittest.mock import MagicMock, patch
 
-from tools.interrupt import set_interrupt, is_interrupted
+from tools.interrupt import set_interrupt
 
 
 def _make_slow_api_response(delay=5.0):
diff --git a/tests/run_agent/test_repair_tool_call_arguments.py b/tests/run_agent/test_repair_tool_call_arguments.py
index c282397fccd..dcd98b5acff 100644
--- a/tests/run_agent/test_repair_tool_call_arguments.py
+++ b/tests/run_agent/test_repair_tool_call_arguments.py
@@ -1,7 +1,6 @@
 """Tests for _repair_tool_call_arguments — malformed JSON repair pipeline."""
 
 import json
-import pytest
 
 from run_agent import _repair_tool_call_arguments
 
diff --git a/tests/run_agent/test_retry_status_buffer.py b/tests/run_agent/test_retry_status_buffer.py
new file mode 100644
index 00000000000..221c10c7596
--- /dev/null
+++ b/tests/run_agent/test_retry_status_buffer.py
@@ -0,0 +1,156 @@
+"""Tests for the retry/fallback status buffer helpers on AIAgent.
+
+These helpers defer noisy retry chatter (rate-limit retries, fallback
+switches, compression attempts) so users only see the trace when
+everything ultimately fails.  On successful recovery the buffer is
+silently dropped.
+"""
+
+from __future__ import annotations
+
+
+from run_agent import AIAgent
+
+
+def _make_bare_agent():
+    """Construct an AIAgent without running __init__ — we only need the
+    buffered-status helpers, which are pure-Python and depend only on a
+    handful of attributes."""
+    agent = object.__new__(AIAgent)
+    agent.log_prefix = ""
+    agent.status_callback = None
+    agent.suppress_status_output = False
+    agent._mute_post_response = False
+    agent._executing_tools = False
+    agent._print_fn = None
+    return agent
+
+
+def test_buffer_status_accumulates_then_flushes(capsys):
+    agent = _make_bare_agent()
+    emitted = []
+    agent._emit_status = lambda msg: emitted.append(("status", msg))
+
+    agent._buffer_status("⏳ Retrying...")
+    agent._buffer_status("⚠️ Fallback...")
+
+    # Nothing emitted yet — they are buffered.
+    assert emitted == []
+    assert agent._retry_status_buffer == [
+        ("status", "⏳ Retrying..."),
+        ("status", "⚠️ Fallback..."),
+    ]
+
+    # Flush surfaces them in order through _emit_status.
+    agent._flush_status_buffer()
+    assert emitted == [
+        ("status", "⏳ Retrying..."),
+        ("status", "⚠️ Fallback..."),
+    ]
+    # Buffer is drained.
+    assert agent._retry_status_buffer == []
+
+
+def test_clear_drops_buffered_messages_silently():
+    agent = _make_bare_agent()
+    emitted = []
+    agent._emit_status = lambda msg: emitted.append(msg)
+
+    agent._buffer_status("⏳ Retrying...")
+    agent._buffer_status("⚠️ Fallback...")
+    agent._clear_status_buffer()
+
+    # Nothing was emitted — clear is the success path.
+    assert emitted == []
+    assert agent._retry_status_buffer == []
+
+    # Subsequent flush is a no-op.
+    agent._flush_status_buffer()
+    assert emitted == []
+
+
+def test_buffer_vprint_replays_via_vprint_with_log_prefix():
+    agent = _make_bare_agent()
+    agent.log_prefix = "[abc] "
+    seen = []
+    agent._vprint = lambda msg, force=False, **kw: seen.append((msg, force))
+
+    agent._buffer_vprint("⚠️  API call failed")
+    agent._flush_status_buffer()
+
+    # Replays through _vprint with force=True and the agent's log_prefix
+    # prepended (matching the original direct-emit format).
+    assert seen == [("[abc] ⚠️  API call failed", True)]
+
+
+def test_flush_empty_buffer_is_noop():
+    agent = _make_bare_agent()
+    emitted = []
+    agent._emit_status = lambda msg: emitted.append(msg)
+    agent._vprint = lambda msg, force=False, **kw: emitted.append(msg)
+
+    # No buffer attribute yet — flush should be a quiet no-op.
+    agent._flush_status_buffer()
+    assert emitted == []
+
+    # Even after touching the buffer (via clear on an empty/missing buffer).
+    agent._clear_status_buffer()
+    agent._flush_status_buffer()
+    assert emitted == []
+
+
+def test_re_buffer_after_flush_works():
+    agent = _make_bare_agent()
+    emitted = []
+    agent._emit_status = lambda msg: emitted.append(msg)
+
+    agent._buffer_status("first")
+    agent._flush_status_buffer()
+    agent._buffer_status("second")
+    agent._flush_status_buffer()
+
+    assert emitted == ["first", "second"]
+
+
+def test_mixed_kinds_replay_through_correct_channels():
+    agent = _make_bare_agent()
+    agent.log_prefix = ""
+    statuses = []
+    vprints = []
+    warns = []
+    agent._emit_status = lambda msg: statuses.append(msg)
+    agent._vprint = lambda msg, force=False, **kw: vprints.append((msg, force))
+    agent._emit_warning = lambda msg: warns.append(msg)
+
+    agent._buffer_status("status-1")
+    agent._buffer_vprint("vprint-1")
+    # Manually mix in a "warn" record to verify the dispatch still works.
+    agent._retry_status_buffer.append(("warn", "warn-1"))
+    agent._buffer_status("status-2")
+
+    agent._flush_status_buffer()
+
+    assert statuses == ["status-1", "status-2"]
+    assert vprints == [("vprint-1", True)]
+    assert warns == ["warn-1"]
+
+
+def test_flush_swallows_callback_exceptions():
+    agent = _make_bare_agent()
+    seen = []
+
+    def boom(msg):
+        seen.append(msg)
+        raise RuntimeError("simulated callback failure")
+
+    agent._emit_status = boom
+
+    agent._buffer_status("first")
+    agent._buffer_status("second")
+    # Should not raise even though _emit_status raises for every message.
+    agent._flush_status_buffer()
+
+    # Both messages were attempted.
+    assert seen == ["first", "second"]
+    # Buffer drained regardless of failures.
+    assert agent._retry_status_buffer == []
diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py
index 67b9b4df702..831491c266c 100644
--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@@ -16,7 +16,7 @@ from types import SimpleNamespace
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
-from agent.codex_responses_adapter import _chat_messages_to_responses_input, _normalize_codex_response, _preflight_codex_input_items
+from agent.codex_responses_adapter import _normalize_codex_response
 
 import run_agent
 from run_agent import AIAgent
@@ -1323,6 +1323,178 @@ class TestToolUseEnforcementConfig:
             assert TOOL_USE_ENFORCEMENT_GUIDANCE not in prompt
 
 
+class TestTaskCompletionGuidance:
+    """Tests for the universal task-completion / no-fabrication guidance
+    (config.yaml ``agent.task_completion_guidance``).
+
+    Unlike tool_use_enforcement, this block is model-family-agnostic — it
+    targets cross-model failure modes (stopping after a stub; fabricating
+    output when blocked) and should appear for every model by default."""
+
+    def _make_agent(self, model="anthropic/claude-opus-4.8",
+                    task_completion_guidance=True, **extra_cfg):
+        agent_cfg = {"task_completion_guidance": task_completion_guidance}
+        agent_cfg.update(extra_cfg)
+        with (
+            patch(
+                "run_agent.get_tool_definitions",
+                return_value=_make_tool_defs("terminal", "web_search"),
+            ),
+            patch("run_agent.check_toolset_requirements", return_value={}),
+            patch("run_agent.OpenAI"),
+            patch(
+                "hermes_cli.config.load_config",
+                return_value={"agent": agent_cfg},
+            ),
+        ):
+            a = AIAgent(
+                model=model,
+                api_key="test-key-1234567890",
+                base_url="https://openrouter.ai/api/v1",
+                quiet_mode=True,
+                skip_context_files=True,
+                skip_memory=True,
+            )
+            a.client = MagicMock()
+            return a
+
+    def test_default_injects_for_claude(self):
+        """The block must reach Claude by default — that's the
+        primary motivating model family."""
+        from agent.prompt_builder import TASK_COMPLETION_GUIDANCE
+        agent = self._make_agent(model="anthropic/claude-opus-4.8")
+        prompt = agent._build_system_prompt()
+        assert TASK_COMPLETION_GUIDANCE in prompt
+
+    def test_default_injects_for_deepseek(self):
+        """And for DeepSeek — the other model that failed the Sarasota
+        real-estate task by fabricating output."""
+        from agent.prompt_builder import TASK_COMPLETION_GUIDANCE
+        agent = self._make_agent(model="deepseek/deepseek-v4-flash")
+        prompt = agent._build_system_prompt()
+        assert TASK_COMPLETION_GUIDANCE in prompt
+
+    def test_default_injects_for_gpt(self):
+        """Also reaches model families that already get enforcement —
+        it's additive, not exclusive."""
+        from agent.prompt_builder import TASK_COMPLETION_GUIDANCE
+        agent = self._make_agent(model="openai/gpt-5.4")
+        prompt = agent._build_system_prompt()
+        assert TASK_COMPLETION_GUIDANCE in prompt
+
+    def test_false_disables(self):
+        from agent.prompt_builder import TASK_COMPLETION_GUIDANCE
+        agent = self._make_agent(
+            model="anthropic/claude-opus-4.8", task_completion_guidance=False
+        )
+        prompt = agent._build_system_prompt()
+        assert TASK_COMPLETION_GUIDANCE not in prompt
+
+    def test_no_tools_no_injection(self):
+        """Same gate as tool_use_enforcement — no tools means no guidance.
+        The guidance refers to ``tool calls`` and ``tool output``; without
+        tools it would be advice for a capability the agent doesn't have."""
+        from agent.prompt_builder import TASK_COMPLETION_GUIDANCE
+        with (
+            patch("run_agent.get_tool_definitions", return_value=[]),
+            patch("run_agent.check_toolset_requirements", return_value={}),
+            patch("run_agent.OpenAI"),
+            patch(
+                "hermes_cli.config.load_config",
+                return_value={"agent": {"task_completion_guidance": True}},
+            ),
+        ):
+            a = AIAgent(
+                api_key="test-key-1234567890",
+                base_url="https://openrouter.ai/api/v1",
+                quiet_mode=True,
+                skip_context_files=True,
+                skip_memory=True,
+                enabled_toolsets=[],
+            )
+            a.client = MagicMock()
+            assert TASK_COMPLETION_GUIDANCE not in a._build_system_prompt()
+
+
+class TestEnvironmentProbeIntegration:
+    """Tests for the local Python toolchain probe wiring (config.yaml
+    ``agent.environment_probe``).  The probe itself is unit-tested in
+    tests/tools/test_env_probe.py; this class confirms it lands in the
+    system prompt when enabled and stays out when disabled."""
+
+    def _make_agent(self, model="anthropic/claude-opus-4.8",
+                    environment_probe=True):
+        with (
+            patch(
+                "run_agent.get_tool_definitions",
+                return_value=_make_tool_defs("terminal"),
+            ),
+            patch("run_agent.check_toolset_requirements", return_value={}),
+            patch("run_agent.OpenAI"),
+            patch(
+                "hermes_cli.config.load_config",
+                return_value={"agent": {"environment_probe": environment_probe}},
+            ),
+        ):
+            a = AIAgent(
+                model=model,
+                api_key="test-key-1234567890",
+                base_url="https://openrouter.ai/api/v1",
+                quiet_mode=True,
+                skip_context_files=True,
+                skip_memory=True,
+            )
+            a.client = MagicMock()
+            return a
+
+    def test_probe_appears_when_problem_detected(self, monkeypatch):
+        """When the probe finds something off, the line lands in the prompt."""
+        from tools import env_probe
+        env_probe._reset_cache_for_tests()
+        monkeypatch.setattr(env_probe, "_python_version_of",
+                            lambda b: {"python3": "3.11.15"}.get(b))
+        monkeypatch.setattr(env_probe, "_has_pip_module", lambda b: False)
+        monkeypatch.setattr(env_probe, "_detect_pep668", lambda b: True)
+        monkeypatch.setattr(env_probe, "_pip_python_version", lambda: "3.12")
+        monkeypatch.setattr(env_probe.shutil, "which",
+                            lambda name: None if name == "uv" else "/usr/bin/" + name)
+
+        agent = self._make_agent(environment_probe=True)
+        prompt = agent._build_system_prompt()
+        assert "Python toolchain:" in prompt
+        assert "3.11.15" in prompt
+
+    def test_probe_silent_on_clean_env(self, monkeypatch):
+        """Clean environment → probe emits nothing → no line in prompt."""
+        from tools import env_probe
+        env_probe._reset_cache_for_tests()
+        monkeypatch.setattr(env_probe, "_python_version_of",
+                            lambda b: "3.13.3" if b == "python3" else None)
+        monkeypatch.setattr(env_probe, "_has_pip_module", lambda b: True)
+        monkeypatch.setattr(env_probe, "_detect_pep668", lambda b: False)
+        monkeypatch.setattr(env_probe, "_pip_python_version", lambda: "3.13")
+        monkeypatch.setattr(env_probe.shutil, "which", lambda name: None)
+
+        agent = self._make_agent(environment_probe=True)
+        prompt = agent._build_system_prompt()
+        assert "Python toolchain:" not in prompt
+
+    def test_probe_disabled_by_config(self, monkeypatch):
+        """Even with detectable problems, the probe stays out when disabled."""
+        from tools import env_probe
+        env_probe._reset_cache_for_tests()
+        monkeypatch.setattr(env_probe, "_python_version_of",
+                            lambda b: {"python3": "3.11.15"}.get(b))
+        monkeypatch.setattr(env_probe, "_has_pip_module", lambda b: False)
+        monkeypatch.setattr(env_probe, "_detect_pep668", lambda b: True)
+        monkeypatch.setattr(env_probe, "_pip_python_version", lambda: "3.12")
+        monkeypatch.setattr(env_probe.shutil, "which", lambda name: None)
+
+        agent = self._make_agent(environment_probe=False)
+        prompt = agent._build_system_prompt()
+        assert "Python toolchain:" not in prompt
+
+
 class TestInvalidateSystemPrompt:
     def test_clears_cache(self, agent):
         agent._cached_system_prompt = "cached value"
@@ -2256,6 +2428,8 @@ class TestConcurrentToolExecution:
                 session_id=agent.session_id,
                 enabled_tools=list(agent.valid_tool_names),
                 skip_pre_tool_call_hook=True,
+                enabled_toolsets=agent.enabled_toolsets,
+                disabled_toolsets=agent.disabled_toolsets,
             )
             assert result == "result"
 
@@ -3295,8 +3469,13 @@ class TestRunConversation:
         assert result["final_response"] == "Recovered after compression"
         assert result["completed"] is True
 
-    def test_non_minimax_delta_overflow_still_probes_down(self, agent):
-        """Non-MiniMax providers should keep the generic probe-down behavior."""
+    def test_non_minimax_overflow_without_provider_limit_keeps_context(self, agent):
+        """Generic overflow without a provider-reported max must NOT probe-step down.
+
+        Previously a 200K configured window would silently drop to the 128K probe
+        tier on a generic overflow error.  Now we keep the configured window and
+        rely on compression — see #33669 / PR #33826.
+        """
         self._setup_agent(agent)
         agent.provider = "openrouter"
         agent.model = "some/unknown-model"
@@ -3330,7 +3509,8 @@ class TestRunConversation:
             result = agent.run_conversation("hello", conversation_history=prefill)
 
         mock_compress.assert_called_once()
-        assert agent.context_compressor.context_length == 128_000
+        # Context length preserved — no guessed probe-tier step-down.
+        assert agent.context_compressor.context_length == 200_000
         assert result["final_response"] == "Recovered after compression"
         assert result["completed"] is True
 
@@ -3601,9 +3781,18 @@ class TestRunConversation:
         mock_handle_function_call.assert_not_called()
 
     def test_kanban_block_called_on_iteration_exhaustion(self, agent, monkeypatch):
-        """Regression: kanban worker must call kanban_block when iteration
-        budget is exhausted, otherwise the dispatcher sees a protocol
-        violation and gives up after 1 failure (issue #23216)."""
+        """Regression: kanban worker must signal the dispatcher when its
+        iteration budget is exhausted, otherwise the task silently re-runs
+        forever without ever tripping the failure_limit circuit breaker
+        (issue #23216 / #29747 gap 2).
+
+        As of #29747, the exhaustion path routes through
+        ``kanban_db._record_task_failure(outcome="timed_out")`` so the
+        ``consecutive_failures`` counter increments and the dispatcher's
+        ``failure_limit`` breaker eventually trips. The legacy
+        ``kanban_block`` call was replaced because blocked-outcome runs
+        bypass the failure counter.
+        """
         self._setup_agent(agent)
         agent.max_iterations = 2
 
@@ -3622,8 +3811,14 @@ class TestRunConversation:
             tool_resp, tool_resp, summary_resp,
         ]
 
+        mock_record_failure = MagicMock(return_value=False)
+        mock_connect = MagicMock(return_value=MagicMock())
+
         with (
-            patch("run_agent.handle_function_call", return_value="ok") as mock_hfc,
+            patch("run_agent.handle_function_call", return_value="ok"),
+            patch("hermes_cli.kanban_db._record_task_failure",
+                  mock_record_failure),
+            patch("hermes_cli.kanban_db.connect", mock_connect),
             patch.object(agent, "_persist_session"),
             patch.object(agent, "_save_trajectory"),
             patch.object(agent, "_cleanup_task_resources"),
@@ -3633,23 +3828,24 @@ class TestRunConversation:
         # The agent should have reported the task as not completed.
         assert result["completed"] is False
 
-        # Among all handle_function_call invocations, one must be
-        # kanban_block with the correct task_id and a reason mentioning
-        # iteration exhaustion.
-        kanban_block_calls = [
-            c for c in mock_hfc.call_args_list
-            if c[0][0] == "kanban_block"
-        ]
-        assert len(kanban_block_calls) == 1, (
-            f"Expected exactly 1 kanban_block call, got {len(kanban_block_calls)}. "
-            f"All calls: {mock_hfc.call_args_list}"
+        # _record_task_failure should have been called exactly once for
+        # the exhaustion event, with outcome="timed_out".
+        assert mock_record_failure.call_count == 1, (
+            f"Expected exactly 1 _record_task_failure call, "
+            f"got {mock_record_failure.call_count}. "
+            f"Calls: {mock_record_failure.call_args_list}"
         )
-        call = kanban_block_calls[0]
-        assert call[0][1]["task_id"] == "t_test_task_123"
-        assert "Iteration budget exhausted" in call[0][1]["reason"]
+        call = mock_record_failure.call_args_list[0]
+        # Positional: (conn, task_id, ...)
+        assert call.args[1] == "t_test_task_123"
+        assert call.kwargs.get("outcome") == "timed_out"
+        assert call.kwargs.get("release_claim") is True
+        assert call.kwargs.get("end_run") is True
+        assert "Iteration budget exhausted" in call.kwargs.get("error", "")
 
     def test_no_kanban_block_when_not_in_kanban_mode(self, agent, monkeypatch):
-        """kanban_block must NOT be called when HERMES_KANBAN_TASK is unset."""
+        """The exhaustion bridge must NOT fire when HERMES_KANBAN_TASK
+        is unset (non-kanban runs are unaffected by #29747 gap 2)."""
         self._setup_agent(agent)
         agent.max_iterations = 2
 
@@ -3666,20 +3862,20 @@ class TestRunConversation:
             tool_resp, tool_resp, summary_resp,
         ]
 
+        mock_record_failure = MagicMock(return_value=False)
+
         with (
-            patch("run_agent.handle_function_call", return_value="ok") as mock_hfc,
+            patch("run_agent.handle_function_call", return_value="ok"),
+            patch("hermes_cli.kanban_db._record_task_failure",
+                  mock_record_failure),
             patch.object(agent, "_persist_session"),
             patch.object(agent, "_save_trajectory"),
             patch.object(agent, "_cleanup_task_resources"),
         ):
             agent.run_conversation("do stuff")
 
-        kanban_block_calls = [
-            c for c in mock_hfc.call_args_list
-            if c[0][0] == "kanban_block"
-        ]
-        assert len(kanban_block_calls) == 0, (
-            "kanban_block should not be called outside kanban mode"
+        assert mock_record_failure.call_count == 0, (
+            "_record_task_failure should not be called outside kanban mode"
         )
 
 
@@ -3867,7 +4063,7 @@ class TestNousCredentialRefresh:
 
         assert ok is True
         assert closed["value"] is True
-        assert captured["inference_auth_mode"] == "legacy"
+        assert captured["force_refresh"] is True
         assert rebuilt["kwargs"]["api_key"] == "new-nous-key"
         assert (
             rebuilt["kwargs"]["base_url"] == "https://inference-api.nousresearch.com/v1"
@@ -4394,7 +4590,6 @@ class TestSafeWriter:
 
     def test_double_wrap_prevented(self):
         """Wrapping an already-wrapped stream doesn't add layers."""
-        import sys
         from run_agent import _SafeWriter
         from io import StringIO
         inner = StringIO()
diff --git a/tests/run_agent/test_run_agent_codex_responses.py b/tests/run_agent/test_run_agent_codex_responses.py
index 638c1dd99e6..7f899c601d1 100644
--- a/tests/run_agent/test_run_agent_codex_responses.py
+++ b/tests/run_agent/test_run_agent_codex_responses.py
@@ -383,6 +383,149 @@ def test_build_api_kwargs_copilot_responses_omits_reasoning_for_non_reasoning_mo
     assert "prompt_cache_key" not in kwargs
 
 
+# ---------------------------------------------------------------------------
+# #27907: xAI tool-schema sanitization must NOT mutate ``agent.tools`` in place
+#
+# ``strip_slash_enum`` and ``strip_pattern_and_format`` are documented to
+# mutate their input in place ("Callers that need to preserve the original
+# should deep-copy first" — see ``tools/schema_sanitizer.py``).  Until this
+# fix, ``chat_completion_helpers.build_api_kwargs`` and ``auxiliary_client``
+# passed ``agent.tools`` straight through to the sanitizers.  The first xAI
+# request would permanently strip slash-containing enum constraints and the
+# ``pattern``/``format`` keywords from the per-agent tool registry — any
+# subsequent non-xAI call from the same agent (auxiliary task routed to
+# Anthropic, OpenRouter fallback, mid-session model switch) saw the
+# already-stripped schema.
+#
+# Fix: deepcopy ``tools_for_api`` before handing it to the sanitizers.
+# ---------------------------------------------------------------------------
+
+
+def _build_xai_agent_with_slash_enum_tool(monkeypatch):
+    """Build an xAI agent whose tool registry has a slash-containing enum.
+
+    Mirrors the Brave Search MCP shape that originally triggered #27907.
+    """
+
+    def _fake_get_tool_definitions(**_kwargs):
+        return [
+            {
+                "type": "function",
+                "function": {
+                    "name": "brave_like",
+                    "description": "Tool with slash-containing enum + pattern/format",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "accept": {
+                                "type": "string",
+                                "enum": ["application/json", "*/*"],
+                            },
+                            "match": {
+                                "type": "string",
+                                "pattern": "^[a-z]+$",
+                                "format": "regex",
+                            },
+                        },
+                    },
+                },
+            }
+        ]
+
+    monkeypatch.setattr(run_agent, "get_tool_definitions", _fake_get_tool_definitions)
+    monkeypatch.setattr(run_agent, "check_toolset_requirements", lambda: {})
+
+    agent = run_agent.AIAgent(
+        model="grok-4.3",
+        provider="xai-oauth",
+        api_mode="codex_responses",
+        base_url="https://api.x.ai/v1",
+        api_key="xai-token",
+        quiet_mode=True,
+        max_iterations=4,
+        skip_context_files=True,
+        skip_memory=True,
+    )
+    agent._cleanup_task_resources = lambda task_id: None
+    agent._persist_session = lambda messages, history=None: None
+    agent._save_trajectory = lambda messages, user_message, completed: None
+    return agent
+
+
+def test_build_api_kwargs_xai_strips_slash_enum_from_outgoing_request(monkeypatch):
+    """The xAI request sent to the API must NOT contain slash-enum values."""
+    agent = _build_xai_agent_with_slash_enum_tool(monkeypatch)
+    kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
+
+    # ``tools`` comes back in Responses format from the codex transport;
+    # find the parameters dict for our function regardless of shape.
+    out_tool = kwargs["tools"][0]
+    params = out_tool["parameters"]
+    assert "enum" not in params["properties"]["accept"], (
+        "outgoing xAI request must not carry slash-containing enums — "
+        "xAI would 400 with 'Invalid arguments passed to the model'"
+    )
+    # pattern/format must also be stripped (existing #27197 contract).
+    assert "pattern" not in params["properties"]["match"]
+    assert "format" not in params["properties"]["match"]
+
+
+def test_build_api_kwargs_xai_does_not_mutate_agent_tools(monkeypatch):
+    """Headline #27907 regression: ``agent.tools`` must survive intact.
+
+    Pre-fix the sanitizers mutated ``agent.tools`` in place, so a subsequent
+    non-xAI call from the same agent saw an already-stripped schema —
+    silent constraint loss with no way for the user to notice from their
+    config.
+    """
+    agent = _build_xai_agent_with_slash_enum_tool(monkeypatch)
+
+    # Snapshot the schema before the request.
+    accept_before = agent.tools[0]["function"]["parameters"]["properties"]["accept"]
+    match_before = agent.tools[0]["function"]["parameters"]["properties"]["match"]
+    assert accept_before["enum"] == ["application/json", "*/*"]
+    assert match_before.get("pattern") == "^[a-z]+$"
+    assert match_before.get("format") == "regex"
+
+    # Build the API kwargs (which runs the sanitizers).
+    agent._build_api_kwargs([{"role": "user", "content": "hi"}])
+
+    # The agent's tool registry must be UNCHANGED.
+    accept_after = agent.tools[0]["function"]["parameters"]["properties"]["accept"]
+    match_after = agent.tools[0]["function"]["parameters"]["properties"]["match"]
+    assert accept_after.get("enum") == ["application/json", "*/*"], (
+        "agent.tools mutated — slash-containing enum was stripped from the "
+        "shared per-agent registry, will leak to non-xAI calls"
+    )
+    assert match_after.get("pattern") == "^[a-z]+$", (
+        "agent.tools mutated — pattern stripped from shared registry"
+    )
+    assert match_after.get("format") == "regex", (
+        "agent.tools mutated — format stripped from shared registry"
+    )
+
+
+def test_build_api_kwargs_xai_is_idempotent_across_repeated_calls(monkeypatch):
+    """Multiple xAI requests must each produce the same sanitized output
+    AND must not progressively erode the source schema."""
+    agent = _build_xai_agent_with_slash_enum_tool(monkeypatch)
+
+    kwargs1 = agent._build_api_kwargs([{"role": "user", "content": "first"}])
+    kwargs2 = agent._build_api_kwargs([{"role": "user", "content": "second"}])
+    kwargs3 = agent._build_api_kwargs([{"role": "user", "content": "third"}])
+
+    for k in (kwargs1, kwargs2, kwargs3):
+        params = k["tools"][0]["parameters"]
+        assert "enum" not in params["properties"]["accept"]
+        assert "pattern" not in params["properties"]["match"]
+        assert "format" not in params["properties"]["match"]
+
+    # Source schema still untouched after three rounds.
+    assert agent.tools[0]["function"]["parameters"]["properties"]["accept"].get(
+        "enum"
+    ) == ["application/json", "*/*"]
+
+
 def test_run_codex_stream_returns_collected_items_when_stream_ends_without_terminal(monkeypatch):
     """The event-driven path tolerates streams that end without a terminal frame.
 
diff --git a/tests/run_agent/test_session_meta_filtering.py b/tests/run_agent/test_session_meta_filtering.py
index 08fc96e9fe4..23628b8848a 100644
--- a/tests/run_agent/test_session_meta_filtering.py
+++ b/tests/run_agent/test_session_meta_filtering.py
@@ -6,8 +6,6 @@ _sanitize_api_messages() and the CLI session-restore paths.
 """
 
 import logging
-import types
-from unittest.mock import MagicMock, patch
 
 from run_agent import AIAgent
 
diff --git a/tests/run_agent/test_session_reset_fix.py b/tests/run_agent/test_session_reset_fix.py
index 1fd1223ced2..2b86642fd41 100644
--- a/tests/run_agent/test_session_reset_fix.py
+++ b/tests/run_agent/test_session_reset_fix.py
@@ -10,7 +10,6 @@ import sys
 import types
 from pathlib import Path
 
-import pytest
 
 # Ensure repo root is importable
 sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent))
diff --git a/tests/run_agent/test_steer.py b/tests/run_agent/test_steer.py
index d99a0af8057..42f3ada985d 100644
--- a/tests/run_agent/test_steer.py
+++ b/tests/run_agent/test_steer.py
@@ -281,7 +281,7 @@ class TestSteerCommandRegistry:
         """The /steer slash command must be registered so it reaches all
         platforms (CLI, gateway, TUI autocomplete, Telegram/Slack menus).
         """
-        from hermes_cli.commands import resolve_command, ACTIVE_SESSION_BYPASS_COMMANDS
+        from hermes_cli.commands import resolve_command
 
         cmd = resolve_command("steer")
         assert cmd is not None
diff --git a/tests/run_agent/test_stream_drop_logging.py b/tests/run_agent/test_stream_drop_logging.py
index f424a4f403f..3ba6400a7da 100644
--- a/tests/run_agent/test_stream_drop_logging.py
+++ b/tests/run_agent/test_stream_drop_logging.py
@@ -21,9 +21,7 @@ import logging
 import time
 from unittest.mock import patch
 
-import pytest
 
-import run_agent
 from run_agent import AIAgent
 
 
@@ -203,7 +201,7 @@ def test_emit_stream_drop_ui_includes_elapsed_when_available():
     diag = AIAgent._stream_diag_init()
     diag["started_at"] = time.time() - 8.0  # 8s on the wire before drop
 
-    with patch.object(agent, "_emit_status") as mock_emit:
+    with patch.object(agent, "_buffer_status") as mock_emit:
         agent._emit_stream_drop(
             error=ConnectionError("x"),
             attempt=2,
@@ -223,7 +221,7 @@ def test_emit_stream_drop_ui_omits_suffix_without_diag():
     agent = _make_agent()
     agent.provider = "openrouter"
 
-    with patch.object(agent, "_emit_status") as mock_emit:
+    with patch.object(agent, "_buffer_status") as mock_emit:
         agent._emit_stream_drop(
             error=ConnectionError("x"),
             attempt=2,
diff --git a/tests/run_agent/test_streaming.py b/tests/run_agent/test_streaming.py
index d21dee34201..5af349fa859 100644
--- a/tests/run_agent/test_streaming.py
+++ b/tests/run_agent/test_streaming.py
@@ -3,11 +3,8 @@
 Tests the unified streaming API call, delta callbacks, tool-call
 suppression, provider fallback, and CLI streaming display.
 """
-import json
-import threading
-import uuid
 from types import SimpleNamespace
-from unittest.mock import MagicMock, patch, PropertyMock
+from unittest.mock import MagicMock, patch
 
 import pytest
 
diff --git a/tests/run_agent/test_streaming_tool_call_repair.py b/tests/run_agent/test_streaming_tool_call_repair.py
index e85c0e22d18..a70c65e47a5 100644
--- a/tests/run_agent/test_streaming_tool_call_repair.py
+++ b/tests/run_agent/test_streaming_tool_call_repair.py
@@ -12,7 +12,6 @@ unclosed brackets, Python None) don't kill the session.
 """
 
 import json
-import pytest
 
 from run_agent import _repair_tool_call_arguments
 
diff --git a/tests/run_agent/test_strict_api_validation.py b/tests/run_agent/test_strict_api_validation.py
index a4a53d97db3..16b26b44a94 100644
--- a/tests/run_agent/test_strict_api_validation.py
+++ b/tests/run_agent/test_strict_api_validation.py
@@ -2,9 +2,7 @@
 
 import sys
 import types
-from unittest.mock import patch, MagicMock
 
-import pytest
 
 sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
 sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
diff --git a/tests/run_agent/test_strip_reasoning_tags_cli.py b/tests/run_agent/test_strip_reasoning_tags_cli.py
index 7eb15daf43b..0b5c701bc1e 100644
--- a/tests/run_agent/test_strip_reasoning_tags_cli.py
+++ b/tests/run_agent/test_strip_reasoning_tags_cli.py
@@ -6,7 +6,6 @@ final displayed assistant text (after streaming) without depending on the
 AIAgent instance. It must stay in sync with run_agent.py::_strip_think_blocks
 for tool-call tag coverage."""
 
-import pytest
 
 from cli import _strip_reasoning_tags
 
diff --git a/tests/run_agent/test_tls_fd_recycle_corruption.py b/tests/run_agent/test_tls_fd_recycle_corruption.py
index 062276db961..29c35612fdf 100644
--- a/tests/run_agent/test_tls_fd_recycle_corruption.py
+++ b/tests/run_agent/test_tls_fd_recycle_corruption.py
@@ -26,9 +26,8 @@ import logging
 import socket as _socket
 import threading
 from types import SimpleNamespace
-from unittest.mock import MagicMock, patch
+from unittest.mock import MagicMock
 
-import pytest
 
 
 # ---------------------------------------------------------------------------
@@ -185,7 +184,6 @@ def test_close_from_stranger_thread_aborts_only_no_close():
     the worker's eventual ``finally`` must still see the client in the
     holder so IT can be the one releasing the FD.
     """
-    from agent.chat_completion_helpers import interruptible_api_call
 
     # We can't easily invoke just `_close_request_client_once` because it's
     # a closure local to ``interruptible_api_call``. Re-extract the same
diff --git a/tests/run_agent/test_tool_arg_coercion.py b/tests/run_agent/test_tool_arg_coercion.py
index d9ac5dd20fa..e5bbdd93d80 100644
--- a/tests/run_agent/test_tool_arg_coercion.py
+++ b/tests/run_agent/test_tool_arg_coercion.py
@@ -6,7 +6,6 @@ coerce_tool_args() fixes these type mismatches by comparing argument values
 against the tool's JSON Schema before dispatch.
 """
 
-import pytest
 from unittest.mock import patch
 
 from model_tools import (
diff --git a/tests/run_agent/test_tool_executor_contextvar_propagation.py b/tests/run_agent/test_tool_executor_contextvar_propagation.py
index 2e1d543705a..0395dcbba30 100644
--- a/tests/run_agent/test_tool_executor_contextvar_propagation.py
+++ b/tests/run_agent/test_tool_executor_contextvar_propagation.py
@@ -197,6 +197,19 @@ def test_run_agent_concurrent_executor_wraps_submit_with_copy_context():
             and call.args[1].id == "_run_tool"
         ):
             tool_submits.append(("fixed", call))
+        # Fixed (shared helper): executor.submit(
+        #     propagate_context_to_thread(_run_tool), ...) — the helper in
+        # tools/thread_context.py does copy_context().run(...) internally and
+        # additionally propagates the thread-local approval/sudo callbacks.
+        elif (
+            isinstance(first, ast.Call)
+            and isinstance(first.func, ast.Name)
+            and first.func.id == "propagate_context_to_thread"
+            and first.args
+            and isinstance(first.args[0], ast.Name)
+            and first.args[0].id == "_run_tool"
+        ):
+            tool_submits.append(("fixed", call))
 
     assert tool_submits, (
         "Could not locate `executor.submit(... _run_tool ...)` in "
diff --git a/tests/run_agent/test_vision_aware_preprocessing.py b/tests/run_agent/test_vision_aware_preprocessing.py
index 056754862cc..7a5b6131359 100644
--- a/tests/run_agent/test_vision_aware_preprocessing.py
+++ b/tests/run_agent/test_vision_aware_preprocessing.py
@@ -15,7 +15,6 @@ from __future__ import annotations
 
 from unittest.mock import MagicMock, patch
 
-import pytest
 
 from run_agent import AIAgent
 
diff --git a/tests/skills/test_google_workspace_api.py b/tests/skills/test_google_workspace_api.py
index 7ecfb4b7b7b..30a1441d634 100644
--- a/tests/skills/test_google_workspace_api.py
+++ b/tests/skills/test_google_workspace_api.py
@@ -2,7 +2,6 @@
 
 import importlib.util
 import json
-import os
 import subprocess
 import sys
 import types
diff --git a/tests/skills/test_google_workspace_credential_files.py b/tests/skills/test_google_workspace_credential_files.py
index de59b2fe6e4..9abe3e7e5b2 100644
--- a/tests/skills/test_google_workspace_credential_files.py
+++ b/tests/skills/test_google_workspace_credential_files.py
@@ -11,7 +11,6 @@ import os
 from pathlib import Path
 from unittest.mock import patch
 
-import pytest
 
 SKILL_MD = (
     Path(__file__).resolve().parents[2]
diff --git a/tests/skills/test_memento_cards.py b/tests/skills/test_memento_cards.py
index c1e29039c57..6cca138cedd 100644
--- a/tests/skills/test_memento_cards.py
+++ b/tests/skills/test_memento_cards.py
@@ -2,7 +2,6 @@
 
 import csv
 import json
-import os
 import sys
 import uuid
 from datetime import datetime, timedelta, timezone
diff --git a/tests/skills/test_telephony_skill.py b/tests/skills/test_telephony_skill.py
index b9025ee5944..0b9483da61c 100644
--- a/tests/skills/test_telephony_skill.py
+++ b/tests/skills/test_telephony_skill.py
@@ -2,7 +2,6 @@ from __future__ import annotations
 
 import importlib.util
 import json
-import os
 import sys
 from pathlib import Path
 
diff --git a/tests/skills/test_youtube_quiz.py b/tests/skills/test_youtube_quiz.py
index 182889ff6de..810ab71f288 100644
--- a/tests/skills/test_youtube_quiz.py
+++ b/tests/skills/test_youtube_quiz.py
@@ -3,7 +3,6 @@
 import json
 import sys
 from pathlib import Path
-from types import SimpleNamespace
 from unittest import mock
 
 import pytest
diff --git a/tests/stress/_fake_worker.py b/tests/stress/_fake_worker.py
index be05bcbedc7..9a4d17bbbbb 100644
--- a/tests/stress/_fake_worker.py
+++ b/tests/stress/_fake_worker.py
@@ -9,7 +9,6 @@ exactly the way `hermes chat -q` would be, minus the LLM cost.
 import json
 import os
 import subprocess
-import sys
 import time
 
 
diff --git a/tests/stress/test_atypical_scenarios.py b/tests/stress/test_atypical_scenarios.py
index e7e83eabccb..d667a97a7cb 100644
--- a/tests/stress/test_atypical_scenarios.py
+++ b/tests/stress/test_atypical_scenarios.py
@@ -18,7 +18,6 @@ together at the end. Script exits 0 iff every scenario passed or was
 cleanly SKIPPED (with reason).
 """
 
-import json
 import multiprocessing as mp
 import os
 import shutil
diff --git a/tests/stress/test_concurrency.py b/tests/stress/test_concurrency.py
index 5cbe455cb02..f5695e4bde1 100644
--- a/tests/stress/test_concurrency.py
+++ b/tests/stress/test_concurrency.py
@@ -21,7 +21,6 @@ import multiprocessing as mp
 import os
 import random
 import sqlite3
-import subprocess
 import sys
 import tempfile
 import time
diff --git a/tests/stress/test_property_fuzzing.py b/tests/stress/test_property_fuzzing.py
index b8facc62493..b8270b2b352 100644
--- a/tests/stress/test_property_fuzzing.py
+++ b/tests/stress/test_property_fuzzing.py
@@ -27,7 +27,6 @@ import os
 import random
 import sys
 import tempfile
-import time
 from pathlib import Path
 
 WT = str(Path(__file__).resolve().parents[2])
diff --git a/tests/stress/test_subprocess_e2e.py b/tests/stress/test_subprocess_e2e.py
index ea05123000b..ebdab39fd23 100644
--- a/tests/stress/test_subprocess_e2e.py
+++ b/tests/stress/test_subprocess_e2e.py
@@ -10,7 +10,6 @@ This validates the IPC + lifecycle story that mocks can't:
   - crash detection works against a real dead PID
 """
 
-import json
 import os
 from pathlib import Path
 import subprocess
diff --git a/tests/test_batch_runner_checkpoint.py b/tests/test_batch_runner_checkpoint.py
index 526c095563d..78bb8c98702 100644
--- a/tests/test_batch_runner_checkpoint.py
+++ b/tests/test_batch_runner_checkpoint.py
@@ -1,10 +1,8 @@
 """Tests for batch_runner checkpoint behavior — incremental writes, resume, atomicity."""
 
 import json
-import os
 from pathlib import Path
 from threading import Lock
-from unittest.mock import patch, MagicMock
 
 import pytest
 
diff --git a/tests/test_bitwarden_secrets.py b/tests/test_bitwarden_secrets.py
index 3938585469f..dbedd0cb7a5 100644
--- a/tests/test_bitwarden_secrets.py
+++ b/tests/test_bitwarden_secrets.py
@@ -15,7 +15,6 @@ import os
 import stat
 import subprocess
 import sys
-import tempfile
 import time
 import zipfile
 from pathlib import Path
diff --git a/tests/test_cli_file_drop.py b/tests/test_cli_file_drop.py
index 5161e435f0d..426f2f89c6d 100644
--- a/tests/test_cli_file_drop.py
+++ b/tests/test_cli_file_drop.py
@@ -1,9 +1,6 @@
 """Tests for _detect_file_drop — file path detection that prevents
 dragged/pasted absolute paths from being mistaken for slash commands."""
 
-import os
-import tempfile
-from pathlib import Path
 
 import pytest
 
diff --git a/tests/test_ctx_halving_fix.py b/tests/test_ctx_halving_fix.py
index 0dd3ca4e7eb..63c965ac965 100644
--- a/tests/test_ctx_halving_fix.py
+++ b/tests/test_ctx_halving_fix.py
@@ -11,6 +11,9 @@ The fix introduces:
     error class and returns the available output token budget.
   * _ephemeral_max_output_tokens on AIAgent — a one-shot override that
     caps the output for one retry without touching context_length.
+  * get_context_length_from_provider_error() — accepts only concrete
+    provider-reported lower context limits and refuses guessed probe-tier
+    step-downs when the provider gives no maximum.
 
 Naming note
 -----------
@@ -22,11 +25,10 @@ separate.
 
 import sys
 import os
-from unittest.mock import MagicMock, patch, PropertyMock
+from unittest.mock import MagicMock
 
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
 
-import pytest
 
 
 # ---------------------------------------------------------------------------
@@ -75,7 +77,7 @@ class TestParseAvailableOutputTokens:
     # ── Should NOT detect (returns None) ─────────────────────────────────
 
     def test_prompt_too_long_is_not_output_cap_error(self):
-        """'prompt is too long' errors must NOT be caught — they need context halving."""
+        """'prompt is too long' errors must NOT be caught — they need context-overflow recovery."""
         msg = "prompt is too long: 205000 tokens > 200000 maximum"
         assert self._parse(msg) is None
 
@@ -101,6 +103,49 @@ class TestParseAvailableOutputTokens:
         assert self._parse(msg) is None
 
 
+# ---------------------------------------------------------------------------
+# Context-overflow recovery — only trust provider-reported limits
+# ---------------------------------------------------------------------------
+
+class TestContextOverflowLimitSelection:
+    """Context-overflow recovery must not invent a lower window size.
+
+    Some providers only say "input exceeds the context window" without telling
+    Hermes what the actual maximum is.  In that case we may compress the
+    conversation, but must not silently probe-step from a user-configured 1M
+    window down to 256K/128K/64K/etc.
+    """
+
+    def test_generic_overflow_without_provider_limit_keeps_context_length(self):
+        from agent.model_metadata import get_context_length_from_provider_error
+        from agent.model_metadata import get_next_probe_tier
+        from agent.model_metadata import parse_context_limit_from_error
+
+        old_ctx = 1_000_000
+        error_msg = (
+            "Your input exceeds the context window of this model. "
+            "Please adjust your input and try again."
+        )
+
+        assert parse_context_limit_from_error(error_msg) is None
+        assert get_next_probe_tier(old_ctx) == 256_000
+        assert get_context_length_from_provider_error(error_msg, old_ctx) is None
+
+    def test_explicit_provider_limit_still_selects_that_limit(self):
+        from agent.model_metadata import get_context_length_from_provider_error
+
+        error_msg = "prompt is too long: 300000 tokens > 272000 maximum"
+
+        assert get_context_length_from_provider_error(error_msg, 1_000_000) == 272_000
+
+    def test_reported_limit_not_lower_than_current_is_ignored(self):
+        from agent.model_metadata import get_context_length_from_provider_error
+
+        error_msg = "maximum context length is 1000000 tokens"
+
+        assert get_context_length_from_provider_error(error_msg, 272_000) is None
+
+
 # ---------------------------------------------------------------------------
 # build_anthropic_kwargs — output cap clamping
 # ---------------------------------------------------------------------------
@@ -261,7 +306,6 @@ class TestContextNotHalvedOnOutputCapError:
         """On 'max_tokens too large' error, _ephemeral_max_output_tokens is set
         and compressor.context_length is left unchanged."""
         from agent.model_metadata import parse_available_output_tokens_from_error
-        from agent.model_metadata import get_next_probe_tier
 
         error_msg = (
             "max_tokens: 128000 > context_window: 200000 "
@@ -282,19 +326,16 @@ class TestContextNotHalvedOnOutputCapError:
         assert agent.context_compressor.context_length == old_ctx
         assert agent._ephemeral_max_output_tokens == 19_936
 
-    def test_prompt_too_long_still_triggers_probe_tier(self):
-        """Genuine prompt-too-long errors must still use get_next_probe_tier."""
+    def test_prompt_too_long_with_explicit_limit_uses_provider_limit(self):
+        """Prompt-too-long errors only change context_length when they report a concrete limit."""
+        from agent.model_metadata import get_context_length_from_provider_error
         from agent.model_metadata import parse_available_output_tokens_from_error
-        from agent.model_metadata import get_next_probe_tier
 
         error_msg = "prompt is too long: 205000 tokens > 200000 maximum"
 
         available_out = parse_available_output_tokens_from_error(error_msg)
         assert available_out is None, "prompt-too-long must not be caught by output-cap parser"
-
-        # The old halving path is still used for this class of error
-        new_ctx = get_next_probe_tier(200_000)
-        assert new_ctx == 128_000
+        assert get_context_length_from_provider_error(error_msg, 1_000_000) == 200_000
 
     def test_output_cap_error_safety_margin(self):
         """The ephemeral value includes a 64-token safety margin below available_out."""
diff --git a/tests/test_docker_home_override_scripts.py b/tests/test_docker_home_override_scripts.py
new file mode 100644
index 00000000000..0ad9f61c9ad
--- /dev/null
+++ b/tests/test_docker_home_override_scripts.py
@@ -0,0 +1,48 @@
+"""Regression tests for Docker HOME overrides under s6/with-contenv."""
+
+from pathlib import Path
+
+
+REPO_ROOT = Path(__file__).resolve().parent.parent
+DASHBOARD_RUN = REPO_ROOT / "docker" / "s6-rc.d" / "dashboard" / "run"
+
+
+def test_dashboard_run_resets_home_before_dropping_privileges() -> None:
+    text = DASHBOARD_RUN.read_text(encoding="utf-8")
+
+    assert "#!/command/with-contenv sh" in text
+    assert "export HOME=/opt/data" in text
+    assert "exec s6-setuidgid hermes hermes dashboard" in text
+
+
+def test_dashboard_run_does_not_derive_insecure_from_bind_host() -> None:
+    """The s6 dashboard run script MUST NOT auto-add ``--insecure`` based on
+    ``HERMES_DASHBOARD_HOST``. Doing so disables the OAuth auth gate on
+    every non-loopback bind even when an auth provider is registered —
+    the exact regression that exposed every wildcard-subdomain agent
+    dashboard publicly until early 2026.
+
+    The opt-in is now explicit: ``HERMES_DASHBOARD_INSECURE=1`` (truthy).
+    The auth gate is the authority on whether non-loopback binds are safe.
+    """
+    text = DASHBOARD_RUN.read_text(encoding="utf-8")
+
+    # No legacy host-derived flip.
+    assert '127.0.0.1|localhost' not in text, (
+        "Run script still derives --insecure from the bind host. The gate "
+        "is the authority now — opt in via HERMES_DASHBOARD_INSECURE instead."
+    )
+    assert 'case "$dash_host" in' not in text, (
+        "Legacy host-derived --insecure case-statement is back."
+    )
+
+    # New opt-in env var present.
+    assert "HERMES_DASHBOARD_INSECURE" in text, (
+        "Explicit HERMES_DASHBOARD_INSECURE opt-in is missing."
+    )
+    # Truthy values aligned with the rest of the s6 scripts
+    # (HERMES_DASHBOARD, HERMES_DASHBOARD_TUI).
+    for truthy in ("1", "true", "TRUE", "True", "yes", "YES", "Yes"):
+        assert truthy in text, (
+            f"HERMES_DASHBOARD_INSECURE should accept truthy value {truthy!r}"
+        )
diff --git a/tests/test_empty_model_fallback.py b/tests/test_empty_model_fallback.py
index b5f4286727f..32689b325ee 100644
--- a/tests/test_empty_model_fallback.py
+++ b/tests/test_empty_model_fallback.py
@@ -1,7 +1,6 @@
 """Tests for empty model fallback — when provider is configured but model is missing."""
 
-from unittest.mock import MagicMock, patch
-import pytest
+from unittest.mock import patch
 
 
 class TestGetDefaultModelForProvider:
diff --git a/tests/test_evidence_store.py b/tests/test_evidence_store.py
index ff4a0efe243..0bdc16ed163 100644
--- a/tests/test_evidence_store.py
+++ b/tests/test_evidence_store.py
@@ -1,5 +1,3 @@
-import os
-import json
 import pytest
 from pathlib import Path
 import importlib.util
diff --git a/tests/test_gateway_streaming_nested_config.py b/tests/test_gateway_streaming_nested_config.py
index 8db8988f40c..d69d6b3c601 100644
--- a/tests/test_gateway_streaming_nested_config.py
+++ b/tests/test_gateway_streaming_nested_config.py
@@ -1,10 +1,7 @@
 """Regression test for #25676 — nested gateway.streaming config must be loaded."""
 from pathlib import Path
 from unittest.mock import patch, MagicMock
-import json
 
-import pytest
-import yaml
 
 
 def _load_with_yaml_dict(yaml_dict: dict):
diff --git a/tests/test_hermes_bootstrap.py b/tests/test_hermes_bootstrap.py
index a044d644abe..69f3c6b7c03 100644
--- a/tests/test_hermes_bootstrap.py
+++ b/tests/test_hermes_bootstrap.py
@@ -24,7 +24,6 @@ import os
 import subprocess
 import sys
 import textwrap
-import unittest.mock as mock
 
 import pytest
 
diff --git a/tests/test_hermes_constants.py b/tests/test_hermes_constants.py
index edbb4eb7b84..3bd31c2bf1c 100644
--- a/tests/test_hermes_constants.py
+++ b/tests/test_hermes_constants.py
@@ -2,7 +2,6 @@
 
 import os
 from pathlib import Path
-from unittest.mock import patch
 
 import pytest
 
diff --git a/tests/test_hermes_logging.py b/tests/test_hermes_logging.py
index 180a627bd01..febef0a4789 100644
--- a/tests/test_hermes_logging.py
+++ b/tests/test_hermes_logging.py
@@ -826,3 +826,174 @@ class TestReadLoggingConfig:
 
         level, max_size, backup = hermes_logging._read_logging_config()
         assert level is None
+
+
+class TestExternalRotationRecovery:
+    """_ManagedRotatingFileHandler recovers from external rotation.
+
+    External rotation = anything that renames, unlinks, or replaces the
+    log file without going through ``doRollover()``: logrotate, manual
+    ``mv``, another process rotating under us, or a transient ``rm``.
+    Before this fix the open file descriptor stayed pinned to the old
+    inode forever, so every subsequent write went to the rotated backup
+    instead of the file the operator expects to read.
+    """
+
+    def _make_handler(self, log_path: Path) -> hermes_logging._ManagedRotatingFileHandler:
+        handler = hermes_logging._ManagedRotatingFileHandler(
+            str(log_path), maxBytes=10 * 1024 * 1024, backupCount=3,
+            encoding="utf-8",
+        )
+        handler.setLevel(logging.INFO)
+        handler.setFormatter(logging.Formatter("%(message)s"))
+        return handler
+
+    def _emit(self, handler: logging.Handler, msg: str) -> None:
+        record = logging.LogRecord(
+            name="gateway.run", level=logging.INFO, pathname="", lineno=0,
+            msg=msg, args=(), exc_info=None,
+        )
+        # Match the record factory that hermes_logging installs at import time.
+        record.session_tag = ""
+        handler.emit(record)
+        handler.flush()
+
+    def test_recovers_after_external_rename(self, tmp_path):
+        """logrotate-style external rename: ``mv gateway.log gateway.log.1``.
+
+        Handler's fd was pinned to the renamed inode; new writes used to
+        go to ``gateway.log.1`` forever.  After fix, the handler reopens
+        ``gateway.log`` at the original path.
+        """
+        log_path = tmp_path / "gateway.log"
+        rotated = tmp_path / "gateway.log.1"
+        handler = self._make_handler(log_path)
+        try:
+            self._emit(handler, "before rotation")
+            assert log_path.read_text() == "before rotation\n"
+
+            # External rotation (NOT via handler.doRollover()).
+            os.rename(log_path, rotated)
+            assert not log_path.exists()
+
+            self._emit(handler, "after rotation")
+
+            # The new write should land in a freshly recreated gateway.log,
+            # not appended to the rotated backup.
+            assert log_path.exists(), "handler did not recreate gateway.log"
+            assert log_path.read_text() == "after rotation\n"
+            assert rotated.read_text() == "before rotation\n"
+        finally:
+            handler.close()
+
+    def test_recovers_after_external_unlink(self, tmp_path):
+        """``rm gateway.log`` then keep writing — handler recreates the file."""
+        log_path = tmp_path / "gateway.log"
+        handler = self._make_handler(log_path)
+        try:
+            self._emit(handler, "before unlink")
+            assert log_path.read_text() == "before unlink\n"
+
+            os.unlink(log_path)
+            assert not log_path.exists()
+
+            self._emit(handler, "after unlink")
+            assert log_path.exists()
+            assert log_path.read_text() == "after unlink\n"
+        finally:
+            handler.close()
+
+    def test_external_truncate_does_not_force_reopen(self, tmp_path):
+        """``: > gateway.log`` keeps the same inode — no reopen needed.
+
+        Truncation in place preserves the inode, so subsequent writes
+        continue to the same file descriptor.  We assert the post-truncate
+        content reflects the truncate (size shrinks) and then grows with
+        new writes — i.e. the handler correctly does NOT detect this as
+        an inode change.
+        """
+        log_path = tmp_path / "gateway.log"
+        handler = self._make_handler(log_path)
+        try:
+            self._emit(handler, "AAAA" * 32)
+            assert log_path.stat().st_size > 0
+
+            with open(log_path, "w"):
+                pass  # truncate to zero
+            assert log_path.stat().st_size == 0
+
+            self._emit(handler, "after truncate")
+            assert log_path.read_text() == "after truncate\n"
+        finally:
+            handler.close()
+
+    def test_normal_rollover_still_works(self, tmp_path):
+        """Handler-driven ``doRollover()`` must continue to work normally.
+
+        Regression guard: the inode-snapshot bookkeeping must be refreshed
+        in ``doRollover()`` so the very next emit doesn't mistake our own
+        rollover for an external one and double-reopen.
+        """
+        log_path = tmp_path / "gateway.log"
+        rotated = tmp_path / "gateway.log.1"
+
+        # Tiny maxBytes forces rollover after the first record.
+        handler = hermes_logging._ManagedRotatingFileHandler(
+            str(log_path), maxBytes=1, backupCount=1, encoding="utf-8",
+        )
+        handler.setLevel(logging.INFO)
+        handler.setFormatter(logging.Formatter("%(message)s"))
+        try:
+            self._emit(handler, "first record")
+            self._emit(handler, "second record")
+            self._emit(handler, "third record")
+
+            # After rollover we should have BOTH files, with the most
+            # recent record in the live file.
+            assert log_path.exists()
+            assert rotated.exists()
+            assert "third record" in log_path.read_text()
+        finally:
+            handler.close()
+
+    def test_gateway_log_attached_after_external_rotation_then_re_setup(
+        self, hermes_home,
+    ):
+        """End-to-end Allen-reproduction: gateway.log gets externally rotated,
+        ``setup_logging(mode='gateway')`` is re-called, the handler keeps
+        working.
+
+        Reproduces Allen's symptom (gateway.log frozen mid-write, all gateway
+        records leaking to agent.log) when something external rotates the
+        file between setup_logging() calls.
+        """
+        hermes_logging.setup_logging(hermes_home=hermes_home, mode="gateway")
+        gw_path = hermes_home / "logs" / "gateway.log"
+        rotated = hermes_home / "logs" / "gateway.log.1"
+
+        logging.getLogger("gateway.run").info("line BEFORE rotation")
+        for h in logging.getLogger().handlers:
+            try: h.flush()
+            except Exception: pass
+        assert "BEFORE rotation" in gw_path.read_text()
+
+        # External actor renames the file out from under us.
+        os.rename(gw_path, rotated)
+        assert not gw_path.exists()
+
+        # Caller (or some restart path) re-enters setup_logging.  This used
+        # to silently no-op due to the per-path dedup check, leaving the
+        # stale fd in place.
+        hermes_logging.setup_logging(hermes_home=hermes_home, mode="gateway")
+
+        logging.getLogger("gateway.run").info("line AFTER rotation")
+        for h in logging.getLogger().handlers:
+            try: h.flush()
+            except Exception: pass
+
+        # The new record must reach the live gateway.log, not the rotated
+        # backup.  Allen's logs had everything past the rotation point
+        # going into agent.log only, never gateway.log.
+        assert gw_path.exists(), "gateway.log was never recreated"
+        assert "AFTER rotation" in gw_path.read_text()
+        assert "AFTER rotation" not in rotated.read_text()
diff --git a/tests/test_hermes_state.py b/tests/test_hermes_state.py
index a8f94bb40a8..1d7b922c2dd 100644
--- a/tests/test_hermes_state.py
+++ b/tests/test_hermes_state.py
@@ -2,7 +2,6 @@
 
 import time
 import pytest
-from pathlib import Path
 
 from hermes_state import SessionDB
 
@@ -2327,7 +2326,6 @@ class TestCompressionChainProjection:
 
         Returns (root_id, delegate_id, mid_id, tip_id).
         """
-        import time as _time
         # Root that gets compressed
         db.create_session("root1", "cli")
         db._conn.execute("UPDATE sessions SET started_at=? WHERE id=?", (t0, "root1"))
@@ -2660,7 +2658,6 @@ class TestConcurrentWriteSafety:
         """Connection timeout should be >= 30s to survive CLI/gateway contention."""
         # Access the underlying connection timeout via sqlite3 introspection.
         # There is no public API, so we check the kwarg via the module default.
-        import sqlite3
         import inspect
         from hermes_state import SessionDB as _SessionDB
         src = inspect.getsource(_SessionDB.__init__)
@@ -2697,6 +2694,64 @@ class TestVacuum:
         db.vacuum()
 
 
+class TestOptimizeFts:
+    def test_optimize_returns_index_count(self, db):
+        """A fresh DB has both FTS indexes; optimize merges both."""
+        db.create_session(session_id="s1", source="cli")
+        db.append_message(session_id="s1", role="user", content="hello world")
+        assert db.optimize_fts() == 2
+
+    def test_optimize_preserves_search_and_snippet(self, db):
+        """Optimize is layout-only: MATCH results + snippets are unchanged."""
+        db.create_session(session_id="s1", source="cli")
+        for i in range(50):
+            db.append_message(
+                session_id="s1",
+                role="user",
+                content=f"needle alpha bravo charlie message {i}",
+            )
+        before = db.search_messages("needle")
+        n = db.optimize_fts()
+        assert n == 2
+        after = db.search_messages("needle")
+        assert len(after) == len(before)
+        assert len(after) > 0
+        # Snippet must still be populated (would be empty/None if the FTS
+        # content shadow were lost during optimize).
+        assert all(row.get("snippet") for row in after)
+        # IDs and snippets are identical before/after — pure layout change.
+        assert [r["id"] for r in after] == [r["id"] for r in before]
+        assert [r["snippet"] for r in after] == [r["snippet"] for r in before]
+
+    def test_optimize_skips_missing_trigram_table(self, db):
+        """When the trigram index is absent, optimize handles only the porter
+        index and does not raise."""
+        db.create_session(session_id="s1", source="cli")
+        db.append_message(session_id="s1", role="user", content="hello")
+        # Drop the trigram table + triggers to simulate a disabled/absent index.
+        with db._lock:
+            for trig in (
+                "messages_fts_trigram_insert",
+                "messages_fts_trigram_delete",
+                "messages_fts_trigram_update",
+            ):
+                db._conn.execute(f"DROP TRIGGER IF EXISTS {trig}")
+            db._conn.execute("DROP TABLE IF EXISTS messages_fts_trigram")
+        assert db._fts_table_exists("messages_fts_trigram") is False
+        assert db._fts_table_exists("messages_fts") is True
+        # Only the porter index remains -> 1 optimized, no error.
+        assert db.optimize_fts() == 1
+
+    def test_optimize_idempotent(self, db):
+        """Running optimize twice is safe (second pass is a no-op merge)."""
+        db.create_session(session_id="s1", source="cli")
+        db.append_message(session_id="s1", role="user", content="repeat me")
+        assert db.optimize_fts() == 2
+        assert db.optimize_fts() == 2
+        # Search still works after repeated optimization.
+        assert len(db.search_messages("repeat")) == 1
+
+
 class TestAutoMaintenance:
     def _make_old_ended(self, db, sid: str, days_old: int = 100):
         """Create a session that is ended and was started `days_old` days ago."""
diff --git a/tests/test_hermes_state_compression_locks.py b/tests/test_hermes_state_compression_locks.py
new file mode 100644
index 00000000000..53e3bc0dec4
--- /dev/null
+++ b/tests/test_hermes_state_compression_locks.py
@@ -0,0 +1,149 @@
+"""Tests for ``SessionDB`` compression-lock primitives.
+
+These cover the atomic per-session lock that prevents two compression
+paths from racing on the same ``session_id`` and producing orphan child
+sessions (Damien's "parent → two orphan children" repro shape, see
+``tests/agent/test_compression_concurrent_fork.py`` for the
+behavioural regression test).
+
+Focus here: the lock primitives themselves (acquire, release, TTL,
+diagnostic accessor) — not the wiring into compression.
+"""
+
+from __future__ import annotations
+
+import threading
+import time
+from pathlib import Path
+
+import pytest
+
+from hermes_state import SessionDB
+
+
+@pytest.fixture
+def db(tmp_path: Path) -> SessionDB:
+    return SessionDB(tmp_path / "state.db")
+
+
+# ----------------------------------------------------------------------
+# Single-holder semantics
+# ----------------------------------------------------------------------
+
+
+def test_acquire_succeeds_when_unlocked(db: SessionDB) -> None:
+    assert db.try_acquire_compression_lock("sess1", "holder1") is True
+    assert db.get_compression_lock_holder("sess1") == "holder1"
+
+
+def test_acquire_blocks_second_holder(db: SessionDB) -> None:
+    assert db.try_acquire_compression_lock("sess1", "holder1") is True
+    assert db.try_acquire_compression_lock("sess1", "holder2") is False
+    # First holder still owns it
+    assert db.get_compression_lock_holder("sess1") == "holder1"
+
+
+def test_release_allows_reacquire(db: SessionDB) -> None:
+    db.try_acquire_compression_lock("sess1", "holder1")
+    db.release_compression_lock("sess1", "holder1")
+    assert db.get_compression_lock_holder("sess1") is None
+    assert db.try_acquire_compression_lock("sess1", "holder2") is True
+
+
+def test_release_with_wrong_holder_is_noop(db: SessionDB) -> None:
+    db.try_acquire_compression_lock("sess1", "holder1")
+    # Late-returning compressor must not release a lock it doesn't own
+    db.release_compression_lock("sess1", "holder_other")
+    assert db.get_compression_lock_holder("sess1") == "holder1"
+
+
+def test_release_when_unlocked_is_noop(db: SessionDB) -> None:
+    # No exception, no state change
+    db.release_compression_lock("never_locked", "holder1")
+    assert db.get_compression_lock_holder("never_locked") is None
+
+
+# ----------------------------------------------------------------------
+# Per-session isolation
+# ----------------------------------------------------------------------
+
+
+def test_locks_are_per_session(db: SessionDB) -> None:
+    assert db.try_acquire_compression_lock("sess1", "holder1") is True
+    # Different session: independent lock
+    assert db.try_acquire_compression_lock("sess2", "holder2") is True
+    assert db.get_compression_lock_holder("sess1") == "holder1"
+    assert db.get_compression_lock_holder("sess2") == "holder2"
+
+
+# ----------------------------------------------------------------------
+# TTL / expiry recovery
+# ----------------------------------------------------------------------
+
+
+def test_expired_lock_is_reclaimable(db: SessionDB) -> None:
+    """A crashed compressor must not permanently block the session."""
+    # Acquire with a very short TTL
+    db.try_acquire_compression_lock("sess1", "crashed_holder", ttl_seconds=0.05)
+    time.sleep(0.1)
+    # Holder check honours expiry
+    assert db.get_compression_lock_holder("sess1") is None
+    # New holder can claim it
+    assert db.try_acquire_compression_lock("sess1", "fresh_holder") is True
+    assert db.get_compression_lock_holder("sess1") == "fresh_holder"
+
+
+def test_non_expired_lock_is_held(db: SessionDB) -> None:
+    db.try_acquire_compression_lock("sess1", "holder1", ttl_seconds=60)
+    # Immediately after, still held
+    assert db.try_acquire_compression_lock("sess1", "holder2") is False
+
+
+# ----------------------------------------------------------------------
+# Empty / invalid input
+# ----------------------------------------------------------------------
+
+
+def test_acquire_empty_session_id_returns_false(db: SessionDB) -> None:
+    assert db.try_acquire_compression_lock("", "holder1") is False
+
+
+def test_release_empty_session_id_is_noop(db: SessionDB) -> None:
+    # No exception
+    db.release_compression_lock("", "holder1")
+
+
+def test_holder_empty_session_id_returns_none(db: SessionDB) -> None:
+    assert db.get_compression_lock_holder("") is None
+
+
+# ----------------------------------------------------------------------
+# Concurrency: real threads racing on the same session_id
+# ----------------------------------------------------------------------
+
+
+def test_concurrent_acquire_only_one_winner(db: SessionDB) -> None:
+    """Damien's race shape: N threads call acquire on the same session_id;
+    exactly one must win, the rest must be cleanly rejected."""
+    results: list[bool] = []
+    barrier = threading.Barrier(8)
+    lock = threading.Lock()
+
+    def try_acquire(idx: int) -> None:
+        holder = f"thread_{idx}"
+        barrier.wait()  # synchronize start
+        got = db.try_acquire_compression_lock("contended_session", holder)
+        with lock:
+            results.append(got)
+
+    threads = [threading.Thread(target=try_acquire, args=(i,)) for i in range(8)]
+    for t in threads:
+        t.start()
+    for t in threads:
+        t.join()
+
+    # Exactly one thread acquired
+    assert sum(1 for r in results if r is True) == 1
+    assert sum(1 for r in results if r is False) == 7
+    # The single winner still owns it
+    assert db.get_compression_lock_holder("contended_session") is not None
diff --git a/tests/test_honcho_client_config.py b/tests/test_honcho_client_config.py
index feb0eb41d7c..d4c62d610e9 100644
--- a/tests/test_honcho_client_config.py
+++ b/tests/test_honcho_client_config.py
@@ -2,10 +2,7 @@
 
 import json
 import os
-import tempfile
-from pathlib import Path
 
-import pytest
 
 from plugins.memory.honcho.client import HonchoClientConfig
 
diff --git a/tests/test_install_sh_symlink_stomp.py b/tests/test_install_sh_symlink_stomp.py
index 450d6fe2088..0fbe508509f 100644
--- a/tests/test_install_sh_symlink_stomp.py
+++ b/tests/test_install_sh_symlink_stomp.py
@@ -20,7 +20,6 @@ import stat
 import subprocess
 from pathlib import Path
 
-import pytest
 
 
 REPO_ROOT = Path(__file__).resolve().parent.parent
diff --git a/tests/test_ipv4_preference.py b/tests/test_ipv4_preference.py
index c57016e2235..c4e5d114782 100644
--- a/tests/test_ipv4_preference.py
+++ b/tests/test_ipv4_preference.py
@@ -2,9 +2,7 @@
 
 import importlib
 import socket
-from unittest.mock import patch, MagicMock
 
-import pytest
 
 
 def _reload_constants():
diff --git a/tests/test_lazy_session_regressions.py b/tests/test_lazy_session_regressions.py
index 511554a4170..0c1ea022064 100644
--- a/tests/test_lazy_session_regressions.py
+++ b/tests/test_lazy_session_regressions.py
@@ -11,10 +11,8 @@ Tests cover:
 import threading
 import time
 import types
-from pathlib import Path
 from unittest.mock import MagicMock, patch
 
-import pytest
 
 
 # ===========================================================================
diff --git a/tests/test_lint_config.py b/tests/test_lint_config.py
index 23ca0d6a43a..5d8eda2ae4e 100644
--- a/tests/test_lint_config.py
+++ b/tests/test_lint_config.py
@@ -82,7 +82,6 @@ class TestLintWorkflow:
         # Look for the blocking step's named line + its command.  We want
         # at least one ``ruff check .`` that does NOT have ``--exit-zero``
         # nearby.
-        import re
         # Split into lines and find ruff check invocations
         lines = content.splitlines()
         found_blocking = False
diff --git a/tests/test_mcp_serve.py b/tests/test_mcp_serve.py
index 86e3ae0bd38..11c3b65b609 100644
--- a/tests/test_mcp_serve.py
+++ b/tests/test_mcp_serve.py
@@ -15,8 +15,7 @@ import os
 import sqlite3
 import time
 import threading
-from pathlib import Path
-from unittest.mock import MagicMock, patch
+from unittest.mock import MagicMock
 
 import pytest
 
diff --git a/tests/test_minimax_oauth.py b/tests/test_minimax_oauth.py
index f29209cee8c..4b5ca5d54fa 100644
--- a/tests/test_minimax_oauth.py
+++ b/tests/test_minimax_oauth.py
@@ -15,7 +15,6 @@ import hashlib
 import json
 import time
 from datetime import datetime, timezone
-from types import SimpleNamespace
 from unittest.mock import MagicMock, patch
 
 import pytest
@@ -26,8 +25,6 @@ from hermes_cli.auth import (
     MINIMAX_OAUTH_CLIENT_ID,
     MINIMAX_OAUTH_GLOBAL_BASE,
     MINIMAX_OAUTH_GLOBAL_INFERENCE,
-    MINIMAX_OAUTH_CN_BASE,
-    MINIMAX_OAUTH_CN_INFERENCE,
     MINIMAX_OAUTH_REFRESH_SKEW_SECONDS,
     _minimax_pkce_pair,
     _minimax_request_user_code,
@@ -37,7 +34,6 @@ from hermes_cli.auth import (
     resolve_minimax_oauth_runtime_credentials,
     get_minimax_oauth_auth_status,
     get_auth_status,
-    get_provider_auth_state,
 )
 
 
diff --git a/tests/test_model_picker_scroll.py b/tests/test_model_picker_scroll.py
index e20c330ea07..f37a82fe611 100644
--- a/tests/test_model_picker_scroll.py
+++ b/tests/test_model_picker_scroll.py
@@ -14,7 +14,6 @@ isolation without requiring a real TTY.
 
 import sys
 import os
-import pytest
 
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
 
diff --git a/tests/test_model_tools.py b/tests/test_model_tools.py
index beae3daa65e..cb8f9f7a945 100644
--- a/tests/test_model_tools.py
+++ b/tests/test_model_tools.py
@@ -3,7 +3,6 @@
 import json
 from unittest.mock import ANY, call, patch
 
-import pytest
 
 from model_tools import (
     handle_function_call,
diff --git a/tests/test_model_tools_async_bridge.py b/tests/test_model_tools_async_bridge.py
index ed0a85cd355..81ffb2cc624 100644
--- a/tests/test_model_tools_async_bridge.py
+++ b/tests/test_model_tools_async_bridge.py
@@ -14,7 +14,7 @@ import asyncio
 import json
 import threading
 from types import SimpleNamespace
-from unittest.mock import AsyncMock, MagicMock, patch
+from unittest.mock import AsyncMock, patch
 
 import pytest
 
@@ -129,7 +129,6 @@ class TestRunAsyncWorkerThread:
     def test_parallel_workers_get_separate_loops(self):
         """Different worker threads must get their own loops to avoid
         contention (the original reason for the worker-thread branch)."""
-        import time
         from concurrent.futures import ThreadPoolExecutor, as_completed
         from model_tools import _run_async
 
@@ -356,7 +355,7 @@ class TestVisionDispatchLoopSafety:
     def test_vision_dispatch_keeps_loop_alive(self, tmp_path):
         """After dispatching vision_analyze via the registry, the event
         loop must remain open so cached async clients don't crash on GC."""
-        from model_tools import _run_async, _get_tool_loop
+        from model_tools import _get_tool_loop
         from tools.registry import registry
 
         fake_response = _mock_vision_response()
diff --git a/tests/test_ollama_num_ctx.py b/tests/test_ollama_num_ctx.py
index fff0144d33b..94b1d7fd6a0 100644
--- a/tests/test_ollama_num_ctx.py
+++ b/tests/test_ollama_num_ctx.py
@@ -7,7 +7,6 @@ Covers:
 
 from unittest.mock import patch, MagicMock
 
-import pytest
 
 from agent.model_metadata import query_ollama_num_ctx
 
diff --git a/tests/test_packaging_metadata.py b/tests/test_packaging_metadata.py
index ce6d4793fd1..d72c0224a69 100644
--- a/tests/test_packaging_metadata.py
+++ b/tests/test_packaging_metadata.py
@@ -1,10 +1,66 @@
 from pathlib import Path
 import tomllib
 
+import pytest
+
+# setuptools is declared in the [dev] extra and is the build backend, but
+# guard the import so a runner without it skips these packaging checks
+# instead of erroring out collection for the whole shard (it used to be
+# picked up ambiently from the CI image; newer ubuntu-latest images don't
+# ship it in the test venv).
+find_packages = pytest.importorskip("setuptools", exc_type=ImportError).find_packages
+
 
 REPO_ROOT = Path(__file__).resolve().parents[1]
 
 
+def _packages_find_include():
+    data = tomllib.loads((REPO_ROOT / "pyproject.toml").read_text(encoding="utf-8"))
+    return data["tool"]["setuptools"]["packages"]["find"]["include"]
+
+
+def test_every_on_disk_subpackage_is_covered_by_packages_find():
+    """Regression test for #34701 (and the bug class behind #34034 / #28149).
+
+    ``[tool.setuptools.packages.find]`` ``include`` is hand-maintained. Every
+    top-level package is listed twice — bare (``hermes_cli``) for the package
+    itself and ``hermes_cli.*`` for its subpackages — EXCEPT when someone
+    forgets the wildcard. v0.15.x listed ``hermes_cli`` without ``hermes_cli.*``,
+    so the wheel shipped ``hermes_cli/*.py`` but dropped the ``dashboard_auth``
+    and ``proxy`` subpackages. The dashboard then died on every install with
+    ``ModuleNotFoundError: No module named 'hermes_cli.dashboard_auth'``.
+
+    This drives setuptools' own discovery against the live tree: every package
+    that exists on disk and would be found by a permissive ``<name>.*`` scan
+    must also be found by the actual ``include`` list. A subpackage added under
+    any listed package without the matching wildcard fails here instead of in a
+    user's container.
+    """
+    include = _packages_find_include()
+
+    # What the real include list actually selects.
+    selected = set(find_packages(where=str(REPO_ROOT), include=include))
+
+    # Top-level packages we ship (bare names in the include list, no wildcard).
+    top_level = sorted({name for name in include if "." not in name})
+
+    # For each shipped top-level package, every on-disk subpackage must be
+    # covered by the include list.
+    expected = set(
+        find_packages(
+            where=str(REPO_ROOT),
+            include=[pattern for name in top_level for pattern in (name, f"{name}.*")],
+        )
+    )
+
+    missing = sorted(expected - selected)
+    assert not missing, (
+        "These packages exist on disk but are dropped from the wheel because "
+        "[tool.setuptools.packages.find] include is missing a wildcard. Add the "
+        f"matching '<name>.*' entry in pyproject.toml: {missing}"
+    )
+
+
 def test_faster_whisper_is_not_a_base_dependency():
     data = tomllib.loads((REPO_ROOT / "pyproject.toml").read_text(encoding="utf-8"))
     deps = data["project"]["dependencies"]
@@ -20,3 +76,42 @@ def test_manifest_includes_bundled_skills():
 
     assert "graft skills" in manifest
     assert "graft optional-skills" in manifest
+
+
+def test_bundled_plugin_manifests_ship_in_both_wheel_and_sdist():
+    """Regression test for #34034 / #28149.
+
+    Plugin discovery (hermes_cli/plugins.py) registers each bundled plugin by
+    reading its ``plugin.yaml`` / ``plugin.yml`` manifest. Those manifests are
+    data files, not Python modules, so they only reach installed packages when
+    declared explicitly:
+
+    - wheel  -> ``[tool.setuptools.package-data]`` ``plugins`` glob
+    - sdist  -> ``MANIFEST.in`` (Homebrew and other downstream packagers build
+                from the sdist)
+
+    v0.15.0 declared neither, so the wheel shipped every adapter's Python code
+    but none of its manifests, and *every* gateway platform failed with
+    "No adapter available for <platform>". Both channels must cover manifests.
+    """
+    # There must actually be manifests on disk for the globs to match.
+    on_disk = list((REPO_ROOT / "plugins").rglob("plugin.yaml")) + list(
+        (REPO_ROOT / "plugins").rglob("plugin.yml")
+    )
+    assert on_disk, "expected bundled plugin manifests under plugins/"
+
+    # Wheel channel: package-data must declare a glob that matches plugin
+    # manifests anywhere under the plugins package.
+    data = tomllib.loads((REPO_ROOT / "pyproject.toml").read_text(encoding="utf-8"))
+    plugins_pkg_data = data["tool"]["setuptools"]["package-data"].get("plugins", [])
+    assert any(
+        g.endswith("plugin.yaml") or g.endswith("plugin.yml")
+        for g in plugins_pkg_data
+    ), "pyproject package-data 'plugins' must ship plugin.yaml/plugin.yml (wheel)"
+
+    # Sdist channel: MANIFEST.in must recursively include the manifests so
+    # downstream packagers building from the sdist also get them.
+    manifest = (REPO_ROOT / "MANIFEST.in").read_text(encoding="utf-8")
+    assert "recursive-include plugins" in manifest and "plugin.yaml" in manifest, (
+        "MANIFEST.in must recursive-include plugins plugin.yaml/plugin.yml (sdist)"
+    )
diff --git a/tests/test_plugin_skills.py b/tests/test_plugin_skills.py
index 9764da92b6e..d528b99b5ad 100644
--- a/tests/test_plugin_skills.py
+++ b/tests/test_plugin_skills.py
@@ -8,9 +8,6 @@ Covers:
 
 import json
 import logging
-import os
-from pathlib import Path
-from unittest.mock import MagicMock
 
 import pytest
 
diff --git a/tests/test_process_loop_event_loop_warning.py b/tests/test_process_loop_event_loop_warning.py
index 5955544241c..7251060ef85 100644
--- a/tests/test_process_loop_event_loop_warning.py
+++ b/tests/test_process_loop_event_loop_warning.py
@@ -8,7 +8,6 @@ running loop.
 """
 
 import asyncio
-import sys
 import threading
 import warnings
 
diff --git a/tests/test_project_metadata.py b/tests/test_project_metadata.py
index 45afb3c1aa4..51ca9cf9128 100644
--- a/tests/test_project_metadata.py
+++ b/tests/test_project_metadata.py
@@ -73,6 +73,7 @@ def test_lazy_installable_extras_excluded_from_all():
         "modal", "daytona",
         "messaging", "slack", "matrix", "dingtalk", "feishu",
         "honcho", "hindsight",
+        "mistral",  # mistralai — Voxtral STT/TTS, lazy-installed (stt.mistral / tts.mistral)
     }
     all_extra_specs = optional_dependencies["all"]
     for extra in lazy_covered_extras:
diff --git a/tests/test_subprocess_home_isolation.py b/tests/test_subprocess_home_isolation.py
index 28401fa6644..4c69c719b6e 100644
--- a/tests/test_subprocess_home_isolation.py
+++ b/tests/test_subprocess_home_isolation.py
@@ -10,9 +10,7 @@ See: https://github.com/NousResearch/hermes-agent/issues/4426
 import os
 import threading
 from pathlib import Path
-from unittest.mock import patch
 
-import pytest
 
 
 # ---------------------------------------------------------------------------
diff --git a/tests/test_timezone.py b/tests/test_timezone.py
index f91a27b6a75..b5da11e49fa 100644
--- a/tests/test_timezone.py
+++ b/tests/test_timezone.py
@@ -14,7 +14,7 @@ import logging
 import sys
 import pytest
 from datetime import datetime, timedelta, timezone
-from unittest.mock import patch, MagicMock
+from unittest.mock import patch
 from zoneinfo import ZoneInfo
 
 import hermes_time
diff --git a/tests/test_toolset_distributions.py b/tests/test_toolset_distributions.py
index 6485208be84..9b59946ef21 100644
--- a/tests/test_toolset_distributions.py
+++ b/tests/test_toolset_distributions.py
@@ -1,7 +1,6 @@
 """Tests for toolset_distributions.py — distribution CRUD, sampling, validation."""
 
 import pytest
-from unittest.mock import patch
 
 from toolset_distributions import (
     DISTRIBUTIONS,
diff --git a/tests/test_toolsets.py b/tests/test_toolsets.py
index a6f4fc6b72e..1773d281af9 100644
--- a/tests/test_toolsets.py
+++ b/tests/test_toolsets.py
@@ -7,7 +7,6 @@ from toolsets import (
     resolve_toolset,
     resolve_multiple_toolsets,
     get_all_toolsets,
-    get_toolset_names,
     validate_toolset,
     create_custom_toolset,
     get_toolset_info,
diff --git a/tests/test_trajectory_compressor.py b/tests/test_trajectory_compressor.py
index 7978aab4c25..74d63002923 100644
--- a/tests/test_trajectory_compressor.py
+++ b/tests/test_trajectory_compressor.py
@@ -1,7 +1,6 @@
 """Tests for trajectory_compressor.py — config, metrics, and compression logic."""
 
 import importlib
-import json
 import os
 import sys
 from types import SimpleNamespace
diff --git a/tests/test_trajectory_compressor_async.py b/tests/test_trajectory_compressor_async.py
index 369b980b8fb..89c381b5242 100644
--- a/tests/test_trajectory_compressor_async.py
+++ b/tests/test_trajectory_compressor_async.py
@@ -10,7 +10,6 @@ The fix creates the AsyncOpenAI client lazily via _get_async_client() so
 each asyncio.run() gets a client bound to the current loop.
 """
 
-import types
 from types import SimpleNamespace
 from unittest.mock import MagicMock, patch
 
diff --git a/tests/test_transform_tool_result_hook.py b/tests/test_transform_tool_result_hook.py
index 508c0bdc0c7..1b414642200 100644
--- a/tests/test_transform_tool_result_hook.py
+++ b/tests/test_transform_tool_result_hook.py
@@ -5,10 +5,8 @@ Mirrors the ``transform_terminal_output`` hook tests from Phase 1 but
 targets the generic tool-result seam that runs for every tool dispatch.
 """
 
-import json
 import os
 from pathlib import Path
-from unittest.mock import MagicMock
 
 import hermes_cli.plugins as plugins_mod
 import model_tools
diff --git a/tests/test_tui_gateway_server.py b/tests/test_tui_gateway_server.py
index a731290c9d3..7b4ca867a4d 100644
--- a/tests/test_tui_gateway_server.py
+++ b/tests/test_tui_gateway_server.py
@@ -5246,6 +5246,8 @@ def test_notification_poller_skips_consumed(monkeypatch):
 
 def test_notification_poller_requeues_when_busy(monkeypatch):
     """When the agent is busy, the poller requeues the event."""
+    import queue as _queue_mod
+
     from tools.process_registry import process_registry
 
     emitted = []
@@ -5254,8 +5256,13 @@ def test_notification_poller_requeues_when_busy(monkeypatch):
     server._sessions["sid_busy"] = sess
     monkeypatch.setattr(server, "_emit", lambda *a, **kw: emitted.append(a))
 
-    while not process_registry.completion_queue.empty():
-        process_registry.completion_queue.get_nowait()
+    # Isolate the completion queue for the duration of this test. The poller
+    # reads process_registry.completion_queue by attribute at runtime, so a
+    # fresh Queue here means no concurrently-running test in the same xdist
+    # worker can put/get on the shared singleton mid-run and drain the event
+    # we expect to be requeued. monkeypatch restores the original on teardown.
+    isolated_queue: _queue_mod.Queue = _queue_mod.Queue()
+    monkeypatch.setattr(process_registry, "completion_queue", isolated_queue)
     process_registry._completion_consumed.discard("proc_busy_test")
 
     evt = {
@@ -5265,7 +5272,7 @@ def test_notification_poller_requeues_when_busy(monkeypatch):
         "exit_code": 0,
         "output": "ok",
     }
-    process_registry.completion_queue.put(evt)
+    isolated_queue.put(evt)
 
     stop = threading.Event()
     stop.set()
@@ -5278,10 +5285,8 @@ def test_notification_poller_requeues_when_busy(monkeypatch):
         assert len(status_calls) == 1
 
         # Event was requeued (agent was busy, no turn triggered)
-        assert not process_registry.completion_queue.empty()
-        requeued = process_registry.completion_queue.get_nowait()
+        assert not isolated_queue.empty()
+        requeued = isolated_queue.get_nowait()
         assert requeued["session_id"] == "proc_busy_test"
     finally:
         server._sessions.pop("sid_busy", None)
-        while not process_registry.completion_queue.empty():
-            process_registry.completion_queue.get_nowait()
diff --git a/tests/test_yuanbao_integration.py b/tests/test_yuanbao_integration.py
index 48579c0f886..0b3f0114a73 100644
--- a/tests/test_yuanbao_integration.py
+++ b/tests/test_yuanbao_integration.py
@@ -20,7 +20,7 @@ if _REPO_ROOT not in sys.path:
     sys.path.insert(0, _REPO_ROOT)
 
 import pytest
-from unittest.mock import AsyncMock, MagicMock, patch
+from unittest.mock import MagicMock, patch
 from gateway.config import Platform, PlatformConfig, GatewayConfig
 from gateway.platforms.yuanbao import YuanbaoAdapter
 
@@ -108,7 +108,6 @@ class TestGatewayRunnerRegistration:
     def _make_minimal_runner(self, config):
         """通过 __new__ + 最小初始化绕过 run.py 的模块级 dotenv/ssl 副作用"""
         import sys
-        from unittest.mock import MagicMock
 
         # Stub out heavy dependencies if not already present
         stubs = [
@@ -141,7 +140,6 @@ class TestGatewayRunnerRegistration:
     def test_runner_creates_yuanbao_adapter(self):
         """GatewayRunner._create_adapter 能为 YUANBAO 返回 YuanbaoAdapter 实例"""
         from gateway.config import GatewayConfig
-        from unittest.mock import patch
         config = make_config(enabled=True)
         gw_config = GatewayConfig(platforms={Platform.YUANBAO: config})
 
@@ -159,7 +157,6 @@ class TestGatewayRunnerRegistration:
     def test_runner_adapter_platform_attr(self):
         """创建的 adapter.PLATFORM 为 Platform.YUANBAO"""
         from gateway.config import GatewayConfig
-        from unittest.mock import patch
         config = make_config(enabled=True)
         gw_config = GatewayConfig(platforms={Platform.YUANBAO: config})
 
diff --git a/tests/test_yuanbao_pipeline.py b/tests/test_yuanbao_pipeline.py
index 659f1e70565..c1be92d6802 100644
--- a/tests/test_yuanbao_pipeline.py
+++ b/tests/test_yuanbao_pipeline.py
@@ -13,7 +13,6 @@ Tests cover:
 import sys
 import os
 import json
-import asyncio
 
 # Ensure project root is on the path
 _REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
@@ -21,7 +20,7 @@ if _REPO_ROOT not in sys.path:
     sys.path.insert(0, _REPO_ROOT)
 
 import pytest
-from unittest.mock import AsyncMock, MagicMock, patch, PropertyMock
+from unittest.mock import AsyncMock, MagicMock
 
 from gateway.platforms.yuanbao import (
     InboundContext,
@@ -43,7 +42,7 @@ from gateway.platforms.yuanbao import (
     InboundPipelineBuilder,
     YuanbaoAdapter,
 )
-from gateway.config import Platform, PlatformConfig
+from gateway.config import PlatformConfig
 
 
 # ============================================================
diff --git a/tests/test_yuanbao_proto.py b/tests/test_yuanbao_proto.py
index d5dc1fa2fd0..7971a9df8e1 100644
--- a/tests/test_yuanbao_proto.py
+++ b/tests/test_yuanbao_proto.py
@@ -28,9 +28,6 @@ from gateway.platforms.yuanbao_proto import (
     _fields_to_dict,
     _encode_msg_body_element,
     _decode_msg_body_element,
-    _encode_msg_content,
-    _decode_msg_content,
-    # conn 层
     encode_conn_msg,
     decode_conn_msg,
     encode_conn_msg_full,
@@ -49,8 +46,6 @@ from gateway.platforms.yuanbao_proto import (
     PB_MSG_TYPES,
     BIZ_SERVICES,
     CMD_TYPE,
-    CMD,
-    MODULE,
     next_seq_no,
 )
 
@@ -434,7 +429,7 @@ class TestEncodeOutbound:
 
     def test_c2c_biz_payload_contains_to_account(self):
         """验证 biz payload 包含 to_account 字段"""
-        from gateway.platforms.yuanbao_proto import _parse_fields, _fields_to_dict, _get_string
+        from gateway.platforms.yuanbao_proto import _get_string
         msg_body = [{"msg_type": "TIMTextElem", "msg_content": {"text": "test"}}]
         result = encode_send_c2c_message(
             to_account="target_user",
@@ -448,7 +443,7 @@ class TestEncodeOutbound:
         assert to_acc == "target_user"
 
     def test_group_biz_payload_contains_group_code(self):
-        from gateway.platforms.yuanbao_proto import _parse_fields, _fields_to_dict, _get_string
+        from gateway.platforms.yuanbao_proto import _get_string
         msg_body = [{"msg_type": "TIMTextElem", "msg_content": {"text": "test"}}]
         result = encode_send_group_message(
             group_code="group-xyz",
@@ -595,7 +590,7 @@ class TestEndToEnd:
 
         # 从 biz payload 中读取 to_account 和 msg_body
         from gateway.platforms.yuanbao_proto import (
-            _parse_fields, _fields_to_dict, _get_string, _get_repeated_bytes, WT_LEN
+            _get_string, _get_repeated_bytes
         )
         biz = dec["data"]
         fdict = _fields_to_dict(_parse_fields(biz))
diff --git a/tests/tools/test_accretion_caps.py b/tests/tools/test_accretion_caps.py
index dcd3c09fd97..16be619b2fb 100644
--- a/tests/tools/test_accretion_caps.py
+++ b/tests/tools/test_accretion_caps.py
@@ -18,7 +18,6 @@ churn accumulated ~20B per session_id until the process exited.
 These tests pin the new caps + prune hooks.
 """
 
-import pytest
 
 
 class TestReadTrackerCaps:
diff --git a/tests/tools/test_approval.py b/tests/tools/test_approval.py
index 942d27cbe13..3863bc01d06 100644
--- a/tests/tools/test_approval.py
+++ b/tests/tools/test_approval.py
@@ -17,7 +17,6 @@ from tools.approval import (
     is_approved,
     load_permanent,
     prompt_dangerous_approval,
-    submit_pending,
 )
 
 
diff --git a/tests/tools/test_approval_heartbeat.py b/tests/tools/test_approval_heartbeat.py
index c725a24eb45..d8531403ec8 100644
--- a/tests/tools/test_approval_heartbeat.py
+++ b/tests/tools/test_approval_heartbeat.py
@@ -12,9 +12,6 @@ between slices, mirroring ``_wait_for_process`` in ``tools/environments/base.py`
 """
 
 import os
-import threading
-import time
-from unittest.mock import patch
 
 
 def _clear_approval_state():
diff --git a/tests/tools/test_approval_plugin_hooks.py b/tests/tools/test_approval_plugin_hooks.py
index 3b01e620778..58ccb2f8a76 100644
--- a/tests/tools/test_approval_plugin_hooks.py
+++ b/tests/tools/test_approval_plugin_hooks.py
@@ -13,9 +13,6 @@ import pytest
 import tools.approval as approval_module
 from tools.approval import (
     check_all_command_guards,
-    register_gateway_notify,
-    unregister_gateway_notify,
-    resolve_gateway_approval,
     set_current_session_key,
     clear_session,
 )
diff --git a/tests/tools/test_base_environment.py b/tests/tools/test_base_environment.py
index eb3661cafd3..88fa6a7ea0f 100644
--- a/tests/tools/test_base_environment.py
+++ b/tests/tools/test_base_environment.py
@@ -4,10 +4,9 @@ Tests _wrap_command(), _extract_cwd_from_output(), _embed_stdin_heredoc(),
 init_session() failure handling, and the CWD marker contract.
 """
 
-import uuid
 from unittest.mock import MagicMock
 
-from tools.environments.base import BaseEnvironment, _cwd_marker
+from tools.environments.base import BaseEnvironment
 
 
 class _TestableEnv(BaseEnvironment):
diff --git a/tests/tools/test_browser_camofox.py b/tests/tools/test_browser_camofox.py
index cf1c32592f0..b8fc1a4d702 100644
--- a/tests/tools/test_browser_camofox.py
+++ b/tests/tools/test_browser_camofox.py
@@ -1,10 +1,8 @@
 """Tests for the Camofox browser backend."""
 
 import json
-import os
 from unittest.mock import MagicMock, patch
 
-import pytest
 
 from tools.browser_camofox import (
     camofox_back,
@@ -20,6 +18,7 @@ from tools.browser_camofox import (
     camofox_vision,
     check_camofox_available,
     is_camofox_mode,
+    _rewrite_loopback_url_for_camofox,
 )
 
 
@@ -59,6 +58,10 @@ class TestCamofoxMode:
 # ---------------------------------------------------------------------------
 
 
+def _config_with_camofox(**camofox_config):
+    return {"browser": {"camofox": camofox_config}}
+
+
 def _mock_response(status=200, json_data=None):
     resp = MagicMock()
     resp.status_code = status
@@ -73,6 +76,60 @@ def _mock_response(status=200, json_data=None):
 # ---------------------------------------------------------------------------
 
 
+class TestCamofoxLoopbackRewrite:
+    @patch("tools.browser_camofox.load_config")
+    def test_rewrites_localhost_when_enabled(self, mock_config, monkeypatch):
+        monkeypatch.delenv("CAMOFOX_REWRITE_LOOPBACK_URLS", raising=False)
+        monkeypatch.delenv("CAMOFOX_LOOPBACK_HOST_ALIAS", raising=False)
+        mock_config.return_value = _config_with_camofox(rewrite_loopback_urls=True)
+
+        rewritten, metadata = _rewrite_loopback_url_for_camofox("http://127.0.0.1:8766/#settings")
+
+        assert rewritten == "http://host.docker.internal:8766/#settings"
+        assert metadata == {
+            "from": "127.0.0.1",
+            "to": "host.docker.internal",
+            "original_url": "http://127.0.0.1:8766/#settings",
+            "rewritten_url": "http://host.docker.internal:8766/#settings",
+        }
+
+    @patch("tools.browser_camofox.load_config")
+    def test_rewrite_is_opt_in(self, mock_config, monkeypatch):
+        monkeypatch.delenv("CAMOFOX_REWRITE_LOOPBACK_URLS", raising=False)
+        mock_config.return_value = _config_with_camofox(rewrite_loopback_urls=False)
+
+        rewritten, metadata = _rewrite_loopback_url_for_camofox("http://localhost:3000/app?x=1")
+
+        assert rewritten == "http://localhost:3000/app?x=1"
+        assert metadata is None
+
+    @patch("tools.browser_camofox.load_config")
+    def test_preserves_public_urls_when_enabled(self, mock_config, monkeypatch):
+        monkeypatch.delenv("CAMOFOX_REWRITE_LOOPBACK_URLS", raising=False)
+        mock_config.return_value = _config_with_camofox(rewrite_loopback_urls=True)
+
+        rewritten, metadata = _rewrite_loopback_url_for_camofox("https://example.com:8443/path?q=1#top")
+
+        assert rewritten == "https://example.com:8443/path?q=1#top"
+        assert metadata is None
+
+    @patch("tools.browser_camofox.load_config")
+    def test_env_alias_takes_precedence(self, mock_config, monkeypatch):
+        monkeypatch.setenv("CAMOFOX_REWRITE_LOOPBACK_URLS", "true")
+        monkeypatch.setenv("CAMOFOX_LOOPBACK_HOST_ALIAS", "192.168.1.10")
+        mock_config.return_value = _config_with_camofox(
+            rewrite_loopback_urls=False,
+            loopback_host_alias="host.docker.internal",
+        )
+
+        rewritten, metadata = _rewrite_loopback_url_for_camofox("http://[::1]:8080/path")
+
+        assert rewritten == "http://192.168.1.10:8080/path"
+        assert metadata is not None
+        assert metadata["from"] == "::1"
+        assert metadata["to"] == "192.168.1.10"
+
+
 class TestCamofoxNavigate:
     @patch("tools.browser_camofox.requests.post")
     def test_creates_tab_on_first_navigate(self, mock_post, monkeypatch):
@@ -83,6 +140,24 @@ class TestCamofoxNavigate:
         assert result["success"] is True
         assert result["url"] == "https://example.com"
 
+    @patch("tools.browser_camofox.load_config")
+    @patch("tools.browser_camofox.requests.post")
+    def test_navigate_uses_rewritten_loopback_url(self, mock_post, mock_config, monkeypatch):
+        monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
+        monkeypatch.delenv("CAMOFOX_REWRITE_LOOPBACK_URLS", raising=False)
+        monkeypatch.delenv("CAMOFOX_LOOPBACK_HOST_ALIAS", raising=False)
+        mock_config.return_value = _config_with_camofox(rewrite_loopback_urls=True)
+        mock_post.return_value = _mock_response(json_data={"tabId": "tab_rewrite"})
+
+        result = json.loads(camofox_navigate("http://127.0.0.1:8766/#settings", task_id="t_rewrite"))
+
+        assert result["success"] is True
+        assert result["url"] == "http://host.docker.internal:8766/#settings"
+        assert result["requested_url"] == "http://127.0.0.1:8766/#settings"
+        assert result["url_rewrite"]["to"] == "host.docker.internal"
+        assert "Rewrote loopback URL" in result["warning"]
+        assert mock_post.call_args.kwargs["json"]["url"] == "http://host.docker.internal:8766/#settings"
+
     @patch("tools.browser_camofox.requests.post")
     def test_navigates_existing_tab(self, mock_post, monkeypatch):
         monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
diff --git a/tests/tools/test_browser_camofox_state.py b/tests/tools/test_browser_camofox_state.py
index f0e632ad5f6..153bb865874 100644
--- a/tests/tools/test_browser_camofox_state.py
+++ b/tests/tools/test_browser_camofox_state.py
@@ -2,7 +2,6 @@
 
 from unittest.mock import patch
 
-import pytest
 
 
 def _load_module():
diff --git a/tests/tools/test_browser_chromium_check.py b/tests/tools/test_browser_chromium_check.py
index 760dfa5d230..33df88735d5 100644
--- a/tests/tools/test_browser_chromium_check.py
+++ b/tests/tools/test_browser_chromium_check.py
@@ -7,7 +7,6 @@ for the full command timeout before surfacing a useless error.
 """
 
 import os
-from pathlib import Path
 
 import pytest
 
diff --git a/tests/tools/test_browser_cloud_fallback.py b/tests/tools/test_browser_cloud_fallback.py
index e4f8afd39c9..2759275b61e 100644
--- a/tests/tools/test_browser_cloud_fallback.py
+++ b/tests/tools/test_browser_cloud_fallback.py
@@ -4,7 +4,7 @@ Covers the fallback logic in _get_session_info() when a cloud provider
 is configured but fails at runtime (issue #10883).
 """
 import logging
-from unittest.mock import Mock, patch
+from unittest.mock import Mock
 
 import pytest
 
diff --git a/tests/tools/test_browser_console.py b/tests/tools/test_browser_console.py
index b058fb3f365..6b49087a696 100644
--- a/tests/tools/test_browser_console.py
+++ b/tests/tools/test_browser_console.py
@@ -3,7 +3,6 @@
 import json
 import os
 import sys
-from pathlib import Path
 from unittest.mock import patch, MagicMock
 
 import pytest
@@ -251,6 +250,83 @@ class TestBrowserVisionConfig:
         assert mock_llm.call_args.kwargs["temperature"] == 0.1
         assert mock_llm.call_args.kwargs["timeout"] == 120.0
 
+    def test_browser_vision_native_fast_path_returns_multimodal(self, tmp_path):
+        """supports_vision override → screenshot attached natively, no aux call."""
+        from agent.auxiliary_client import clear_runtime_main, set_runtime_main
+        from tools.browser_tool import browser_vision
+
+        shots_dir, screenshot = self._setup_screenshot(tmp_path)
+        annotations = [{"id": 1, "label": "Search box"}]
+        set_runtime_main("brand-new-provider", "llava-v1.6")
+        try:
+            with (
+                patch("hermes_constants.get_hermes_dir", return_value=shots_dir),
+                patch("tools.browser_tool._cleanup_old_screenshots"),
+                patch(
+                    "tools.browser_tool._run_browser_command",
+                    return_value={
+                        "success": True,
+                        "data": {"path": str(screenshot), "annotations": annotations},
+                    },
+                ),
+                patch(
+                    "hermes_cli.config.load_config",
+                    return_value={"model": {"supports_vision": True}},
+                ),
+                patch("tools.browser_tool._get_vision_model") as mock_get_vision_model,
+                patch("tools.browser_tool.call_llm") as mock_llm,
+            ):
+                result = browser_vision("what is on the page?", annotate=True, task_id="test")
+        finally:
+            clear_runtime_main()
+
+        assert isinstance(result, dict)
+        assert result["_multimodal"] is True
+        assert result["meta"]["screenshot_path"] == str(screenshot)
+        assert result["meta"]["annotations"] == annotations
+        assert any(p.get("type") == "image_url" for p in result["content"])
+        assert f"Screenshot path: {screenshot}" in result["text_summary"]
+        mock_get_vision_model.assert_not_called()
+        mock_llm.assert_not_called()
+
+    def test_browser_vision_text_mode_blocks_native_fast_path(self, tmp_path):
+        """Explicit text routing → aux LLM used even with supports_vision."""
+        from agent.auxiliary_client import clear_runtime_main, set_runtime_main
+        from tools.browser_tool import browser_vision
+
+        shots_dir, screenshot = self._setup_screenshot(tmp_path)
+        mock_response = MagicMock()
+        mock_choice = MagicMock()
+        mock_choice.message.content = "Text-mode screenshot analysis"
+        mock_response.choices = [mock_choice]
+
+        set_runtime_main("brand-new-provider", "llava-v1.6")
+        try:
+            with (
+                patch("hermes_constants.get_hermes_dir", return_value=shots_dir),
+                patch("tools.browser_tool._cleanup_old_screenshots"),
+                patch(
+                    "tools.browser_tool._run_browser_command",
+                    return_value={"success": True, "data": {"path": str(screenshot)}},
+                ),
+                patch(
+                    "hermes_cli.config.load_config",
+                    return_value={
+                        "agent": {"image_input_mode": "text"},
+                        "model": {"supports_vision": True},
+                    },
+                ),
+                patch("tools.browser_tool._get_vision_model", return_value="test-model"),
+                patch("tools.browser_tool.call_llm", return_value=mock_response) as mock_llm,
+            ):
+                result = json.loads(browser_vision("what is on the page?", task_id="test"))
+        finally:
+            clear_runtime_main()
+
+        assert result["success"] is True
+        assert result["analysis"] == "Text-mode screenshot analysis"
+        mock_llm.assert_called_once()
+
 
 # ── auto-recording config ────────────────────────────────────────────
 
diff --git a/tests/tools/test_browser_content_none_guard.py b/tests/tools/test_browser_content_none_guard.py
index 6952bb938cc..c1e8984822e 100644
--- a/tests/tools/test_browser_content_none_guard.py
+++ b/tests/tools/test_browser_content_none_guard.py
@@ -9,9 +9,8 @@ These tests verify both sites are guarded.
 """
 
 import types
-from unittest.mock import MagicMock, patch
+from unittest.mock import patch
 
-import pytest
 
 
 # ── helpers ────────────────────────────────────────────────────────────────
diff --git a/tests/tools/test_browser_eval_supervisor_path.py b/tests/tools/test_browser_eval_supervisor_path.py
index 8528b099489..09a3bcbcaef 100644
--- a/tests/tools/test_browser_eval_supervisor_path.py
+++ b/tests/tools/test_browser_eval_supervisor_path.py
@@ -8,7 +8,7 @@ real browser, no real WebSocket.  Real-CDP coverage lives in
 from __future__ import annotations
 
 import json
-from unittest.mock import MagicMock, patch
+from unittest.mock import MagicMock
 
 import pytest
 
diff --git a/tests/tools/test_browser_hardening.py b/tests/tools/test_browser_hardening.py
index 374f7af614a..657edad2a20 100644
--- a/tests/tools/test_browser_hardening.py
+++ b/tests/tools/test_browser_hardening.py
@@ -1,7 +1,6 @@
 """Tests for browser_tool.py hardening: caching, security, thread safety, truncation."""
 
 import inspect
-import os
 from unittest.mock import MagicMock, patch
 
 import pytest
diff --git a/tests/tools/test_browser_homebrew_paths.py b/tests/tools/test_browser_homebrew_paths.py
index 7edf6f6c67d..16b7f5607ba 100644
--- a/tests/tools/test_browser_homebrew_paths.py
+++ b/tests/tools/test_browser_homebrew_paths.py
@@ -2,7 +2,6 @@
 
 import json
 import os
-import subprocess
 from pathlib import Path
 from unittest.mock import patch, MagicMock, mock_open
 
diff --git a/tests/tools/test_browser_orphan_reaper.py b/tests/tools/test_browser_orphan_reaper.py
index edd8bda6c2d..3f2be1ace00 100644
--- a/tests/tools/test_browser_orphan_reaper.py
+++ b/tests/tools/test_browser_orphan_reaper.py
@@ -2,10 +2,7 @@
 daemons whose Python parent exited without cleaning up."""
 
 import os
-import signal
-import textwrap
-from pathlib import Path
-from unittest.mock import patch, MagicMock
+from unittest.mock import patch
 
 import pytest
 
diff --git a/tests/tools/test_browser_supervisor.py b/tests/tools/test_browser_supervisor.py
index 179a94506ed..9274145e814 100644
--- a/tests/tools/test_browser_supervisor.py
+++ b/tests/tools/test_browser_supervisor.py
@@ -17,7 +17,6 @@ from __future__ import annotations
 import asyncio
 import base64
 import json
-import os
 import shutil
 import subprocess
 import tempfile
@@ -48,7 +47,6 @@ def chrome_cdp(request):
     Always launches with ``--site-per-process`` so cross-origin iframes
     become real OOPIFs (needed by the iframe interaction tests).
     """
-    import socket
 
     # xdist worker_id is "master" in single-process mode or "gw0".."gwN" otherwise.
     # Under subprocess-per-file isolation there's no xdist, so we fall back
@@ -89,18 +87,45 @@ def chrome_cdp(request):
         except Exception:
             time.sleep(0.25)
     if ws_url is None:
-        proc.terminate()
-        proc.wait(timeout=5)
+        try:
+            proc.terminate()
+            proc.wait(timeout=5)
+        except (subprocess.TimeoutExpired, AssertionError, Exception):
+            try:
+                proc.kill()
+            except Exception:
+                pass
+            try:
+                proc.wait(timeout=2)
+            except (AssertionError, Exception):
+                pass
         shutil.rmtree(profile, ignore_errors=True)
         pytest.skip("Chrome didn't expose CDP in time")
 
     yield ws_url, port
 
-    proc.terminate()
+    # Tear down Chrome. The stdlib `subprocess._wait()` POSIX implementation
+    # has a known race (https://bugs.python.org/issue38630): when SIGCHLD
+    # arrives concurrently with `proc.wait()`, `_try_wait(WNOHANG)` can
+    # return a foreign pid and the `assert pid == self.pid or pid == 0`
+    # fires. We saw this in CI on slice 1 after this fixture's teardown
+    # (PR #33661 follow-up). Swallow the stdlib race + force-kill if wait
+    # hangs, then always reap so we don't leak a zombie.
+    try:
+        proc.terminate()
+    except Exception:
+        pass
     try:
         proc.wait(timeout=3)
-    except Exception:
-        proc.kill()
+    except (subprocess.TimeoutExpired, AssertionError, Exception):
+        try:
+            proc.kill()
+        except Exception:
+            pass
+        try:
+            proc.wait(timeout=2)
+        except (AssertionError, Exception):
+            pass
     shutil.rmtree(profile, ignore_errors=True)
 
 
diff --git a/tests/tools/test_checkpoint_manager.py b/tests/tools/test_checkpoint_manager.py
index 84955f224de..5c6db10c011 100644
--- a/tests/tools/test_checkpoint_manager.py
+++ b/tests/tools/test_checkpoint_manager.py
@@ -23,8 +23,6 @@ from tools.checkpoint_manager import (
     _project_meta_path,
     _touch_project,
     format_checkpoint_list,
-    DEFAULT_EXCLUDES,
-    CHECKPOINT_BASE,
     prune_checkpoints,
     maybe_auto_prune_checkpoints,
     store_status,
diff --git a/tests/tools/test_clarify_gateway.py b/tests/tools/test_clarify_gateway.py
index 86385be3571..8356d7904ac 100644
--- a/tests/tools/test_clarify_gateway.py
+++ b/tests/tools/test_clarify_gateway.py
@@ -12,7 +12,6 @@ import threading
 import time
 from concurrent.futures import ThreadPoolExecutor
 
-import pytest
 
 
 def _clear_clarify_state():
diff --git a/tests/tools/test_clarify_tool.py b/tests/tools/test_clarify_tool.py
index bcdc41929f1..8659e1f13af 100644
--- a/tests/tools/test_clarify_tool.py
+++ b/tests/tools/test_clarify_tool.py
@@ -3,7 +3,6 @@
 import json
 from typing import List, Optional
 
-import pytest
 
 from tools.clarify_tool import (
     clarify_tool,
diff --git a/tests/tools/test_clipboard.py b/tests/tools/test_clipboard.py
index 750874400c4..4a3b31ee56a 100644
--- a/tests/tools/test_clipboard.py
+++ b/tests/tools/test_clipboard.py
@@ -13,7 +13,7 @@ import queue
 import subprocess
 import sys
 from pathlib import Path
-from unittest.mock import patch, MagicMock, PropertyMock, mock_open
+from unittest.mock import patch, MagicMock, mock_open
 
 import pytest
 
diff --git a/tests/tools/test_code_execution.py b/tests/tools/test_code_execution.py
index 2d08265fb7b..bcb46136b7a 100644
--- a/tests/tools/test_code_execution.py
+++ b/tests/tools/test_code_execution.py
@@ -31,7 +31,6 @@ def _force_local_terminal(monkeypatch):
     """
     monkeypatch.setenv("TERMINAL_ENV", "local")
 import sys
-import time
 import threading
 import unittest
 from unittest.mock import patch, MagicMock
diff --git a/tests/tools/test_code_execution_windows_env.py b/tests/tools/test_code_execution_windows_env.py
index 70508818fc1..495eff1536b 100644
--- a/tests/tools/test_code_execution_windows_env.py
+++ b/tests/tools/test_code_execution_windows_env.py
@@ -21,16 +21,13 @@ bytes.  The child then fails to import with a SyntaxError:
 """
 
 import os
-import socket
 import subprocess
 import sys
 import textwrap
-import unittest.mock as mock
 
 import pytest
 
 from tools.code_execution_tool import (
-    _SAFE_ENV_PREFIXES,
     _SECRET_SUBSTRINGS,
     _WINDOWS_ESSENTIAL_ENV_VARS,
     _scrub_child_env,
@@ -256,20 +253,24 @@ class TestWindowsSocketSmokeTest:
 # ---------------------------------------------------------------------------
 
 def _legacy_posix_scrubber(source_env, is_passthrough):
-    """Verbatim copy of the pre-Windows-fix inline scrubbing logic.
+    """Independent oracle for TestPosixEquivalence — a from-scratch reimpl of
+    _scrub_child_env's POSIX behavior, used to prove the production helper does
+    what we think it does.
 
-    This is the oracle used by TestPosixEquivalence to prove the refactor
-    did not change POSIX behavior.  DO NOT edit this to "match" a future
-    production change — if _scrub_child_env's POSIX behavior legitimately
-    needs to evolve, delete this function and adjust the equivalence test
-    on purpose, so the churn is visible in review.
+    Deliberately updated for #27303 (the broad ``HERMES_`` prefix was dropped
+    in favor of an explicit operational allowlist, and DSN/WEBHOOK were added
+    to the secret substrings).  The original docstring said: if POSIX behavior
+    legitimately needs to evolve, adjust this oracle on purpose so the churn is
+    visible in review — that is what this change is.
     """
     _SAFE_ENV_PREFIXES = ("PATH", "HOME", "USER", "LANG", "LC_", "TERM",
                           "TMPDIR", "TMP", "TEMP", "SHELL", "LOGNAME",
-                          "XDG_", "PYTHONPATH", "VIRTUAL_ENV", "CONDA",
-                          "HERMES_")
+                          "XDG_", "PYTHONPATH", "VIRTUAL_ENV", "CONDA")
     _SECRET_SUBSTRINGS = ("KEY", "TOKEN", "SECRET", "PASSWORD", "CREDENTIAL",
-                          "PASSWD", "AUTH")
+                          "PASSWD", "AUTH", "DSN", "WEBHOOK")
+    _HERMES_CHILD_ALLOWED = frozenset({
+        "HERMES_HOME", "HERMES_PROFILE", "HERMES_CONFIG", "HERMES_ENV",
+    })
     out = {}
     for k, v in source_env.items():
         if is_passthrough(k):
@@ -279,6 +280,9 @@ def _legacy_posix_scrubber(source_env, is_passthrough):
             continue
         if any(k.startswith(p) for p in _SAFE_ENV_PREFIXES):
             out[k] = v
+            continue
+        if k in _HERMES_CHILD_ALLOWED:
+            out[k] = v
     return out
 
 
@@ -311,13 +315,20 @@ class TestPosixEquivalence:
         "PYTHONPATH": "/opt/lib",
         "VIRTUAL_ENV": "/home/alice/.venv",
         "CONDA_PREFIX": "/opt/conda",
-        "HERMES_HOME": "/home/alice/.hermes",
-        "HERMES_INTERACTIVE": "1",
+        # HERMES_* handling (#27303): only the operational allowlist passes;
+        # every other HERMES_* is dropped (the broad prefix was removed).
+        "HERMES_HOME": "/home/alice/.hermes",        # allowlisted → kept
+        "HERMES_PROFILE": "default",                 # allowlisted → kept
+        "HERMES_INTERACTIVE": "1",                   # not allowlisted → dropped
+        "HERMES_BASE_URL": "https://api.internal",   # not allowlisted → dropped
+        "HERMES_KANBAN_DB": "postgres://u:p@h/db",   # not allowlisted → dropped
         # Secret-substring blocks
         "OPENAI_API_KEY": "sk-xxx",
         "GITHUB_TOKEN": "ghp_xxx",
         "AWS_SECRET_ACCESS_KEY": "yyy",
         "MY_PASSWORD": "hunter2",
+        "SENTRY_DSN": "https://abc@sentry.io/1",     # DSN substring → blocked
+        "SLACK_WEBHOOK": "https://hooks.slack/x",    # WEBHOOK substring → blocked
         # Uncategorized — must be dropped
         "RANDOM_UNKNOWN": "drop-me",
         "DISPLAY": ":0",
diff --git a/tests/tools/test_computer_use.py b/tests/tools/test_computer_use.py
index 44a97db47ac..c60a5426f9c 100644
--- a/tests/tools/test_computer_use.py
+++ b/tests/tools/test_computer_use.py
@@ -5,7 +5,7 @@ from __future__ import annotations
 import json
 import os
 import sys
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Dict, List
 from unittest.mock import MagicMock, patch
 
 import pytest
diff --git a/tests/tools/test_computer_use_capture_routing.py b/tests/tools/test_computer_use_capture_routing.py
index 44084fabbea..3eeb983ecea 100644
--- a/tests/tools/test_computer_use_capture_routing.py
+++ b/tests/tools/test_computer_use_capture_routing.py
@@ -24,8 +24,6 @@ from __future__ import annotations
 import base64
 import json
 import os
-from pathlib import Path
-from typing import Any
 from unittest.mock import MagicMock, patch
 
 import pytest
diff --git a/tests/tools/test_config_null_guard.py b/tests/tools/test_config_null_guard.py
index a6ab64009ce..cb80ab8ecf5 100644
--- a/tests/tools/test_config_null_guard.py
+++ b/tests/tools/test_config_null_guard.py
@@ -6,7 +6,6 @@ return ``None`` instead of the default — calling ``.lower()`` on that raises
 """
 
 from unittest.mock import patch
-import pytest
 
 
 # ── TTS tool ──────────────────────────────────────────────────────────────
diff --git a/tests/tools/test_credential_files.py b/tests/tools/test_credential_files.py
index e0ec46a8563..32b4c7664df 100644
--- a/tests/tools/test_credential_files.py
+++ b/tests/tools/test_credential_files.py
@@ -1,6 +1,5 @@
 """Tests for credential file passthrough and skills directory mounting."""
 
-import json
 import os
 from pathlib import Path
 from unittest.mock import patch
diff --git a/tests/tools/test_cron_approval_mode.py b/tests/tools/test_cron_approval_mode.py
index 8aae20659a6..007c777e267 100644
--- a/tests/tools/test_cron_approval_mode.py
+++ b/tests/tools/test_cron_approval_mode.py
@@ -1,6 +1,5 @@
 """Tests for approvals.cron_mode — configurable approval behavior for cron jobs."""
 
-import os
 import pytest
 
 import tools.approval as approval_module
diff --git a/tests/tools/test_cronjob_tools.py b/tests/tools/test_cronjob_tools.py
index 22d4525af19..18c13b18998 100644
--- a/tests/tools/test_cronjob_tools.py
+++ b/tests/tools/test_cronjob_tools.py
@@ -2,7 +2,6 @@
 
 import json
 import pytest
-from pathlib import Path
 
 from tools.cronjob_tools import (
     _scan_cron_prompt,
diff --git a/tests/tools/test_cross_profile_guard.py b/tests/tools/test_cross_profile_guard.py
index 20814fea1ff..9ea1dd68fd0 100644
--- a/tests/tools/test_cross_profile_guard.py
+++ b/tests/tools/test_cross_profile_guard.py
@@ -13,7 +13,6 @@ This file tests that the tool surfaces:
 from __future__ import annotations
 
 import json
-import os
 from pathlib import Path
 
 import pytest
diff --git a/tests/tools/test_daytona_environment.py b/tests/tools/test_daytona_environment.py
index 229a4e20e5c..6f50bb7eb37 100644
--- a/tests/tools/test_daytona_environment.py
+++ b/tests/tools/test_daytona_environment.py
@@ -2,7 +2,7 @@
 
 import threading
 from types import SimpleNamespace
-from unittest.mock import MagicMock, patch, PropertyMock
+from unittest.mock import MagicMock
 
 import pytest
 
diff --git a/tests/tools/test_delegate.py b/tests/tools/test_delegate.py
index 72c4c67f570..3efe21389c5 100644
--- a/tests/tools/test_delegate.py
+++ b/tests/tools/test_delegate.py
@@ -11,7 +11,6 @@ Run with:  python -m pytest tests/test_delegate.py -v
 
 import json
 import os
-import sys
 import threading
 import time
 import unittest
diff --git a/tests/tools/test_delegate_composite_toolsets.py b/tests/tools/test_delegate_composite_toolsets.py
index 85460239949..2c310702f14 100644
--- a/tests/tools/test_delegate_composite_toolsets.py
+++ b/tests/tools/test_delegate_composite_toolsets.py
@@ -1,7 +1,6 @@
 """Tests for composite toolset expansion in delegate_task intersection."""
 
 import unittest
-from unittest.mock import patch
 
 from tools.delegate_tool import _expand_parent_toolsets
 
diff --git a/tests/tools/test_delegate_subagent_timeout_diagnostic.py b/tests/tools/test_delegate_subagent_timeout_diagnostic.py
index 9bb49125a11..ec596f963a6 100644
--- a/tests/tools/test_delegate_subagent_timeout_diagnostic.py
+++ b/tests/tools/test_delegate_subagent_timeout_diagnostic.py
@@ -16,12 +16,10 @@ These tests pin:
 """
 from __future__ import annotations
 
-import os
 import threading
 import time
 from pathlib import Path
-from typing import Optional
-from unittest.mock import MagicMock, patch
+from unittest.mock import MagicMock
 
 import pytest
 
diff --git a/tests/tools/test_delegate_toolset_scope.py b/tests/tools/test_delegate_toolset_scope.py
index d853dbb042c..175cd8f6485 100644
--- a/tests/tools/test_delegate_toolset_scope.py
+++ b/tests/tools/test_delegate_toolset_scope.py
@@ -6,7 +6,6 @@ parent's enabled_toolsets, it can escalate privileges by requesting
 arbitrary toolsets.
 """
 
-from unittest.mock import MagicMock, patch
 from types import SimpleNamespace
 
 from tools.delegate_tool import _strip_blocked_tools
diff --git a/tests/tools/test_discord_tool.py b/tests/tools/test_discord_tool.py
index 7aae982f732..ac94ce5e751 100644
--- a/tests/tools/test_discord_tool.py
+++ b/tests/tools/test_discord_tool.py
@@ -1,7 +1,6 @@
 """Tests for the Discord server introspection and management tool."""
 
 import json
-import os
 import urllib.error
 from io import BytesIO
 from unittest.mock import MagicMock, patch
@@ -14,7 +13,6 @@ from tools.discord_tool import (
     _ADMIN_ACTIONS,
     _CORE_ACTIONS,
     _available_actions,
-    _build_schema,
     _channel_type_name,
     _detect_capabilities,
     _discord_request,
diff --git a/tests/tools/test_docker_environment.py b/tests/tools/test_docker_environment.py
index 439d59bd76c..688e0f5c85c 100644
--- a/tests/tools/test_docker_environment.py
+++ b/tests/tools/test_docker_environment.py
@@ -1,8 +1,6 @@
 import logging
 from io import StringIO
 import subprocess
-import sys
-import types
 
 import pytest
 
@@ -203,25 +201,43 @@ def test_auto_mount_replaces_persistent_workspace_bind(monkeypatch, tmp_path):
 
 
 def test_non_persistent_cleanup_removes_container(monkeypatch):
-    """When persistent=false, cleanup() must schedule docker stop + rm."""
+    """When persist_across_processes=false, cleanup() must docker stop AND
+    docker rm so containers don't leak across hermes processes.
+
+    Updated for issue #20561: the previous implementation used fire-and-forget
+    ``subprocess.Popen("... &", shell=True)`` which raced with parent exit;
+    the new implementation uses ``subprocess.run`` on a daemon thread with
+    bounded timeouts. See test_cleanup_with_persist_disabled_stops_and_rms
+    for the full behavior contract.
+    """
     monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
-    calls = _mock_subprocess_run(monkeypatch)
+    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
+    _mock_subprocess_run(monkeypatch)
+    # Run the worker thread synchronously so assertions can observe its work.
+    import threading
+    monkeypatch.setattr(threading, "Thread", _FakeThread)
 
-    popen_cmds = []
-    monkeypatch.setattr(
-        docker_env.subprocess, "Popen",
-        lambda cmd, **kw: (popen_cmds.append(cmd), type("P", (), {"poll": lambda s: 0, "wait": lambda s, **k: None, "returncode": 0, "stdout": iter([]), "stdin": None})())[1],
+    env = docker_env.DockerEnvironment(
+        image="python:3.11", cwd="/root", timeout=60,
+        task_id="ephemeral-task", persistent_filesystem=False,
+        persist_across_processes=False,
     )
-
-    env = _make_dummy_env(persistent_filesystem=False, task_id="ephemeral-task")
-    assert env._container_id
     container_id = env._container_id
+    assert container_id
 
+    # Capture cleanup-time docker calls (everything before this was init).
+    cleanup_calls = []
+    real_run = docker_env.subprocess.run
+
+    def _capture(cmd, **kw):
+        cleanup_calls.append((list(cmd) if isinstance(cmd, list) else cmd, kw))
+        return real_run(cmd, **kw)
+
+    monkeypatch.setattr(docker_env.subprocess, "run", _capture)
     env.cleanup()
 
-    # Should have stop and rm calls via Popen
-    stop_cmds = [c for c in popen_cmds if container_id in str(c) and "stop" in str(c)]
-    assert len(stop_cmds) >= 1, f"cleanup() should schedule docker stop for {container_id}"
+    stops = [c for c in cleanup_calls if isinstance(c[0], list) and c[0][1:2] == ["stop"]]
+    assert stops, f"cleanup() should docker stop {container_id}; got {cleanup_calls}"
 
 
 class _FakePopen:
@@ -514,3 +530,956 @@ def test_run_as_host_user_warns_and_skips_when_no_posix_ids(monkeypatch, caplog)
         "does not expose POSIX uid/gid" in rec.getMessage()
         for rec in caplog.records
     ), "expected a warning when POSIX ids are unavailable"
+
+
+# ── Docker labels (issue #20561) ──────────────────────────────────
+
+
+def _run_args_from_calls(calls):
+    """Pull the argv list passed to the first ``docker run`` invocation."""
+    run_calls = [
+        c for c in calls
+        if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"
+    ]
+    assert run_calls, "docker run should have been called"
+    return run_calls[0][0]
+
+
+def _labels_in_run_args(run_args):
+    """Return the set of ``key=value`` strings passed via ``--label``."""
+    return {
+        run_args[i + 1]
+        for i, flag in enumerate(run_args[:-1])
+        if flag == "--label"
+    }
+
+
+def test_run_command_tags_hermes_agent_label(monkeypatch):
+    """Every container hermes-agent starts must carry the hermes-agent=1 label
+    so the orphan reaper (and external operators) can identify them with a
+    single ``docker ps --filter label=hermes-agent=1`` call. Regression test
+    for issue #20561 — without the label there is no global sweep target."""
+    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
+    calls = _mock_subprocess_run(monkeypatch)
+
+    _make_dummy_env(task_id="my-task")
+
+    labels = _labels_in_run_args(_run_args_from_calls(calls))
+    assert "hermes-agent=1" in labels, (
+        f"hermes-agent=1 label missing; got labels: {sorted(labels)}"
+    )
+
+
+def test_run_command_tags_task_and_profile_labels(monkeypatch):
+    """task_id and the active profile name are surfaced as labels so future
+    cross-process reuse logic can filter to a specific (task, profile) pair
+    without parsing container names. Profile resolution uses the helper that
+    returns ``"default"`` for the root Hermes home."""
+    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
+    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "research-bot")
+    calls = _mock_subprocess_run(monkeypatch)
+
+    _make_dummy_env(task_id="kanban-42")
+
+    labels = _labels_in_run_args(_run_args_from_calls(calls))
+    assert "hermes-task-id=kanban-42" in labels, (
+        f"hermes-task-id=kanban-42 missing; got: {sorted(labels)}"
+    )
+    assert "hermes-profile=research-bot" in labels, (
+        f"hermes-profile=research-bot missing; got: {sorted(labels)}"
+    )
+
+
+def test_label_sanitizer_rejects_invalid_characters():
+    """Docker label values must be alnum + ``_.-`` and ≤63 chars. Profile or
+    task names containing slashes, colons, or unicode would otherwise emit
+    invalid labels that round-trip badly through ``docker ps --filter``."""
+    assert docker_env._sanitize_label_value("plain-name_1.0") == "plain-name_1.0"
+    assert docker_env._sanitize_label_value("with/slash") == "with_slash"
+    assert docker_env._sanitize_label_value("with:colon") == "with_colon"
+    assert docker_env._sanitize_label_value("emoji-😀-here") == "emoji-_-here"
+    # Empty / non-string inputs must collapse to a queryable token, not "".
+    assert docker_env._sanitize_label_value("") == "unknown"
+    assert docker_env._sanitize_label_value(None) == "unknown"  # type: ignore[arg-type]
+    # >63 chars must truncate, not error.
+    long_value = "x" * 100
+    assert len(docker_env._sanitize_label_value(long_value)) == 63
+
+
+def test_run_command_sanitizes_unsafe_task_id(monkeypatch):
+    """A task_id containing characters Docker rejects in label values must be
+    sanitized before reaching ``docker run --label``; otherwise the daemon
+    refuses the run with an inscrutable error and the agent's first command
+    blows up."""
+    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
+    calls = _mock_subprocess_run(monkeypatch)
+
+    _make_dummy_env(task_id="task/with:weird*chars")
+
+    labels = _labels_in_run_args(_run_args_from_calls(calls))
+    # Each non-OK character becomes an underscore; the safe chars survive.
+    assert "hermes-task-id=task_with_weird_chars" in labels, (
+        f"sanitized task-id label missing; got: {sorted(labels)}"
+    )
+
+
+def test_labels_attribute_populated_after_init(monkeypatch):
+    """``self._labels`` must be set to the same key/value pairs that went onto
+    docker run, so subsequent reuse / reaper paths can match without re-running
+    the sanitizer or re-importing the profile module."""
+    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
+    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
+    _mock_subprocess_run(monkeypatch)
+
+    env = _make_dummy_env(task_id="abc")
+
+    assert env._labels == {
+        "hermes-agent": "1",
+        "hermes-task-id": "abc",
+        "hermes-profile": "default",
+    }
+
+
+# ── Cross-process container reuse (issue #20561) ──────────────────
+
+
+def _mock_subprocess_run_with_reuse(monkeypatch, ps_state: str | None,
+                                     start_succeeds: bool = True):
+    """Reuse-aware subprocess.run mock.
+
+    ``ps_state`` controls what ``docker ps -a --filter ...`` returns:
+      * ``None`` → no match (empty stdout). Forces a fresh ``docker run``.
+      * ``"running"`` / ``"exited"`` / ... → emit ``CID\\tSTATE`` so the reuse
+        path picks it up. ``"running"`` skips ``docker start``; other states
+        trigger ``docker start`` (which can be forced to fail via
+        ``start_succeeds=False``).
+
+    Returns the captured call list so the test can verify which docker
+    commands actually ran.
+    """
+    calls = []
+
+    def _run(cmd, **kwargs):
+        calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
+        if isinstance(cmd, list) and len(cmd) >= 2:
+            sub = cmd[1]
+            if sub == "version":
+                return subprocess.CompletedProcess(cmd, 0, stdout="Docker version", stderr="")
+            if sub == "ps":
+                if ps_state is None:
+                    return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
+                return subprocess.CompletedProcess(
+                    cmd, 0, stdout=f"reused-cid\t{ps_state}\n", stderr="",
+                )
+            if sub == "start":
+                if not start_succeeds:
+                    # Real subprocess.run with check=True raises on non-zero exit;
+                    # mirror that so the production code's except clause fires.
+                    raise subprocess.CalledProcessError(1, cmd, output="", stderr="no such container")
+                return subprocess.CompletedProcess(cmd, 0, stdout="reused-cid\n", stderr="")
+            if sub == "run":
+                return subprocess.CompletedProcess(cmd, 0, stdout="fresh-cid\n", stderr="")
+        return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
+
+    monkeypatch.setattr(docker_env.subprocess, "run", _run)
+    return calls
+
+
+def test_reuse_attaches_to_running_container_without_docker_run(monkeypatch):
+    """When a labeled container is already ``running``, the reuse probe
+    must pick it up and skip ``docker run`` entirely. Regression for the
+    issue #20561 root cause: every Hermes process spawning a new container
+    despite docs claiming "ONE long-lived container shared across sessions"."""
+    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
+    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
+    calls = _mock_subprocess_run_with_reuse(monkeypatch, ps_state="running")
+
+    env = _make_dummy_env(task_id="reuse-test")
+
+    # The reuse path must populate _container_id from the ps probe output.
+    assert env._container_id == "reused-cid", (
+        f"expected reused container id, got {env._container_id!r}"
+    )
+    # And it must NOT have run `docker run`.
+    run_invocations = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
+    assert not run_invocations, (
+        f"docker run should be skipped on reuse, got: {run_invocations}"
+    )
+    # And it must have NOT issued a `docker start` for an already-running container.
+    start_invocations = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "start"]
+    assert not start_invocations, (
+        f"docker start should be skipped when container already running, got: {start_invocations}"
+    )
+
+
+def test_reuse_starts_stopped_container_before_attaching(monkeypatch):
+    """A labeled container in ``exited`` state must be restarted via
+    ``docker start`` before the new Hermes process uses it. Without this
+    step, ``docker exec`` against a stopped container errors out and the
+    first agent command fails opaquely."""
+    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
+    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
+    calls = _mock_subprocess_run_with_reuse(monkeypatch, ps_state="exited")
+
+    env = _make_dummy_env(task_id="reuse-stopped")
+
+    assert env._container_id == "reused-cid"
+    start_invocations = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "start"]
+    assert start_invocations, "expected docker start for exited container"
+    run_invocations = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
+    assert not run_invocations, "should not docker run when reusing an exited container"
+
+
+def test_reuse_falls_back_to_fresh_run_when_start_fails(monkeypatch):
+    """If ``docker start`` on the matched container fails (container was
+    removed between probe and start, daemon paused, etc.), the code must
+    silently fall through to a fresh ``docker run`` rather than leaving the
+    user with a broken environment. Defensive recovery — the probe is best-
+    effort, not authoritative."""
+    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
+    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
+    calls = _mock_subprocess_run_with_reuse(
+        monkeypatch, ps_state="exited", start_succeeds=False,
+    )
+
+    env = _make_dummy_env(task_id="reuse-broken-start")
+
+    # docker start should be attempted then fail; code falls through to run.
+    assert env._container_id == "fresh-cid", (
+        f"expected fresh container id after fallback, got {env._container_id!r}"
+    )
+    run_invocations = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
+    assert run_invocations, "fallback to fresh docker run must happen on start failure"
+
+
+def test_no_reuse_when_persist_across_processes_disabled(monkeypatch):
+    """Opt-out path: ``persist_across_processes=False`` skips the ps probe
+    entirely and always starts a fresh container, matching the pre-fix
+    behavior for users who want hard per-process isolation."""
+    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
+    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
+    # ps_state=running would trigger reuse if the probe ran — assert it doesn't.
+    calls = _mock_subprocess_run_with_reuse(monkeypatch, ps_state="running")
+
+    env = docker_env.DockerEnvironment(
+        image="python:3.11", cwd="/root", timeout=60,
+        task_id="no-reuse", persist_across_processes=False,
+    )
+
+    # Must NOT have issued docker ps (the probe is gated by the flag).
+    ps_invocations = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "ps"]
+    assert not ps_invocations, (
+        f"docker ps probe should be skipped when persist_across_processes=False, got: {ps_invocations}"
+    )
+    # Should have started a fresh container.
+    assert env._container_id == "fresh-cid"
+
+
+def test_find_reusable_container_prefers_running_over_stopped(monkeypatch):
+    """When the probe returns multiple matches (shouldn't normally happen,
+    but can after a crash leaves stale duplicates), a ``running`` container
+    is preferred over any stopped one. The duplicate gets reaped later by
+    the orphan reaper; we don't try to be heroic about it here."""
+    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
+    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
+
+    def _run(cmd, **kwargs):
+        if isinstance(cmd, list) and len(cmd) >= 2:
+            if cmd[1] == "version":
+                return subprocess.CompletedProcess(cmd, 0, stdout="ok", stderr="")
+            if cmd[1] == "ps":
+                # Two matches: stopped first, running second.
+                return subprocess.CompletedProcess(
+                    cmd, 0,
+                    stdout="stopped-cid\texited\nrunning-cid\trunning\n",
+                    stderr="",
+                )
+        return subprocess.CompletedProcess(cmd, 0, stdout="fresh-cid\n", stderr="")
+
+    monkeypatch.setattr(docker_env.subprocess, "run", _run)
+
+    env = _make_dummy_env(task_id="dup-match")
+    assert env._container_id == "running-cid", (
+        f"running container should win over stopped duplicate, got {env._container_id!r}"
+    )
+
+
+# ── Cleanup correctness (issue #20561) ────────────────────────────
+
+
+class _FakeThread:
+    """Stand-in for threading.Thread that captures target/args and calls
+    target() synchronously when .start() runs, so cleanup behavior is
+    observable without actually backgrounding subprocess calls."""
+
+    def __init__(self, target=None, daemon=None, name=None):
+        self._target = target
+        self.daemon = daemon
+        self.name = name
+        self._done = False
+
+    def start(self):
+        if self._target is not None:
+            self._target()
+        self._done = True
+
+    def is_alive(self):
+        return not self._done
+
+    def join(self, timeout=None):
+        self._done = True
+
+
+def _install_fake_thread(monkeypatch):
+    import threading
+    monkeypatch.setattr(threading, "Thread", _FakeThread)
+
+
+def test_cleanup_with_persist_is_noop_for_container(monkeypatch):
+    """``persist_across_processes=True`` (default) cleanup must NEITHER stop
+    NOR remove the container — the docs promise "ONE long-lived container
+    shared across sessions", and any docker stop would kill background
+    processes inside the container (npm watchers, pytest watchers, etc.).
+
+    Resource reclamation in this mode happens via the orphan reaper on next
+    Hermes startup, not on graceful exit. Issue #20561 — the first iteration
+    of this PR did docker stop here, which Ben caught as contradicting the
+    "ONE long-lived container" semantics."""
+    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
+    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
+    _mock_subprocess_run(monkeypatch)
+    _install_fake_thread(monkeypatch)
+
+    env = _make_dummy_env(task_id="cleanup-persist", persistent_filesystem=False)
+    # Default persist_across_processes=True.
+    container_id = env._container_id
+    assert container_id
+
+    cleanup_calls = []
+    real_run = docker_env.subprocess.run
+
+    def _capturing_run(cmd, **kwargs):
+        cleanup_calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
+        return real_run(cmd, **kwargs)
+
+    monkeypatch.setattr(docker_env.subprocess, "run", _capturing_run)
+
+    env.cleanup()
+
+    stops = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "stop"]
+    rms = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "rm"]
+    assert not stops, (
+        f"docker stop must NOT be called when persist_across_processes=True; "
+        f"container has to stay running so background processes survive. "
+        f"Got: {stops}"
+    )
+    assert not rms, (
+        f"docker rm must NOT be called when persist_across_processes=True; "
+        f"reuse would be impossible. Got: {rms}"
+    )
+    # The in-process handle must still be cleared so the next __init__
+    # re-probes via labels (and reuses the still-running container).
+    assert env._container_id is None, (
+        "in-process container_id should be cleared even in no-op cleanup"
+    )
+
+
+def test_cleanup_force_remove_stops_and_rms_even_in_persist_mode(monkeypatch):
+    """``cleanup(force_remove=True)`` must stop AND rm the container even
+    when ``persist_across_processes=True``. This is the explicit-teardown
+    path for ``/reset``, ``cleanup_vm(task_id, force_remove=True)``, and any
+    future caller that wants a guaranteed fresh container.
+
+    Without this kwarg, callers in persist mode would have no way to force a
+    fresh container without also flipping the global config — too coarse for
+    a per-task reset.
+    """
+    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
+    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
+    _mock_subprocess_run(monkeypatch)
+    _install_fake_thread(monkeypatch)
+
+    env = _make_dummy_env(task_id="cleanup-force", persistent_filesystem=False)
+    assert env._container_id
+
+    cleanup_calls = []
+    real_run = docker_env.subprocess.run
+
+    def _capturing_run(cmd, **kwargs):
+        cleanup_calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
+        return real_run(cmd, **kwargs)
+
+    monkeypatch.setattr(docker_env.subprocess, "run", _capturing_run)
+
+    env.cleanup(force_remove=True)
+
+    stops = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "stop"]
+    rms = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "rm"]
+    assert stops, f"force_remove must docker stop; got: {cleanup_calls}"
+    assert rms, f"force_remove must docker rm; got: {cleanup_calls}"
+
+
+def test_cleanup_vm_default_honors_persist_mode(monkeypatch):
+    """``cleanup_vm(task_id)`` without ``force_remove=True`` must be a no-op
+    for a persist-mode container.
+
+    Regression for the bug Ben caught after commit 4: ``AIAgent.close()``
+    (which is called from ``tui_gateway/server.py`` on session.close, from
+    ``gateway/run.py`` on per-session teardown, and from per-turn cleanup)
+    calls ``cleanup_vm(task_id)``. If that defaulted to ``force_remove=True``
+    we'd tear down the container on every TUI session close, defeating the
+    "ONE long-lived container shared across sessions" contract.
+    """
+    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
+    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
+    _mock_subprocess_run(monkeypatch)
+    _install_fake_thread(monkeypatch)
+
+    from tools import terminal_tool
+
+    env = _make_dummy_env(task_id="session-close-test")
+    container_id = env._container_id
+    terminal_tool._active_environments["session-close-test"] = env
+
+    cleanup_calls = []
+    real_run = docker_env.subprocess.run
+
+    def _capturing_run(cmd, **kwargs):
+        cleanup_calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
+        return real_run(cmd, **kwargs)
+
+    monkeypatch.setattr(docker_env.subprocess, "run", _capturing_run)
+
+    try:
+        terminal_tool.cleanup_vm("session-close-test")
+    finally:
+        terminal_tool._active_environments.pop("session-close-test", None)
+
+    stops = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "stop"]
+    rms = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "rm"]
+    assert not stops, (
+        f"cleanup_vm() default must not docker stop a persist-mode container; "
+        f"got: {stops}"
+    )
+    assert not rms, (
+        f"cleanup_vm() default must not docker rm a persist-mode container; "
+        f"got: {rms}"
+    )
+
+
+def test_cleanup_vm_force_remove_tears_down_persist_container(monkeypatch):
+    """``cleanup_vm(task_id, force_remove=True)`` tears down a persist-mode
+    container — the explicit-teardown path for ``/reset``-style flows.
+
+    Also pins the runtime-signature-inspection plumbing: the kwarg must
+    actually flow through ``cleanup_vm`` into the backend's ``cleanup()``.
+    """
+    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
+    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
+    _mock_subprocess_run(monkeypatch)
+    _install_fake_thread(monkeypatch)
+
+    from tools import terminal_tool
+
+    env = _make_dummy_env(task_id="explicit-teardown-test")
+    terminal_tool._active_environments["explicit-teardown-test"] = env
+
+    cleanup_calls = []
+    real_run = docker_env.subprocess.run
+
+    def _capturing_run(cmd, **kwargs):
+        cleanup_calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
+        return real_run(cmd, **kwargs)
+
+    monkeypatch.setattr(docker_env.subprocess, "run", _capturing_run)
+
+    try:
+        terminal_tool.cleanup_vm("explicit-teardown-test", force_remove=True)
+    finally:
+        terminal_tool._active_environments.pop("explicit-teardown-test", None)
+
+    stops = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "stop"]
+    rms = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "rm"]
+    assert stops, f"force_remove must reach docker stop; got: {cleanup_calls}"
+    assert rms, f"force_remove must reach docker rm; got: {cleanup_calls}"
+
+
+def test_cleanup_with_persist_disabled_stops_and_rms(monkeypatch):
+    """``persist_across_processes=False`` cleanup must docker stop AND docker
+    rm so containers don't leak. Crucially, this runs regardless of the
+    ``persistent_filesystem`` setting — the original code only rm'd when
+    ``not self._persistent``, which meant the default-on ``container_persistent:
+    true`` users (the documented happy path) leaked Exited containers forever.
+    Issue #20561 root-cause fix."""
+    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
+    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
+    _mock_subprocess_run(monkeypatch)
+    _install_fake_thread(monkeypatch)
+
+    # Note: persistent_filesystem=True (the prior-leak scenario) + the new
+    # cross-process toggle OFF must still result in a clean rm.
+    env = docker_env.DockerEnvironment(
+        image="python:3.11", cwd="/root", timeout=60,
+        task_id="cleanup-no-persist", persistent_filesystem=True,
+        persist_across_processes=False,
+    )
+
+    cleanup_calls = []
+    real_run = docker_env.subprocess.run
+
+    def _capturing_run(cmd, **kwargs):
+        cleanup_calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
+        return real_run(cmd, **kwargs)
+
+    monkeypatch.setattr(docker_env.subprocess, "run", _capturing_run)
+
+    env.cleanup()
+
+    stops = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "stop"]
+    rms = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "rm"]
+    assert stops, "expected docker stop"
+    assert rms, (
+        "docker rm MUST run when persist_across_processes=False, even with "
+        "persistent_filesystem=True — that gating was the leak source in #20561."
+    )
+
+
+def test_cleanup_uses_subprocess_run_not_detached_shell(monkeypatch):
+    """The pre-fix code used ``subprocess.Popen("... &", shell=True)`` which
+    raced with parent-process exit and silently dropped cleanup work. The
+    new code must use ``subprocess.run`` with bounded ``timeout=`` so the
+    work actually completes within the process lifetime.
+
+    Asserts cleanup never reaches into shell-mode Popen. Uses
+    ``force_remove=True`` so cleanup actually issues docker calls — the
+    default persist-mode path is now a no-op (commit 4) and would trivially
+    pass this assertion without exercising the docker code at all.
+    """
+    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
+    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
+    _mock_subprocess_run(monkeypatch)
+    _install_fake_thread(monkeypatch)
+
+    def _forbidden_popen(*args, **kwargs):
+        raise AssertionError(
+            f"cleanup must not use subprocess.Popen anymore (issue #20561); "
+            f"got args={args} kwargs={kwargs}"
+        )
+
+    monkeypatch.setattr(docker_env.subprocess, "Popen", _forbidden_popen)
+
+    env = _make_dummy_env(task_id="no-popen-cleanup")
+    env.cleanup(force_remove=True)  # must not raise
+
+
+def test_wait_for_cleanup_returns_true_when_no_thread_started():
+    """``wait_for_cleanup`` must be a no-op when ``cleanup`` was never called
+    (or the env has no live cleanup thread) — atexit calls it unconditionally
+    across all active envs, so a False return would falsely flag healthy
+    shutdowns."""
+    env = docker_env.DockerEnvironment.__new__(docker_env.DockerEnvironment)
+    # No _cleanup_thread set — simulates an env that was never cleanup()'d.
+    assert env.wait_for_cleanup(timeout=1.0) is True
+
+
+def test_wait_for_cleanup_after_cleanup_returns_true(monkeypatch):
+    """End-to-end: cleanup() starts a thread, wait_for_cleanup() joins it
+    and reports completion. Atexit relies on this contract to ensure docker
+    stop/rm actually finishes before the Python interpreter exits.
+
+    Uses ``force_remove=True`` so cleanup actually starts a worker thread —
+    the default persist-mode cleanup is a no-op (commit 4) and never spawns
+    a thread, so the trivial "no thread" branch of wait_for_cleanup is
+    already covered by the previous test.
+    """
+    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
+    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
+    _mock_subprocess_run(monkeypatch)
+    _install_fake_thread(monkeypatch)
+
+    env = _make_dummy_env(task_id="wait-test")
+    env.cleanup(force_remove=True)
+    assert env.wait_for_cleanup(timeout=5.0) is True
+
+
+def test_cleanup_on_env_with_no_container_id_does_not_raise(monkeypatch):
+    """A DockerEnvironment whose ``__init__`` failed before the container_id
+    was set (image-pull error, docker daemon down) should still be safe to
+    cleanup() — the post-creation failure path in callers always tries.
+    Without this guard the daemon-down case used to NameError on the cleanup
+    branch."""
+    env = docker_env.DockerEnvironment.__new__(docker_env.DockerEnvironment)
+    env._container_id = None
+    env._persistent = False
+    env._workspace_dir = None
+    env._home_dir = None
+    # No exception expected.
+    env.cleanup()
+
+
+# ── Orphan reaper (issue #20561) ──────────────────────────────────
+
+
+def _now_iso(offset_seconds: int = 0) -> str:
+    """Return an RFC3339 timestamp ``offset_seconds`` in the past."""
+    import datetime
+    t = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(seconds=offset_seconds)
+    # Format like Docker emits — with nanoseconds-style trailing digits.
+    return t.isoformat().replace("+00:00", ".123456789Z")
+
+
+def _reaper_run_mock(monkeypatch, ps_ids: list[str], inspect_responses: dict[str, str],
+                      rm_succeeds: bool = True):
+    """Build a subprocess.run mock for reaper tests.
+
+    * ``ps_ids`` — what ``docker ps -a --filter ... --format '{{.ID}}'`` returns
+    * ``inspect_responses[cid]`` — what ``docker inspect ... FinishedAt`` returns
+      for each cid; ``""`` means "field unset".
+    * ``rm_succeeds`` — whether ``docker rm -f`` returns 0.
+
+    Captures every call so tests can assert which containers were rm'd.
+    """
+    calls = []
+
+    def _run(cmd, **kwargs):
+        calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
+        if not isinstance(cmd, list) or len(cmd) < 2:
+            return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
+        sub = cmd[1]
+        if sub == "ps":
+            return subprocess.CompletedProcess(
+                cmd, 0, stdout="\n".join(ps_ids) + ("\n" if ps_ids else ""), stderr="",
+            )
+        if sub == "inspect":
+            # cmd is [docker, inspect, --format, '{{.State.FinishedAt}}', cid]
+            cid = cmd[-1]
+            return subprocess.CompletedProcess(
+                cmd, 0, stdout=inspect_responses.get(cid, "") + "\n", stderr="",
+            )
+        if sub == "rm":
+            return subprocess.CompletedProcess(
+                cmd, 0 if rm_succeeds else 1,
+                stdout="", stderr="" if rm_succeeds else "no such container",
+            )
+        return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
+
+    monkeypatch.setattr(docker_env.subprocess, "run", _run)
+    return calls
+
+
+def test_reap_orphan_returns_zero_when_no_matches(monkeypatch):
+    """No labeled containers → no rm calls, returns 0. Establishes the
+    happy-path baseline for the orphan reaper (issue #20561)."""
+    calls = _reaper_run_mock(monkeypatch, ps_ids=[], inspect_responses={})
+
+    removed = docker_env.reap_orphan_containers(
+        max_age_seconds=600, profile_filter="default", docker_exe="/usr/bin/docker",
+    )
+
+    assert removed == 0
+    rms = [c for c in calls if isinstance(c[0], list) and c[0][1:2] == ["rm"]]
+    assert not rms, "no rm calls expected when ps returns empty"
+
+
+def test_reap_orphan_removes_stale_exited_container(monkeypatch):
+    """An Exited container older than max_age_seconds must be removed.
+    This is the core repair path for issue #20561 — without the reaper,
+    SIGKILL'd Hermes processes leak containers permanently."""
+    old = _now_iso(offset_seconds=900)  # 15 minutes ago
+    calls = _reaper_run_mock(
+        monkeypatch, ps_ids=["old-cid"], inspect_responses={"old-cid": old},
+    )
+
+    removed = docker_env.reap_orphan_containers(
+        max_age_seconds=600, profile_filter="default", docker_exe="/usr/bin/docker",
+    )
+
+    assert removed == 1
+    rms = [c for c in calls if isinstance(c[0], list) and c[0][1:2] == ["rm"]]
+    assert len(rms) == 1
+    assert "old-cid" in rms[0][0], f"expected rm of old-cid, got {rms[0][0]}"
+
+
+def test_reap_orphan_spares_recently_exited_container(monkeypatch):
+    """A container exited within max_age_seconds must NOT be reaped — that
+    container belongs to a Hermes process that just finished and may be
+    about to be replaced. Conservative window prevents racing sibling
+    processes."""
+    recent = _now_iso(offset_seconds=60)  # 1 minute ago
+    calls = _reaper_run_mock(
+        monkeypatch, ps_ids=["recent-cid"], inspect_responses={"recent-cid": recent},
+    )
+
+    removed = docker_env.reap_orphan_containers(
+        max_age_seconds=600, profile_filter="default", docker_exe="/usr/bin/docker",
+    )
+
+    assert removed == 0
+    rms = [c for c in calls if isinstance(c[0], list) and c[0][1:2] == ["rm"]]
+    assert not rms, f"recent container must not be reaped, got rm calls: {rms}"
+
+
+def test_reap_orphan_scopes_to_profile_filter_via_label(monkeypatch):
+    """The reaper must pass ``--filter label=hermes-profile=<profile>`` to
+    docker ps so it never sweeps another profile's containers. A research
+    profile must not tear down the default profile's stragglers."""
+    calls = _reaper_run_mock(monkeypatch, ps_ids=[], inspect_responses={})
+
+    docker_env.reap_orphan_containers(
+        max_age_seconds=600, profile_filter="research-bot", docker_exe="/usr/bin/docker",
+    )
+
+    ps_calls = [c for c in calls if isinstance(c[0], list) and c[0][1:2] == ["ps"]]
+    assert ps_calls, "expected at least one docker ps call"
+    flat = " ".join(ps_calls[0][0])
+    assert "label=hermes-profile=research-bot" in flat, (
+        f"profile filter not applied to docker ps; got args: {ps_calls[0][0]}"
+    )
+    assert "label=hermes-agent=1" in flat, (
+        f"hermes-agent label filter must also be applied; got: {ps_calls[0][0]}"
+    )
+    assert "status=exited" in flat, (
+        "must filter to exited containers only — running containers may "
+        "belong to a sibling Hermes process and must NEVER be reaped"
+    )
+
+
+def test_reap_orphan_skips_container_with_unparseable_finished_at(monkeypatch):
+    """If docker inspect returns the zero-value ``0001-01-01T00:00:00Z`` (no
+    FinishedAt yet) or an unparseable timestamp, the reaper must leave the
+    container alone. Defensive — never reap a container whose age we can't
+    determine."""
+    calls = _reaper_run_mock(
+        monkeypatch,
+        ps_ids=["never-finished", "garbage-ts"],
+        inspect_responses={
+            "never-finished": "0001-01-01T00:00:00Z",
+            "garbage-ts": "not-a-timestamp",
+        },
+    )
+
+    removed = docker_env.reap_orphan_containers(
+        max_age_seconds=600, profile_filter="default", docker_exe="/usr/bin/docker",
+    )
+
+    assert removed == 0
+    rms = [c for c in calls if isinstance(c[0], list) and c[0][1:2] == ["rm"]]
+    assert not rms, (
+        f"reaper must NOT remove containers with unparseable FinishedAt; got: {rms}"
+    )
+
+
+def test_reap_orphan_handles_docker_ps_failure_gracefully(monkeypatch):
+    """If docker ps itself fails (daemon down, permission denied), the
+    reaper returns 0 without crashing. The reaper is best-effort plumbing,
+    not a critical path — it must never block container creation."""
+    def _failing_ps(cmd, **kwargs):
+        if isinstance(cmd, list) and len(cmd) >= 2 and cmd[1] == "ps":
+            return subprocess.CompletedProcess(cmd, 1, stdout="", stderr="Cannot connect to daemon")
+        return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
+
+    monkeypatch.setattr(docker_env.subprocess, "run", _failing_ps)
+
+    # Must not raise
+    removed = docker_env.reap_orphan_containers(
+        max_age_seconds=600, profile_filter="default", docker_exe="/usr/bin/docker",
+    )
+    assert removed == 0
+
+
+def test_reap_orphan_continues_after_individual_rm_failure(monkeypatch):
+    """If ``docker rm -f`` fails on one container (already removed by a
+    concurrent process, container locked, etc.), the reaper must log and
+    continue to the next candidate rather than aborting the whole sweep."""
+    old = _now_iso(offset_seconds=900)
+    rm_calls = []
+
+    def _run(cmd, **kwargs):
+        if not isinstance(cmd, list) or len(cmd) < 2:
+            return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
+        sub = cmd[1]
+        if sub == "ps":
+            return subprocess.CompletedProcess(
+                cmd, 0, stdout="cid-a\ncid-b\ncid-c\n", stderr="",
+            )
+        if sub == "inspect":
+            return subprocess.CompletedProcess(cmd, 0, stdout=old + "\n", stderr="")
+        if sub == "rm":
+            rm_calls.append(cmd[-1])
+            # cid-b fails; cid-a and cid-c succeed.
+            if cmd[-1] == "cid-b":
+                return subprocess.CompletedProcess(cmd, 1, stdout="", stderr="no such container")
+            return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
+        return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
+
+    monkeypatch.setattr(docker_env.subprocess, "run", _run)
+
+    removed = docker_env.reap_orphan_containers(
+        max_age_seconds=600, profile_filter="default", docker_exe="/usr/bin/docker",
+    )
+
+    # All three were attempted, two succeeded.
+    assert removed == 2
+    assert set(rm_calls) == {"cid-a", "cid-b", "cid-c"}, (
+        f"reaper must attempt all candidates even when one fails; got: {rm_calls}"
+    )
+
+
+def test_container_finished_at_parses_nanosecond_timestamp(monkeypatch):
+    """Docker emits FinishedAt with nanosecond precision (RFC3339 with up to
+    9 fractional digits), but Python's fromisoformat caps at microseconds.
+    The helper must trim the extra digits without raising — otherwise every
+    candidate gets skipped and the reaper does nothing."""
+
+    def _run(cmd, **kwargs):
+        return subprocess.CompletedProcess(
+            cmd, 0,
+            stdout="2026-05-28T13:45:00.123456789Z\n",
+            stderr="",
+        )
+
+    monkeypatch.setattr(docker_env.subprocess, "run", _run)
+
+    result = docker_env._container_finished_at("/usr/bin/docker", "test-cid")
+    assert result is not None, "must parse RFC3339 with nanoseconds"
+    import datetime
+    assert result.tzinfo == datetime.timezone.utc
+    assert result.year == 2026 and result.month == 5 and result.day == 28
+
+
+def test_container_finished_at_returns_none_on_zero_value():
+    """Docker's zero-value ``0001-01-01T00:00:00Z`` (never finished) must
+    map to None so the reaper treats the container as unreapable."""
+    # Direct test of the parsing helper — no subprocess needed since the
+    # check happens after the inspect call returns.
+    import subprocess as _subprocess
+
+    class _MockRun:
+        def __init__(self, stdout):
+            self.returncode = 0
+            self.stdout = stdout
+            self.stderr = ""
+
+    import unittest.mock
+    with unittest.mock.patch.object(
+        docker_env.subprocess, "run", return_value=_MockRun("0001-01-01T00:00:00Z\n"),
+    ):
+        result = docker_env._container_finished_at("/usr/bin/docker", "never-finished")
+    assert result is None
+
+
+def test_credential_mount_skipped_when_source_is_directory(monkeypatch, tmp_path, caplog):
+    """Credential mount should be skipped when source path is a directory.
+
+    In Docker-in-Docker scenarios, Docker may auto-create the source path as
+    a directory when it doesn't exist on the host.  Mounting a directory over
+    a file destination causes exit 125.
+    """
+    # Create a directory that looks like a corrupted credential file path
+    corrupted_dir = tmp_path / "google_token.json"
+    corrupted_dir.mkdir()
+
+    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
+    calls = _mock_subprocess_run(monkeypatch)
+
+    # Mock get_credential_file_mounts to return the corrupted entry
+    fake_mounts = [
+        {"host_path": str(corrupted_dir), "container_path": "/root/.hermes/google_token.json"},
+    ]
+    monkeypatch.setattr(
+        "tools.credential_files.get_credential_file_mounts",
+        lambda: fake_mounts,
+    )
+    monkeypatch.setattr(
+        "tools.credential_files.get_skills_directory_mount",
+        lambda: [],
+    )
+    monkeypatch.setattr(
+        "tools.credential_files.get_cache_directory_mounts",
+        lambda: [],
+    )
+
+    with caplog.at_level(logging.WARNING):
+        _make_dummy_env()
+
+    # The corrupted mount should be skipped
+    run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
+    assert run_calls, "docker run should have been called"
+    run_args_str = " ".join(run_calls[0][0])
+    assert "google_token.json" not in run_args_str
+
+    # Should log a warning about the directory source
+    assert any(
+        "source is a directory" in rec.getMessage()
+        for rec in caplog.records
+    )
+
+
+def test_credential_mount_skipped_when_source_missing(monkeypatch, tmp_path, caplog):
+    """Credential mount should be skipped when source file no longer exists."""
+    missing_path = tmp_path / "deleted_token.json"
+    # Don't create the file — it's "missing"
+
+    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
+    calls = _mock_subprocess_run(monkeypatch)
+
+    fake_mounts = [
+        {"host_path": str(missing_path), "container_path": "/root/.hermes/deleted_token.json"},
+    ]
+    monkeypatch.setattr(
+        "tools.credential_files.get_credential_file_mounts",
+        lambda: fake_mounts,
+    )
+    monkeypatch.setattr(
+        "tools.credential_files.get_skills_directory_mount",
+        lambda: [],
+    )
+    monkeypatch.setattr(
+        "tools.credential_files.get_cache_directory_mounts",
+        lambda: [],
+    )
+
+    with caplog.at_level(logging.WARNING):
+        _make_dummy_env()
+
+    run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
+    assert run_calls, "docker run should have been called"
+    run_args_str = " ".join(run_calls[0][0])
+    assert "deleted_token.json" not in run_args_str
+
+    assert any(
+        "source not found" in rec.getMessage()
+        for rec in caplog.records
+    )
+
+
+def test_credential_mount_works_when_source_is_valid_file(monkeypatch, tmp_path):
+    """Credential mount should proceed normally when source is a valid file."""
+    valid_file = tmp_path / "token.json"
+    valid_file.write_text('{"token": "REDACTED"}')
+
+    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
+    calls = _mock_subprocess_run(monkeypatch)
+
+    fake_mounts = [
+        {"host_path": str(valid_file), "container_path": "/root/.hermes/token.json"},
+    ]
+    monkeypatch.setattr(
+        "tools.credential_files.get_credential_file_mounts",
+        lambda: fake_mounts,
+    )
+    monkeypatch.setattr(
+        "tools.credential_files.get_skills_directory_mount",
+        lambda: [],
+    )
+    monkeypatch.setattr(
+        "tools.credential_files.get_cache_directory_mounts",
+        lambda: [],
+    )
+
+    _make_dummy_env()
+
+    run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
+    assert run_calls, "docker run should have been called"
+    run_args_str = " ".join(run_calls[0][0])
+    assert "token.json" in run_args_str
diff --git a/tests/tools/test_docker_orphan_reaper_integration.py b/tests/tools/test_docker_orphan_reaper_integration.py
new file mode 100644
index 00000000000..d52dbcdaec7
--- /dev/null
+++ b/tests/tools/test_docker_orphan_reaper_integration.py
@@ -0,0 +1,139 @@
+"""Integration tests for the docker orphan-reaper wiring in terminal_tool.
+
+The reaper itself is unit-tested in tests/tools/test_docker_environment.py
+under the "Orphan reaper" section. These tests cover the terminal_tool-side
+gates: once-per-process behavior, the disable flag, and the
+``lifetime_seconds`` doubling that determines the reaper's age threshold.
+
+Issue #20561 — without these gates, parallel subagents would each fire the
+reaper on container creation, and the ``terminal.docker_orphan_reaper: false``
+opt-out would silently do nothing.
+"""
+
+import os
+from unittest.mock import patch
+
+import tools.terminal_tool as terminal_tool
+
+
+def _reset_reaper_gate():
+    """Clear the once-per-process flag between tests."""
+    terminal_tool._docker_orphan_reaper_ran = False
+
+
+def test_maybe_reap_runs_once_per_process(monkeypatch):
+    """The reaper sweep must run at most once per Python interpreter.
+    Parallel subagents that each call _create_environment(env_type='docker')
+    would otherwise fire N concurrent docker ps + inspect storms against the
+    daemon and waste 5–10s of startup."""
+    _reset_reaper_gate()
+    call_count = {"reap": 0}
+
+    def _fake_reap(**kwargs):
+        call_count["reap"] += 1
+        return 0
+
+    with patch("tools.environments.docker.reap_orphan_containers", _fake_reap):
+        config = {"docker_orphan_reaper": True}
+        terminal_tool._maybe_reap_docker_orphans(config)
+        terminal_tool._maybe_reap_docker_orphans(config)
+        terminal_tool._maybe_reap_docker_orphans(config)
+
+    assert call_count["reap"] == 1, (
+        f"reaper must run exactly once per process; got {call_count['reap']} calls"
+    )
+
+
+def test_maybe_reap_respects_disable_flag(monkeypatch):
+    """``terminal.docker_orphan_reaper: false`` (via container_config) must
+    skip the sweep entirely — no docker ps, no inspect, no rm. The escape
+    hatch for operators running multiple Hermes processes in the same
+    profile."""
+    _reset_reaper_gate()
+    call_count = {"reap": 0}
+
+    def _fake_reap(**kwargs):
+        call_count["reap"] += 1
+        return 0
+
+    with patch("tools.environments.docker.reap_orphan_containers", _fake_reap):
+        terminal_tool._maybe_reap_docker_orphans({"docker_orphan_reaper": False})
+
+    assert call_count["reap"] == 0, "disabled reaper must not run any docker calls"
+    # The once-per-process gate must NOT be tripped when the reaper is
+    # disabled — that would prevent a subsequent toggle to true from working.
+    assert terminal_tool._docker_orphan_reaper_ran is False
+
+
+def test_maybe_reap_doubles_lifetime_for_max_age(monkeypatch):
+    """The reaper's age threshold is ``2 × lifetime_seconds`` (with a 60s
+    floor). Generous default — gives sibling Hermes processes ample grace
+    to be replaced without their just-exited containers being yanked."""
+    _reset_reaper_gate()
+    captured_args = {}
+
+    def _fake_reap(**kwargs):
+        captured_args.update(kwargs)
+        return 0
+
+    monkeypatch.setenv("TERMINAL_LIFETIME_SECONDS", "300")
+    with patch("tools.environments.docker.reap_orphan_containers", _fake_reap):
+        terminal_tool._maybe_reap_docker_orphans({"docker_orphan_reaper": True})
+
+    assert captured_args.get("max_age_seconds") == 600, (
+        f"expected 2 × 300 = 600, got {captured_args.get('max_age_seconds')}"
+    )
+
+
+def test_maybe_reap_floors_at_60_seconds(monkeypatch):
+    """A user pinning TERMINAL_LIFETIME_SECONDS=0 (or any value <30) would
+    otherwise get an effective age threshold of zero, which would race the
+    user's own just-started container creation. Floor at 60s × 2 = 120s."""
+    _reset_reaper_gate()
+    captured_args = {}
+
+    def _fake_reap(**kwargs):
+        captured_args.update(kwargs)
+        return 0
+
+    monkeypatch.setenv("TERMINAL_LIFETIME_SECONDS", "0")
+    with patch("tools.environments.docker.reap_orphan_containers", _fake_reap):
+        terminal_tool._maybe_reap_docker_orphans({"docker_orphan_reaper": True})
+
+    assert captured_args.get("max_age_seconds") == 120, (
+        f"expected floored 60 × 2 = 120, got {captured_args.get('max_age_seconds')}"
+    )
+
+
+def test_maybe_reap_passes_current_profile_as_filter(monkeypatch):
+    """The reaper must be scoped to the current Hermes profile — a research
+    profile must NEVER reap default's containers. Verifies the
+    profile-filter wiring."""
+    _reset_reaper_gate()
+    captured_args = {}
+
+    def _fake_reap(**kwargs):
+        captured_args.update(kwargs)
+        return 0
+
+    with patch("tools.environments.docker.reap_orphan_containers", _fake_reap), \
+         patch("tools.environments.docker._get_active_profile_name", return_value="research-bot"):
+        terminal_tool._maybe_reap_docker_orphans({"docker_orphan_reaper": True})
+
+    assert captured_args.get("profile_filter") == "research-bot", (
+        f"expected profile_filter='research-bot', got {captured_args.get('profile_filter')!r}"
+    )
+
+
+def test_maybe_reap_swallows_exceptions(monkeypatch):
+    """A reaper crash (docker daemon down, parse error in helper) must NOT
+    block env creation. The reaper is best-effort plumbing, not a critical
+    path; failures get logged at debug level and execution continues."""
+    _reset_reaper_gate()
+
+    def _exploding_reap(**kwargs):
+        raise RuntimeError("docker daemon ate the cat")
+
+    with patch("tools.environments.docker.reap_orphan_containers", _exploding_reap):
+        # Must not raise
+        terminal_tool._maybe_reap_docker_orphans({"docker_orphan_reaper": True})
diff --git a/tests/tools/test_env_passthrough.py b/tests/tools/test_env_passthrough.py
index eba84bdb2cb..974911e588d 100644
--- a/tests/tools/test_env_passthrough.py
+++ b/tests/tools/test_env_passthrough.py
@@ -198,7 +198,6 @@ class TestTerminalIntegration:
     def test_make_run_env_blocklist_override_rejected(self):
         """_make_run_env must NOT expose a blocklisted var to subprocess env
         even after a skill attempts to register it via passthrough."""
-        import os
         from tools.environments.local import (
             _make_run_env,
             _HERMES_PROVIDER_ENV_BLOCKLIST,
diff --git a/tests/tools/test_env_probe.py b/tests/tools/test_env_probe.py
new file mode 100644
index 00000000000..a8ae89d8402
--- /dev/null
+++ b/tests/tools/test_env_probe.py
@@ -0,0 +1,157 @@
+"""Tests for tools/env_probe.py — local Python toolchain probe."""
+
+import sys
+
+import pytest
+
+from tools import env_probe
+
+
+@pytest.fixture(autouse=True)
+def reset_probe_cache():
+    """Each test starts with a clean cache."""
+    env_probe._reset_cache_for_tests()
+    yield
+    env_probe._reset_cache_for_tests()
+
+
+class TestSilentWhenHealthy:
+    """The probe must emit nothing when the environment is clean — otherwise
+    every prompt for every user pays an unnecessary token tax."""
+
+    def test_clean_env_returns_empty(self, monkeypatch):
+        """python3 + pip module + no PEP 668 → silent."""
+        monkeypatch.setattr(env_probe, "_python_version_of",
+                            lambda b: "3.13.3" if b == "python3" else None)
+        monkeypatch.setattr(env_probe, "_has_pip_module", lambda b: True)
+        monkeypatch.setattr(env_probe, "_detect_pep668", lambda b: False)
+        monkeypatch.setattr(env_probe, "_pip_python_version", lambda: "3.13")
+        monkeypatch.setattr(env_probe.shutil, "which", lambda name: None)
+        assert env_probe.get_environment_probe_line() == ""
+
+    def test_pep668_with_uv_returns_empty(self, monkeypatch):
+        """PEP 668 alone shouldn't trigger output if uv is installed —
+        agent has a viable install path."""
+        monkeypatch.setattr(env_probe, "_python_version_of",
+                            lambda b: "3.12.4" if b == "python3" else None)
+        monkeypatch.setattr(env_probe, "_has_pip_module", lambda b: True)
+        monkeypatch.setattr(env_probe, "_detect_pep668", lambda b: True)
+        monkeypatch.setattr(env_probe, "_pip_python_version", lambda: "3.12")
+        monkeypatch.setattr(env_probe.shutil, "which",
+                            lambda name: "/usr/local/bin/uv" if name == "uv" else None)
+        assert env_probe.get_environment_probe_line() == ""
+
+
+class TestEmitsOnRealProblems:
+    """The probe must produce a usable line for the real failure modes
+    that drove this feature."""
+
+    def test_allen_scenario_python_version_mismatch(self, monkeypatch):
+        """python3 is 3.11 (no pip module), pip on PATH is 3.12, PEP 668 on,
+        no uv — the exact scenario from the Sarasota real-estate task."""
+        monkeypatch.setattr(env_probe, "_python_version_of",
+                            lambda b: {"python3": "3.11.15", "python": None}.get(b))
+        monkeypatch.setattr(env_probe, "_has_pip_module", lambda b: False)
+        monkeypatch.setattr(env_probe, "_detect_pep668", lambda b: True)
+        monkeypatch.setattr(env_probe, "_pip_python_version", lambda: "3.12")
+        monkeypatch.setattr(env_probe.shutil, "which",
+                            lambda name: None if name == "uv" else "/usr/bin/" + name)
+
+        line = env_probe.get_environment_probe_line()
+        assert line  # not silent
+        # Single line — must not blow up the system prompt.
+        assert "\n" not in line
+        # Names the real toolchain state
+        assert "3.11.15" in line
+        assert "no pip module" in line
+        assert "mismatch" in line
+        assert "PEP 668" in line
+        # Points at the right escape hatch
+        assert "venv" in line or "uv" in line
+
+    def test_missing_python3_is_named(self, monkeypatch):
+        """If python3 isn't installed at all, say so."""
+        monkeypatch.setattr(env_probe, "_python_version_of", lambda b: None)
+        monkeypatch.setattr(env_probe, "_has_pip_module", lambda b: False)
+        monkeypatch.setattr(env_probe, "_detect_pep668", lambda b: False)
+        monkeypatch.setattr(env_probe, "_pip_python_version", lambda: None)
+        monkeypatch.setattr(env_probe.shutil, "which", lambda name: None)
+
+        line = env_probe.get_environment_probe_line()
+        assert "python3=missing" in line
+
+    def test_python_missing_but_python3_present(self, monkeypatch):
+        """Common on Debian: only python3 exists, agent shouldn't type
+        `python`."""
+        monkeypatch.setattr(env_probe, "_python_version_of",
+                            lambda b: "3.12.4" if b == "python3" else None)
+        monkeypatch.setattr(env_probe, "_has_pip_module", lambda b: True)
+        monkeypatch.setattr(env_probe, "_detect_pep668", lambda b: True)
+        monkeypatch.setattr(env_probe, "_pip_python_version", lambda: "3.12")
+        monkeypatch.setattr(env_probe.shutil, "which",
+                            lambda name: None if name == "uv" else "/usr/bin/" + name)
+
+        line = env_probe.get_environment_probe_line()
+        # `python=missing` only matters in the non-silent path; PEP 668 (without
+        # uv) is what brings us off-silent here, so check both signals.
+        assert "PEP 668" in line
+        assert "python=missing" in line
+
+
+class TestSkipsRemoteBackends:
+    """Remote backends have their own probe; this one must stay out."""
+
+    def test_docker_returns_empty(self, monkeypatch):
+        monkeypatch.setenv("TERMINAL_ENV", "docker")
+        # Even with a broken local env, docker must emit nothing.
+        monkeypatch.setattr(env_probe, "_python_version_of", lambda b: None)
+        monkeypatch.setattr(env_probe, "_has_pip_module", lambda b: False)
+        assert env_probe.get_environment_probe_line() == ""
+
+    def test_modal_returns_empty(self, monkeypatch):
+        monkeypatch.setenv("TERMINAL_ENV", "modal")
+        assert env_probe.get_environment_probe_line() == ""
+
+    def test_ssh_returns_empty(self, monkeypatch):
+        monkeypatch.setenv("TERMINAL_ENV", "ssh")
+        assert env_probe.get_environment_probe_line() == ""
+
+
+class TestCaching:
+    """The probe runs once per process — the result is deterministic for
+    the lifetime of the agent."""
+
+    def test_result_cached(self, monkeypatch):
+        calls = []
+
+        def counting_version(b):
+            calls.append(b)
+            return "3.12.4" if b == "python3" else None
+
+        monkeypatch.setattr(env_probe, "_python_version_of", counting_version)
+        monkeypatch.setattr(env_probe, "_has_pip_module", lambda b: True)
+        monkeypatch.setattr(env_probe, "_detect_pep668", lambda b: False)
+        monkeypatch.setattr(env_probe, "_pip_python_version", lambda: "3.12")
+        monkeypatch.setattr(env_probe.shutil, "which", lambda name: None)
+
+        env_probe.get_environment_probe_line()
+        env_probe.get_environment_probe_line()
+        env_probe.get_environment_probe_line()
+
+        # Only the first call probes — caller-counting confirms it.
+        # Two calls (python3 + python) on first invocation, zero after.
+        assert len(calls) == 2
+
+
+class TestRobustness:
+    """The probe must NEVER crash the prompt build."""
+
+    def test_subprocess_failure_returns_empty(self, monkeypatch):
+        """If every subprocess fails, just stay silent."""
+        def boom(*a, **kw):
+            raise OSError("simulated")
+        monkeypatch.setattr(env_probe.subprocess, "run", boom)
+        # Should not raise, should just return ""
+        result = env_probe.get_environment_probe_line()
+        # Whatever the result is, it must be a string
+        assert isinstance(result, str)
diff --git a/tests/tools/test_execute_code_approval_cluster.py b/tests/tools/test_execute_code_approval_cluster.py
new file mode 100644
index 00000000000..db3b1d9e9a3
--- /dev/null
+++ b/tests/tools/test_execute_code_approval_cluster.py
@@ -0,0 +1,349 @@
+"""Regression tests for the execute_code approval-bypass cluster.
+
+Covers the canonical fix for issues #4146, #27303, #30882, #33057:
+
+  1. tools.thread_context.propagate_context_to_thread — propagates the agent
+     turn's ContextVars AND thread-local approval/sudo callbacks into worker
+     threads, and clears the callbacks on teardown.
+  2. Both execute_code RPC threads are wrapped with that helper (source guard).
+  3. tools.approval.check_execute_code_guard — the entry-point guard decision
+     matrix (isolated backends, yolo/off, cron-deny, headless-local,
+     gateway approve/deny/timeout/missing-notify, smart mode).
+  4. tools.code_execution_tool._scrub_child_env — broad HERMES_ prefix dropped,
+     operational allowlist kept, DSN/WEBHOOK blocked, passthrough precedence.
+"""
+
+from __future__ import annotations
+
+import concurrent.futures
+import contextvars
+import threading
+
+import pytest
+
+from tools import approval as A
+from tools.thread_context import propagate_context_to_thread
+
+
+# ---------------------------------------------------------------------------
+# 1. Context + callback propagation helper
+# ---------------------------------------------------------------------------
+
+def test_helper_propagates_contextvar_and_approval_callback():
+    from tools import terminal_tool as TT
+
+    probe: contextvars.ContextVar[str] = contextvars.ContextVar(
+        "cluster_probe", default="unset"
+    )
+    probe.set("parent-value")
+    sentinel = object()
+    TT.set_approval_callback(sentinel)
+    try:
+        seen: dict = {}
+
+        def worker():
+            seen["probe"] = probe.get()
+            seen["cb"] = TT._get_approval_callback()
+
+        t = threading.Thread(target=propagate_context_to_thread(worker))
+        t.start()
+        t.join(timeout=5)
+
+        assert seen["probe"] == "parent-value"  # ContextVar propagated
+        assert seen["cb"] is sentinel            # thread-local callback propagated
+    finally:
+        TT.set_approval_callback(None)
+
+
+def test_helper_clears_callbacks_on_teardown():
+    """A recycled worker thread must not retain the propagated callback after
+    the wrapped target finishes (mirrors the GHSA-qg5c-hvr5-hjgr teardown)."""
+    from tools import terminal_tool as TT
+
+    sentinel = object()
+    TT.set_approval_callback(sentinel)
+    try:
+        seen: dict = {}
+
+        def first():
+            seen["during"] = TT._get_approval_callback()
+
+        def second():  # NOT wrapped — runs on the same recycled worker thread
+            seen["after"] = TT._get_approval_callback()
+
+        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as ex:
+            ex.submit(propagate_context_to_thread(first)).result(timeout=5)
+            ex.submit(second).result(timeout=5)
+
+        assert seen["during"] is sentinel  # installed for the wrapped target
+        assert seen["after"] is None       # cleared on teardown
+    finally:
+        TT.set_approval_callback(None)
+
+
+def test_both_rpc_threads_use_propagation_helper():
+    """Source guard: both execute_code RPC threads must wrap their target with
+    propagate_context_to_thread, or the gateway approval bypass (#33057)
+    silently returns."""
+    import inspect
+    import tools.code_execution_tool as cet
+
+    src = inspect.getsource(cet)
+    assert "propagate_context_to_thread(_rpc_server_loop)" in src, (
+        "local UDS RPC server thread is not wrapped with "
+        "propagate_context_to_thread — gateway approval routing will be lost."
+    )
+    assert "propagate_context_to_thread(_rpc_poll_loop)" in src, (
+        "remote file-RPC poll thread is not wrapped with "
+        "propagate_context_to_thread — gateway approval routing will be lost."
+    )
+
+
+# ---------------------------------------------------------------------------
+# 3. check_execute_code_guard decision matrix
+# ---------------------------------------------------------------------------
+
+@pytest.fixture
+def gw_session(monkeypatch):
+    """A clean gateway session: HERMES_GATEWAY_SESSION set, a bound session
+    key, and isolated gateway queues/callbacks. Yields the session_key."""
+    monkeypatch.setenv("HERMES_GATEWAY_SESSION", "1")
+    monkeypatch.delenv("HERMES_INTERACTIVE", raising=False)
+    monkeypatch.delenv("HERMES_CRON_SESSION", raising=False)
+    monkeypatch.delenv("HERMES_EXEC_ASK", raising=False)
+    # Force manual mode regardless of host config.
+    monkeypatch.setattr(A, "_get_approval_mode", lambda: "manual")
+
+    session_key = "cluster-test-session"
+    token = A.set_current_session_key(session_key)
+    with A._lock:
+        A._gateway_queues.pop(session_key, None)
+        A._gateway_notify_cbs.pop(session_key, None)
+    try:
+        yield session_key
+    finally:
+        A.reset_current_session_key(token)
+        with A._lock:
+            A._gateway_queues.pop(session_key, None)
+            A._gateway_notify_cbs.pop(session_key, None)
+
+
+def _register_resolver(session_key: str, result):
+    """Register a gateway notify callback that immediately resolves the most
+    recent queued approval entry with *result* (simulating a user response)."""
+    def cb(_approval_data):
+        with A._lock:
+            entries = A._gateway_queues.get(session_key, [])
+            if entries:
+                entry = entries[-1]
+                entry.result = result
+                entry.event.set()
+    with A._lock:
+        A._gateway_notify_cbs[session_key] = cb
+
+
+def test_guard_isolated_backend_approved():
+    # Container backends already sandbox the child — no-op approve.
+    assert A.check_execute_code_guard("import os", "docker")["approved"] is True
+
+
+def test_guard_headless_local_approved(monkeypatch):
+    # Documented #30882 limitation: no approval surface → preserve auto-run.
+    monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)
+    monkeypatch.delenv("HERMES_INTERACTIVE", raising=False)
+    monkeypatch.delenv("HERMES_CRON_SESSION", raising=False)
+    monkeypatch.delenv("HERMES_EXEC_ASK", raising=False)
+    monkeypatch.setattr(A, "_get_approval_mode", lambda: "manual")
+    assert A.check_execute_code_guard("import os", "local")["approved"] is True
+
+
+def test_guard_cron_deny_blocks(monkeypatch):
+    monkeypatch.setenv("HERMES_CRON_SESSION", "1")
+    monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)
+    monkeypatch.setattr(A, "_get_approval_mode", lambda: "manual")
+    monkeypatch.setattr(A, "_get_cron_approval_mode", lambda: "deny")
+    res = A.check_execute_code_guard("import os", "local")
+    assert res["approved"] is False
+    assert res["outcome"] == "blocked"
+
+
+def test_guard_gateway_user_approves_is_one_shot(gw_session):
+    _register_resolver(gw_session, "once")
+    res = A.check_execute_code_guard("import os; print(1)", "local")
+    assert res["approved"] is True
+    assert res.get("user_approved") is True
+    # One-shot: approval must NOT persist to future scripts.
+    assert A.is_approved(gw_session, "execute_code") is False
+
+
+def test_guard_gateway_user_denies_blocks(gw_session):
+    _register_resolver(gw_session, "deny")
+    res = A.check_execute_code_guard("import os", "local")
+    assert res["approved"] is False
+    assert res["outcome"] == "denied"
+    assert res["user_consent"] is False
+
+
+def test_guard_gateway_timeout_blocks(gw_session, monkeypatch):
+    # Register a callback that never resolves; force an immediate timeout.
+    with A._lock:
+        A._gateway_notify_cbs[gw_session] = lambda _d: None
+    monkeypatch.setattr(A, "_get_approval_config", lambda: {"gateway_timeout": 0})
+    res = A.check_execute_code_guard("import os", "local")
+    assert res["approved"] is False
+    assert res["outcome"] == "timeout"
+
+
+def test_guard_gateway_missing_notify_is_pending(gw_session):
+    # No notify callback registered → backward-compat pending approval.
+    res = A.check_execute_code_guard("import os", "local")
+    assert res["approved"] is False
+    assert res["status"] == "pending_approval"
+
+
+def test_guard_smart_mode(gw_session, monkeypatch):
+    monkeypatch.setattr(A, "_get_approval_mode", lambda: "smart")
+
+    monkeypatch.setattr(A, "_smart_approve", lambda c, d: "approve")
+    res = A.check_execute_code_guard("import os", "local")
+    assert res["approved"] is True and res.get("smart_approved") is True
+
+    monkeypatch.setattr(A, "_smart_approve", lambda c, d: "deny")
+    res = A.check_execute_code_guard("import os", "local")
+    assert res["approved"] is False and res.get("smart_denied") is True
+
+    # escalate → falls through to manual gateway approval
+    monkeypatch.setattr(A, "_smart_approve", lambda c, d: "escalate")
+    _register_resolver(gw_session, "once")
+    res = A.check_execute_code_guard("import os", "local")
+    assert res["approved"] is True
+
+
+def test_guard_session_yolo_bypasses(gw_session):
+    A.enable_session_yolo(gw_session)
+    try:
+        # Even with a denier registered, yolo short-circuits before the prompt.
+        _register_resolver(gw_session, "deny")
+        assert A.check_execute_code_guard("import os", "local")["approved"] is True
+    finally:
+        A.disable_session_yolo(gw_session)
+
+
+# ---------------------------------------------------------------------------
+# 4. Env scrubbing (#27303)
+# ---------------------------------------------------------------------------
+
+def test_env_scrub_hermes_allowlist_and_secret_blocks():
+    from tools.code_execution_tool import _scrub_child_env
+
+    env = {
+        # operational allowlist → kept
+        "HERMES_HOME": "/h", "HERMES_PROFILE": "p",
+        "HERMES_CONFIG": "/c.yaml", "HERMES_ENV": "/e",
+        # other HERMES_* → dropped (broad prefix removed)
+        "HERMES_BASE_URL": "https://x", "HERMES_INTERACTIVE": "1",
+        "HERMES_KANBAN_DB": "postgres://u:p@h/db",
+        # secret substrings (incl. new DSN/WEBHOOK) → dropped
+        "SENTRY_DSN": "https://a@s.io/1", "SLACK_WEBHOOK": "https://h/x",
+        "OPENAI_API_KEY": "sk", "GITHUB_TOKEN": "ghp",
+        # safe prefix → kept; uncategorized → dropped
+        "PATH": "/usr/bin", "RANDOM_X": "y",
+    }
+    out = _scrub_child_env(env, is_passthrough=lambda _: False, is_windows=False)
+
+    for kept in ("HERMES_HOME", "HERMES_PROFILE", "HERMES_CONFIG", "HERMES_ENV", "PATH"):
+        assert kept in out, f"{kept} should be kept"
+    for dropped in (
+        "HERMES_BASE_URL", "HERMES_INTERACTIVE", "HERMES_KANBAN_DB",
+        "SENTRY_DSN", "SLACK_WEBHOOK", "OPENAI_API_KEY", "GITHUB_TOKEN",
+        "RANDOM_X",
+    ):
+        assert dropped not in out, f"{dropped} should be dropped"
+
+
+def test_env_scrub_passthrough_overrides_secret_block():
+    """A skill/config-declared passthrough var is an explicit user opt-in and
+    passes even if it matches a secret substring (precedence is intentional)."""
+    from tools.code_execution_tool import _scrub_child_env
+
+    env = {"MY_SERVICE_DSN": "value"}
+    out = _scrub_child_env(env, is_passthrough=lambda k: k == "MY_SERVICE_DSN",
+                           is_windows=False)
+    assert out.get("MY_SERVICE_DSN") == "value"
+
+
+# ---------------------------------------------------------------------------
+# 5. File-tool sensitive-path refusal (security B1)
+# ---------------------------------------------------------------------------
+
+def test_execute_code_entry_blocks_before_spawn_when_guard_denies(monkeypatch, tmp_path):
+    """Behavioral wiring test: execute_code() consults the entry guard and, on
+    denial, returns the block message WITHOUT spawning the child — proven by a
+    marker file the script would create that never appears."""
+    import json
+
+    import tools.code_execution_tool as cet
+    from tools import terminal_tool as TT
+
+    marker = tmp_path / "child-ran.marker"
+    monkeypatch.setenv("HERMES_CRON_SESSION", "1")
+    monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)
+    monkeypatch.delenv("HERMES_INTERACTIVE", raising=False)
+    monkeypatch.setattr(A, "_get_approval_mode", lambda: "manual")
+    monkeypatch.setattr(A, "_get_cron_approval_mode", lambda: "deny")
+    monkeypatch.setattr(TT, "_get_env_config", lambda: {"env_type": "local"})
+
+    result = json.loads(
+        cet.execute_code(f"open({str(marker)!r}, 'w').close()", task_id="cluster-t")
+    )
+    assert result["status"] == "error"
+    assert "BLOCKED" in result["error"]
+    assert not marker.exists()  # guard denied before the child was spawned
+
+
+# ---------------------------------------------------------------------------
+# 6. Env-scrub diagnosability mitigation (#27303 follow-up)
+# ---------------------------------------------------------------------------
+
+def test_env_scrub_logs_dropped_hermes_vars(caplog):
+    """Dropping a non-allowlisted, non-secret HERMES_* var must be diagnosable:
+    the scrub emits a one-shot debug log naming the dropped vars and pointing at
+    the env_passthrough opt-in, so the silent behavior change (#27303) doesn't
+    leave users guessing why a sandbox script sees an unset HERMES_* var."""
+    import logging
+
+    from tools.code_execution_tool import _scrub_child_env
+
+    env = {
+        "HERMES_HOME": "/h",          # allowlisted → kept, not logged
+        "HERMES_BASE_URL": "https://x",   # dropped → logged
+        "HERMES_KANBAN_DB": "postgres://u:p@h/db",  # dropped → logged
+        "HERMES_API_KEY": "sk",       # secret → dropped silently (not logged)
+        "PATH": "/usr/bin",           # safe prefix → kept
+    }
+    with caplog.at_level(logging.DEBUG, logger="tools.code_execution_tool"):
+        out = _scrub_child_env(env, is_passthrough=lambda _: False, is_windows=False)
+
+    assert "HERMES_HOME" in out and "PATH" in out
+    assert "HERMES_BASE_URL" not in out and "HERMES_KANBAN_DB" not in out
+
+    msgs = "\n".join(r.getMessage() for r in caplog.records)
+    assert "HERMES_BASE_URL" in msgs and "HERMES_KANBAN_DB" in msgs
+    assert "env_passthrough" in msgs
+    # Secret vars are dropped but must NOT be named in the diagnostic log.
+    assert "HERMES_API_KEY" not in msgs
+
+
+def test_env_scrub_no_log_when_nothing_dropped(caplog):
+    """No diagnostic noise when there are no dropped HERMES_* vars."""
+    import logging
+
+    from tools.code_execution_tool import _scrub_child_env
+
+    with caplog.at_level(logging.DEBUG, logger="tools.code_execution_tool"):
+        _scrub_child_env(
+            {"HERMES_HOME": "/h", "PATH": "/usr/bin"},
+            is_passthrough=lambda _: False,
+            is_windows=False,
+        )
+    assert "dropped" not in "\n".join(r.getMessage() for r in caplog.records)
diff --git a/tests/tools/test_file_operations.py b/tests/tools/test_file_operations.py
index 392e85d8956..f809ea5d912 100644
--- a/tests/tools/test_file_operations.py
+++ b/tests/tools/test_file_operations.py
@@ -8,8 +8,6 @@ from unittest.mock import MagicMock
 
 from tools.file_operations import (
     _is_write_denied,
-    WRITE_DENIED_PATHS,
-    WRITE_DENIED_PREFIXES,
     ReadResult,
     WriteResult,
     PatchResult,
@@ -17,8 +15,6 @@ from tools.file_operations import (
     SearchMatch,
     LintResult,
     ShellFileOperations,
-    BINARY_EXTENSIONS,
-    IMAGE_EXTENSIONS,
     MAX_LINE_LENGTH,
     normalize_read_pagination,
     normalize_search_pagination,
diff --git a/tests/tools/test_file_ops_cwd_tracking.py b/tests/tools/test_file_ops_cwd_tracking.py
index 3b9e6be4c0a..9df366a6e11 100644
--- a/tests/tools/test_file_ops_cwd_tracking.py
+++ b/tests/tools/test_file_ops_cwd_tracking.py
@@ -17,10 +17,7 @@ Fix: _exec() now prefers the LIVE ``env.cwd`` over the init-time
 
 from __future__ import annotations
 
-import os
-import tempfile
 
-import pytest
 
 from tools.file_operations import ShellFileOperations
 
diff --git a/tests/tools/test_file_read_guards.py b/tests/tools/test_file_read_guards.py
index ca44f6c3eb4..fbe09f360bc 100644
--- a/tests/tools/test_file_read_guards.py
+++ b/tests/tools/test_file_read_guards.py
@@ -21,7 +21,6 @@ from tools.file_tools import (
     _is_blocked_device,
     _invalidate_dedup_for_path,
     _READ_DEDUP_STATUS_MESSAGE,
-    _get_max_read_chars,
     _DEFAULT_MAX_READ_CHARS,
     _read_tracker,
     notify_other_tool_call,
diff --git a/tests/tools/test_file_sync_back.py b/tests/tools/test_file_sync_back.py
index 9c9da7dc502..a429b3a90da 100644
--- a/tests/tools/test_file_sync_back.py
+++ b/tests/tools/test_file_sync_back.py
@@ -5,9 +5,8 @@ import logging
 import os
 import signal
 import tarfile
-import time
 from pathlib import Path
-from unittest.mock import MagicMock, call, patch
+from unittest.mock import MagicMock, patch
 
 import pytest
 
diff --git a/tests/tools/test_file_tools.py b/tests/tools/test_file_tools.py
index 2ef8411094a..ac28e41ce89 100644
--- a/tests/tools/test_file_tools.py
+++ b/tests/tools/test_file_tools.py
@@ -9,10 +9,7 @@ import logging
 from unittest.mock import MagicMock, patch
 
 from tools.file_tools import (
-    READ_FILE_SCHEMA,
-    WRITE_FILE_SCHEMA,
     PATCH_SCHEMA,
-    SEARCH_FILES_SCHEMA,
 )
 
 
diff --git a/tests/tools/test_file_tools_live.py b/tests/tools/test_file_tools_live.py
index 6c3500eb88a..641e7dc6a0a 100644
--- a/tests/tools/test_file_tools_live.py
+++ b/tests/tools/test_file_tools_live.py
@@ -13,12 +13,10 @@ import pytest
 
 
 
-import json
 import os
 import sys
 from pathlib import Path
 
-import pytest
 
 sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
 
@@ -388,7 +386,6 @@ class TestExpandPath:
         # The path should be returned as-is (no expansion).
         assert result == malicious
         # Verify the injected command did NOT execute
-        import os
         assert not os.path.exists("/tmp/_hermes_injection_test")
 
     def test_tilde_username_with_subpath(self, ops):
diff --git a/tests/tools/test_fuzzy_match.py b/tests/tools/test_fuzzy_match.py
index b4e3640e2bd..f81d0437434 100644
--- a/tests/tools/test_fuzzy_match.py
+++ b/tests/tools/test_fuzzy_match.py
@@ -429,3 +429,118 @@ class TestFormatNoMatchHint:
         )
         assert result == ""
 
+
+class TestEscapeNormalizedNewString:
+    """Regression tests for unescaping common sequences in new_string when
+    the matched region of the file contains real control characters.
+
+    Issue #33733: LLMs overwhelmingly represent tabs as the two-character
+    sequence ``\\t`` (backslash + t) in JSON tool-call arguments. When the
+    file already contains real tab bytes (0x09), writing new_string
+    verbatim leaves literal ``\\t`` characters and corrupts the file.
+
+    The fix unescapes ``\\t`` -> tab and ``\\r`` -> CR in new_string when
+    the matched file region actually contains those control characters,
+    regardless of which match strategy fired. ``\\n`` is excluded because
+    newlines serialize correctly through JSON.
+    """
+
+    def test_tab_in_new_string_unescaped_under_escape_normalized(self):
+        """File has real tab, model sends literal \\t in BOTH old and new.
+
+        Match strategy is ``escape_normalized``.
+        """
+        content = "def hello():\n\tprint(\"before\")\n"
+        old_string = "def hello():\n\\tprint(\"before\")\n"
+        new_string = "def hello():\n\\tprint(\"after\")\n"
+        new, count, strategy, err = fuzzy_find_and_replace(content, old_string, new_string)
+        assert err is None, f"Unexpected error: {err}"
+        assert count == 1
+        assert strategy == "escape_normalized"
+        assert "\tprint(\"after\")" in new
+        assert "\\t" not in new
+
+    def test_tab_in_new_string_unescaped_under_exact(self):
+        """File has real tab, old_string has real tab too (matches via
+        ``exact``), but new_string still arrives with literal ``\\t``.
+
+        This is the issue's headline reproduction — the previous fix that
+        gated on ``strategy_name == "escape_normalized"`` missed this case.
+        """
+        content = "def hello():\n\tprint(\"before\")\n"
+        old_string = "\tprint(\"before\")"           # real tab
+        new_string = "\\tprint(\"after\")"           # literal backslash + t
+        new, count, strategy, err = fuzzy_find_and_replace(content, old_string, new_string)
+        assert err is None, f"Unexpected error: {err}"
+        assert count == 1
+        assert strategy == "exact"
+        assert "\tprint(\"after\")" in new
+        assert "\\t" not in new
+
+    def test_carriage_return_in_new_string_unescaped(self):
+        """File has real CR, model sends literal \\r in new_string."""
+        content = "line1\r\nline2\r\n"
+        old_string = "line1\\r\\nline2\\r\\n"
+        new_string = "replaced\\r\\n"
+        new, count, strategy, err = fuzzy_find_and_replace(content, old_string, new_string)
+        assert err is None, f"Unexpected error: {err}"
+        assert count == 1
+        assert strategy == "escape_normalized"
+        assert "replaced\r" in new
+
+    def test_newline_in_new_string_NOT_unescaped(self):
+        """``\\n`` is intentionally left alone — newlines serialize correctly
+        through JSON, and unescaping would corrupt source-code escape
+        sequences far more often than help.
+        """
+        content = "line1\nline2\n"
+        old_string = "line1\nline2"
+        new_string = "alpha\\nbeta"                 # literal backslash + n
+        new, count, _, err = fuzzy_find_and_replace(content, old_string, new_string)
+        assert err is None, f"Unexpected error: {err}"
+        assert count == 1
+        # The literal two-character sequence ``\n`` must survive verbatim.
+        assert "alpha\\nbeta" in new
+        # And there should be no real newline added where ``\\n`` sat.
+        assert "alpha\nbeta" not in new
+
+    def test_mixed_tab_and_newline_only_tab_unescaped(self):
+        """When new_string contains both \\t and \\n, only \\t is converted."""
+        content = "def foo():\n\tpass\n"
+        old_string = "def foo():\n\tpass\n"
+        new_string = "def bar():\\n\\treturn 1\\n"
+        new, count, _, err = fuzzy_find_and_replace(content, old_string, new_string)
+        assert err is None, f"Unexpected error: {err}"
+        assert count == 1
+        # \t -> real tab
+        assert "\treturn 1" in new
+        assert "\\t" not in new
+        # \n preserved as literal backslash-n
+        assert "\\n" in new
+
+    def test_exact_match_preserves_literal_backslash_t_in_string_literal(self):
+        """If the matched region of the file does NOT contain a real tab,
+        new_string's literal ``\\t`` is preserved — the file genuinely uses
+        a backslash-t sequence (e.g. a Python source line ``sep = "\\t"``).
+        """
+        content = 'sep = "\\t"\n'                   # source contains backslash + t
+        old_string = 'sep = "\\t"\n'
+        new_string = 'sep = "\\tab"\n'              # still backslash + t literal
+        new, count, strategy, err = fuzzy_find_and_replace(content, old_string, new_string)
+        assert err is None, f"Unexpected error: {err}"
+        assert count == 1
+        assert strategy == "exact"
+        # File still has the literal two-char ``\t`` — no tab byte injected.
+        assert 'sep = "\\tab"' in new
+        assert "\t" not in new
+
+    def test_no_escape_sequences_passthrough(self):
+        """When new_string has no \\t or \\r, the helper is a no-op."""
+        content = "def foo():\n    return 1\n"
+        old_string = "def foo():\n    return 1\n"
+        new_string = "def foo():\n    return 2\n"
+        new, count, _, err = fuzzy_find_and_replace(content, old_string, new_string)
+        assert err is None
+        assert count == 1
+        assert "return 2" in new
+
diff --git a/tests/tools/test_hardline_blocklist.py b/tests/tools/test_hardline_blocklist.py
index 109badd90fe..8d8062139b8 100644
--- a/tests/tools/test_hardline_blocklist.py
+++ b/tests/tools/test_hardline_blocklist.py
@@ -6,12 +6,10 @@ gateway /yolo, approvals.mode=off, or cron approve mode.
 
 Inspired by Mercury Agent's permission-hardened blocklist.
 """
-import os
 
 import pytest
 
 from tools.approval import (
-    DANGEROUS_PATTERNS,
     HARDLINE_PATTERNS,
     check_all_command_guards,
     check_dangerous_command,
diff --git a/tests/tools/test_heartbeat_stale_thresholds.py b/tests/tools/test_heartbeat_stale_thresholds.py
index fb7db68efb9..34a9e59ef20 100644
--- a/tests/tools/test_heartbeat_stale_thresholds.py
+++ b/tests/tools/test_heartbeat_stale_thresholds.py
@@ -1,6 +1,5 @@
 """Tests for delegate heartbeat stale threshold configuration."""
 
-import pytest
 
 
 class TestHeartbeatStaleThresholds:
diff --git a/tests/tools/test_hidden_dir_filter.py b/tests/tools/test_hidden_dir_filter.py
index c7757864f74..c72a8fab6bc 100644
--- a/tests/tools/test_hidden_dir_filter.py
+++ b/tests/tools/test_hidden_dir_filter.py
@@ -7,8 +7,7 @@ This caused quarantined skills (.hub/quarantine/) to appear as installed.
 Now uses Path.parts which is platform-independent.
 """
 
-import os
-from pathlib import Path, PurePosixPath, PureWindowsPath
+from pathlib import Path
 
 
 def _old_filter_matches(path_str: str) -> bool:
diff --git a/tests/tools/test_interrupt.py b/tests/tools/test_interrupt.py
index 61a898ac38f..27c61023147 100644
--- a/tests/tools/test_interrupt.py
+++ b/tests/tools/test_interrupt.py
@@ -65,7 +65,7 @@ class TestPreToolCheck:
 
     def test_all_tools_skipped_when_interrupted(self):
         """Mock an interrupted agent and verify no tools execute."""
-        from unittest.mock import MagicMock, patch
+        from unittest.mock import MagicMock
 
         # Build a fake assistant_message with 3 tool calls
         tc1 = MagicMock()
diff --git a/tests/tools/test_kanban_tools.py b/tests/tools/test_kanban_tools.py
index 3fc709d38de..24fa09d8ba2 100644
--- a/tests/tools/test_kanban_tools.py
+++ b/tests/tools/test_kanban_tools.py
@@ -1338,6 +1338,7 @@ def test_worker_complete_rejects_stale_run_id(worker_env, monkeypatch):
     try:
         run1 = kb.latest_run(conn, worker_env)
         kb._set_worker_pid(conn, worker_env, 98765)
+        monkeypatch.setenv("HERMES_KANBAN_CRASH_GRACE_SECONDS", "0")
         monkeypatch.setattr(_kb, "_pid_alive", lambda pid: False)
         assert kb.detect_crashed_workers(conn) == [worker_env]
 
diff --git a/tests/tools/test_lazy_deps.py b/tests/tools/test_lazy_deps.py
index 714c5995eaa..028ef0771e3 100644
--- a/tests/tools/test_lazy_deps.py
+++ b/tests/tools/test_lazy_deps.py
@@ -12,7 +12,6 @@ call is mocked — we never actually shell out during unit tests.
 
 from __future__ import annotations
 
-from typing import Iterator
 
 import pytest
 
diff --git a/tests/tools/test_line_ending_preservation.py b/tests/tools/test_line_ending_preservation.py
index 82c055cb810..902b41e5fa2 100644
--- a/tests/tools/test_line_ending_preservation.py
+++ b/tests/tools/test_line_ending_preservation.py
@@ -9,8 +9,6 @@ See issue #507 (Roo Code deep-dive, item 2c).
 """
 
 import json
-import os
-import tempfile
 
 import pytest
 
diff --git a/tests/tools/test_llm_content_none_guard.py b/tests/tools/test_llm_content_none_guard.py
index 5ecdc725d7d..f18101e8273 100644
--- a/tests/tools/test_llm_content_none_guard.py
+++ b/tests/tools/test_llm_content_none_guard.py
@@ -12,7 +12,6 @@ reasoning fields when content is empty.
 
 import asyncio
 import types
-from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
 
diff --git a/tests/tools/test_local_background_child_hang.py b/tests/tools/test_local_background_child_hang.py
index a8cc0ba1024..2ed8c575c69 100644
--- a/tests/tools/test_local_background_child_hang.py
+++ b/tests/tools/test_local_background_child_hang.py
@@ -10,7 +10,6 @@ of the backgrounded service (indefinitely for a uvicorn server).
 The fix switches ``_drain()`` to select()-based non-blocking reads and
 stops draining shortly after bash exits even if the pipe hasn't EOF'd.
 """
-import json
 import subprocess
 import time
 
diff --git a/tests/tools/test_local_env_blocklist.py b/tests/tools/test_local_env_blocklist.py
index 0377d59b361..0e0520387e1 100644
--- a/tests/tools/test_local_env_blocklist.py
+++ b/tests/tools/test_local_env_blocklist.py
@@ -93,6 +93,59 @@ class TestProviderEnvBlocklist:
         for var in registry_vars:
             assert var not in result_env, f"{var} leaked into subprocess env"
 
+    def test_bedrock_bearer_token_is_stripped(self):
+        """The Bedrock-specific bearer token is a Hermes inference secret
+        (analogous to OPENAI_API_KEY) and must not leak into subprocesses.
+
+        Regression for #32314: AWS_BEARER_TOKEN_BEDROCK leaked into terminal /
+        execute_code children because the ``bedrock`` ProviderConfig declares
+        ``api_key_env_vars=()`` (auth_type="aws_sdk") and the blocklist builder
+        only consulted that field. The reporter caught it when ``opencode
+        models`` run inside a Hermes terminal enumerated the entire Bedrock
+        catalog off the leaked bearer token.
+        """
+        result_env = _run_with_env(extra_os_env={
+            "AWS_BEARER_TOKEN_BEDROCK": "bedrock-bearer-secret",
+        })
+
+        assert "AWS_BEARER_TOKEN_BEDROCK" not in result_env, (
+            "AWS_BEARER_TOKEN_BEDROCK leaked into subprocess env (see #32314)"
+        )
+
+    def test_general_aws_credential_chain_is_preserved(self):
+        """The GENERAL AWS credential chain must STILL pass through to
+        subprocesses — this is the no-regression guard for #32314.
+
+        Per SECURITY.md §3.2 the local terminal is the user's trusted operator
+        shell. A user running ``aws``/``terraform``/``cdk``/``boto3`` in the
+        agent terminal must keep the same AWS access their own shell has.
+        Stripping these would (a) break every user who does AWS work in the
+        agent terminal — not just Bedrock users, since the registry is iterated
+        unconditionally — and (b) be unrecoverable, because env_passthrough.py
+        refuses to re-allow anything in _HERMES_PROVIDER_ENV_BLOCKLIST
+        (GHSA-rhgp-j443-p4rf). Only the Bedrock inference bearer token is
+        Hermes-managed; the rest belongs to the user.
+        """
+        general_chain = {
+            "AWS_ACCESS_KEY_ID": "AKIAIOSFODNN7EXAMPLE",
+            "AWS_SECRET_ACCESS_KEY": "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
+            "AWS_SESSION_TOKEN": "session-token",
+            "AWS_PROFILE": "production",
+            "AWS_DEFAULT_REGION": "us-east-1",
+            "AWS_REGION": "us-east-1",
+            "AWS_SHARED_CREDENTIALS_FILE": "/home/user/.aws/credentials",
+            "AWS_CONFIG_FILE": "/home/user/.aws/config",
+            "AWS_WEB_IDENTITY_TOKEN_FILE": "/var/run/secrets/token",
+            "AWS_ROLE_ARN": "arn:aws:iam::123456789012:role/example",
+        }
+        result_env = _run_with_env(extra_os_env=general_chain)
+
+        for var, value in general_chain.items():
+            assert result_env.get(var) == value, (
+                f"{var} was stripped from subprocess env — this is a "
+                f"capability regression (see #32314 discussion)"
+            )
+
     def test_non_registry_provider_vars_are_stripped(self):
         """Extra provider vars not in PROVIDER_REGISTRY must also be blocked."""
         extra_provider_vars = {
@@ -213,6 +266,36 @@ class TestBlocklistCoverage:
                     f"(provider={pconfig.id}) missing from blocklist"
                 )
 
+    def test_bedrock_bearer_token_is_in_blocklist(self):
+        """auth_type='aws_sdk' providers contribute their Hermes-managed
+        inference token (the Bedrock bearer) to the blocklist, keyed off
+        auth_type so any future SDK-cred provider is covered automatically."""
+        assert "AWS_BEARER_TOKEN_BEDROCK" in _HERMES_PROVIDER_ENV_BLOCKLIST
+
+    def test_general_aws_chain_not_in_blocklist(self):
+        """The general AWS credential chain must NOT be in the blocklist —
+        no-regression guard for #32314. These belong to the user's trusted
+        operator shell (SECURITY.md §3.2), not to Hermes, and blocklisting
+        them would be unrecoverable via env_passthrough (GHSA-rhgp-j443-p4rf).
+        """
+        general_chain = {
+            "AWS_ACCESS_KEY_ID",
+            "AWS_SECRET_ACCESS_KEY",
+            "AWS_SESSION_TOKEN",
+            "AWS_PROFILE",
+            "AWS_DEFAULT_REGION",
+            "AWS_REGION",
+            "AWS_SHARED_CREDENTIALS_FILE",
+            "AWS_CONFIG_FILE",
+            "AWS_WEB_IDENTITY_TOKEN_FILE",
+            "AWS_ROLE_ARN",
+        }
+        leaked_block = general_chain & _HERMES_PROVIDER_ENV_BLOCKLIST
+        assert not leaked_block, (
+            f"General AWS chain vars must stay inheritable, but these are "
+            f"blocklisted: {sorted(leaked_block)} (capability regression, #32314)"
+        )
+
     def test_extra_auth_vars_covered(self):
         """Non-registry auth vars (ANTHROPIC_TOKEN, CLAUDE_CODE_OAUTH_TOKEN)
         must also be in the blocklist."""
diff --git a/tests/tools/test_local_env_windows_msys.py b/tests/tools/test_local_env_windows_msys.py
index 6987c965af6..529e8b2f2ae 100644
--- a/tests/tools/test_local_env_windows_msys.py
+++ b/tests/tools/test_local_env_windows_msys.py
@@ -18,10 +18,8 @@ and ``os.path.isdir`` so the MSYS path tests as "missing" exactly like
 on the real OS.
 """
 
-import os
 from unittest.mock import patch
 
-import pytest
 
 from tools.environments import local as local_mod
 from tools.environments.local import (
diff --git a/tests/tools/test_local_interrupt_cleanup.py b/tests/tools/test_local_interrupt_cleanup.py
index 67d9e9e6b54..73b7c76dcb8 100644
--- a/tests/tools/test_local_interrupt_cleanup.py
+++ b/tests/tools/test_local_interrupt_cleanup.py
@@ -161,7 +161,6 @@ def test_wait_for_process_kills_subprocess_on_keyboardinterrupt():
         # way CPython's signal machinery would.  We use ctypes.PyThreadState_SetAsyncExc
         # which is how signal delivery to non-main threads is simulated.
         import ctypes
-        import sys as _sys
         # py-thread-state exception targets need the ident, not the Thread
         tid = t.ident
         assert tid is not None
diff --git a/tests/tools/test_local_shell_init.py b/tests/tools/test_local_shell_init.py
index 7dabaadf12a..1bdaeeeb67a 100644
--- a/tests/tools/test_local_shell_init.py
+++ b/tests/tools/test_local_shell_init.py
@@ -14,7 +14,6 @@ import pytest
 from tools.environments.local import (
     LocalEnvironment,
     _prepend_shell_init,
-    _read_terminal_shell_init_config,
     _resolve_shell_init_files,
 )
 
diff --git a/tests/tools/test_managed_browserbase_and_modal.py b/tests/tools/test_managed_browserbase_and_modal.py
index d88789706ba..fc2559dc756 100644
--- a/tests/tools/test_managed_browserbase_and_modal.py
+++ b/tests/tools/test_managed_browserbase_and_modal.py
@@ -9,6 +9,8 @@ from unittest.mock import patch
 
 import pytest
 
+from hermes_cli.nous_account import NousPortalAccountInfo
+
 
 REPO_ROOT = Path(__file__).resolve().parents[2]
 TOOLS_DIR = REPO_ROOT / "tools"
@@ -69,10 +71,17 @@ def _enable_managed_nous_tools(monkeypatch):
     The _install_fake_tools_package() helper resets and reimports tool modules,
     so a simple monkeypatch on tool_backend_helpers doesn't survive.  We patch
     the *source* modules that the reimported modules will import from — both
-    hermes_cli.auth and hermes_cli.models — so the function body returns True.
+    hermes_cli.nous_account — so the function body returns True.
     """
-    monkeypatch.setattr("hermes_cli.auth.get_nous_auth_status", lambda: {"logged_in": True})
-    monkeypatch.setattr("hermes_cli.models.check_nous_free_tier", lambda: False)
+    monkeypatch.setattr(
+        "hermes_cli.nous_account.get_nous_portal_account_info",
+        lambda: NousPortalAccountInfo(
+            logged_in=True,
+            source="jwt",
+            fresh=False,
+            paid_service_access=True,
+        ),
+    )
 
 
 def _install_fake_tools_package():
diff --git a/tests/tools/test_managed_media_gateways.py b/tests/tools/test_managed_media_gateways.py
index 4468dfe94d7..d8b60d1644d 100644
--- a/tests/tools/test_managed_media_gateways.py
+++ b/tests/tools/test_managed_media_gateways.py
@@ -5,6 +5,8 @@ from pathlib import Path
 
 import pytest
 
+from hermes_cli.nous_account import NousPortalAccountInfo
+
 
 TOOLS_DIR = Path(__file__).resolve().parents[2] / "tools"
 
@@ -48,8 +50,15 @@ def _restore_tool_and_agent_modules():
 def _enable_managed_nous_tools(monkeypatch):
     """Patch the source modules so managed_nous_tools_enabled() returns True
     even after tool modules are dynamically reloaded."""
-    monkeypatch.setattr("hermes_cli.auth.get_nous_auth_status", lambda: {"logged_in": True})
-    monkeypatch.setattr("hermes_cli.models.check_nous_free_tier", lambda: False)
+    monkeypatch.setattr(
+        "hermes_cli.nous_account.get_nous_portal_account_info",
+        lambda: NousPortalAccountInfo(
+            logged_in=True,
+            source="jwt",
+            fresh=False,
+            paid_service_access=True,
+        ),
+    )
 
 
 def _install_fake_tools_package():
@@ -296,3 +305,214 @@ def test_transcription_uses_model_specific_response_formats(monkeypatch, tmp_pat
     assert json_result["transcript"] == "hello from gpt-4o"
     assert json_capture["transcription_kwargs"]["response_format"] == "json"
     assert json_capture["close_calls"] == 1
+
+
+PLUGINS_DIR = Path(__file__).resolve().parents[2] / "plugins"
+
+
+def _load_video_gen_plugin(monkeypatch):
+    """Load the FAL video gen plugin in isolation."""
+    _install_fake_tools_package()
+
+    # Also need the agent.video_gen_provider ABC
+    agent_dir = Path(__file__).resolve().parents[2] / "agent"
+    spec = spec_from_file_location(
+        "agent.video_gen_provider",
+        agent_dir / "video_gen_provider.py",
+    )
+    assert spec and spec.loader
+    mod = module_from_spec(spec)
+    sys.modules["agent.video_gen_provider"] = mod
+    spec.loader.exec_module(mod)
+
+    # Load the plugin
+    plugin_init = PLUGINS_DIR / "video_gen" / "fal" / "__init__.py"
+    spec = spec_from_file_location("plugins.video_gen.fal", plugin_init)
+    assert spec and spec.loader
+    plugin_mod = module_from_spec(spec)
+    sys.modules["plugins.video_gen.fal"] = plugin_mod
+    spec.loader.exec_module(plugin_mod)
+    return plugin_mod
+
+
+def test_video_gen_managed_fal_submit_uses_gateway(monkeypatch):
+    """Video gen routes through the managed gateway when FAL_KEY is absent."""
+    captured = {}
+    fake_fal = _install_fake_fal_client(captured)
+    monkeypatch.delenv("FAL_KEY", raising=False)
+    monkeypatch.setenv("FAL_QUEUE_GATEWAY_URL", "http://127.0.0.1:3009")
+    monkeypatch.setenv("TOOL_GATEWAY_USER_TOKEN", "nous-video-token")
+
+    plugin = _load_video_gen_plugin(monkeypatch)
+
+    # Patch uuid for deterministic idempotency key
+    monkeypatch.setattr(plugin.uuid, "uuid4", lambda: "video-submit-456")
+
+    plugin._submit_fal_video_request(
+        "fal-ai/pixverse/v6/text-to-video",
+        {"prompt": "a cat riding a bicycle", "duration": "5"},
+    )
+
+    assert captured["submit_via"] == "managed_client"
+    assert captured["client_key"] == "nous-video-token"
+    assert captured["submit_url"] == "http://127.0.0.1:3009/fal-ai/pixverse/v6/text-to-video"
+    assert captured["method"] == "POST"
+    assert captured["arguments"] == {"prompt": "a cat riding a bicycle", "duration": "5"}
+    assert captured["headers"] == {"x-idempotency-key": "video-submit-456"}
+    assert captured["sync_client_inits"] == 1
+
+
+def test_video_gen_managed_client_reused_across_calls(monkeypatch):
+    """The managed video client is cached and reused across requests."""
+    captured = {}
+    _install_fake_fal_client(captured)
+    monkeypatch.delenv("FAL_KEY", raising=False)
+    monkeypatch.setenv("FAL_QUEUE_GATEWAY_URL", "http://127.0.0.1:3009")
+    monkeypatch.setenv("TOOL_GATEWAY_USER_TOKEN", "nous-video-token")
+
+    plugin = _load_video_gen_plugin(monkeypatch)
+
+    plugin._submit_fal_video_request("fal-ai/pixverse/v6/text-to-video", {"prompt": "first"})
+    first_client = captured["http_client"]
+    plugin._submit_fal_video_request("fal-ai/pixverse/v6/text-to-video", {"prompt": "second"})
+
+    assert captured["sync_client_inits"] == 1
+    assert captured["http_client"] is first_client
+
+
+def test_video_gen_direct_mode_when_fal_key_set(monkeypatch):
+    """When FAL_KEY is set and gateway not preferred, uses direct fal_client.submit."""
+    captured = {}
+    _install_fake_fal_client(captured)
+    monkeypatch.setenv("FAL_KEY", "direct-fal-key-123")
+    monkeypatch.delenv("FAL_QUEUE_GATEWAY_URL", raising=False)
+    monkeypatch.delenv("TOOL_GATEWAY_USER_TOKEN", raising=False)
+
+    plugin = _load_video_gen_plugin(monkeypatch)
+    monkeypatch.setattr(plugin.uuid, "uuid4", lambda: "direct-456")
+
+    # Trigger the lazy load so _fal_client is populated from our fake
+    plugin._load_fal_client()
+
+    # In direct mode, fal_client.submit is the module-level function.
+    # Our fake raises AssertionError from the managed path, so we need
+    # to patch it to actually capture the call.
+    direct_captured = {}
+
+    def direct_submit(endpoint, arguments=None, headers=None):
+        direct_captured["endpoint"] = endpoint
+        direct_captured["arguments"] = arguments
+        direct_captured["headers"] = headers
+        # Return a mock handle
+        class FakeHandle:
+            def get(self):
+                return {"video": {"url": "https://fal.media/result.mp4"}}
+        return FakeHandle()
+
+    plugin._fal_client.submit = direct_submit
+
+    plugin._submit_fal_video_request(
+        "fal-ai/pixverse/v6/text-to-video",
+        {"prompt": "test direct"},
+    )
+
+    assert direct_captured["endpoint"] == "fal-ai/pixverse/v6/text-to-video"
+    assert direct_captured["arguments"] == {"prompt": "test direct"}
+    assert direct_captured["headers"] == {"x-idempotency-key": "direct-456"}
+    # Managed client should NOT have been initialized
+    assert "submit_via" not in captured
+
+
+def test_video_gen_gateway_4xx_raises_actionable_valueerror(monkeypatch):
+    """A 4xx from the managed gateway surfaces a clear ValueError with remediation hints."""
+    captured = {}
+    _install_fake_fal_client(captured)
+    monkeypatch.delenv("FAL_KEY", raising=False)
+    monkeypatch.setenv("FAL_QUEUE_GATEWAY_URL", "http://127.0.0.1:3009")
+    monkeypatch.setenv("TOOL_GATEWAY_USER_TOKEN", "nous-video-token")
+
+    plugin = _load_video_gen_plugin(monkeypatch)
+
+    # Make _maybe_retry_request raise an exception with a 403 status
+    class FakeResponse:
+        status_code = 403
+
+    class GatewayRejectError(Exception):
+        def __init__(self):
+            super().__init__("forbidden")
+            self.response = FakeResponse()
+
+    original_retry = sys.modules["fal_client"].client._maybe_retry_request
+
+    def raising_retry(client, method, url, json=None, timeout=None, headers=None):
+        raise GatewayRejectError()
+
+    sys.modules["fal_client"].client._maybe_retry_request = raising_retry
+
+    with pytest.raises(ValueError, match=r"gateway rejected endpoint.*HTTP 403"):
+        plugin._submit_fal_video_request(
+            "fal-ai/pixverse/v6/text-to-video",
+            {"prompt": "test 4xx"},
+        )
+
+
+def test_video_gen_is_available_true_via_gateway(monkeypatch):
+    """is_available() returns True when FAL_KEY is absent but managed gateway is configured."""
+    _install_fake_fal_client({})
+    monkeypatch.delenv("FAL_KEY", raising=False)
+    monkeypatch.setenv("FAL_QUEUE_GATEWAY_URL", "http://127.0.0.1:3009")
+    monkeypatch.setenv("TOOL_GATEWAY_USER_TOKEN", "nous-video-token")
+
+    plugin = _load_video_gen_plugin(monkeypatch)
+    provider = plugin.FALVideoGenProvider()
+    assert provider.is_available() is True
+
+
+def test_video_gen_prefers_gateway_overrides_direct_key(monkeypatch):
+    """When FAL_KEY is set but prefers_gateway('video_gen') is True, routes through gateway."""
+    captured = {}
+    _install_fake_fal_client(captured)
+    monkeypatch.setenv("FAL_KEY", "direct-key-present")
+    monkeypatch.setenv("FAL_QUEUE_GATEWAY_URL", "http://127.0.0.1:3009")
+    monkeypatch.setenv("TOOL_GATEWAY_USER_TOKEN", "nous-video-token")
+
+    plugin = _load_video_gen_plugin(monkeypatch)
+
+    # Patch prefers_gateway to return True for video_gen
+    tb_helpers = sys.modules["tools.tool_backend_helpers"]
+    original_pg = tb_helpers.prefers_gateway
+    monkeypatch.setattr(tb_helpers, "prefers_gateway", lambda section: section == "video_gen")
+
+    plugin._submit_fal_video_request(
+        "fal-ai/pixverse/v6/text-to-video",
+        {"prompt": "gateway preferred"},
+    )
+
+    assert captured["submit_via"] == "managed_client"
+    assert captured["client_key"] == "nous-video-token"
+
+
+def test_video_gen_happy_horse_uses_alibaba_namespace():
+    """Verify the happy-horse family uses alibaba/ not fal-ai/ endpoints."""
+    _install_fake_tools_package()
+
+    # Load just the plugin module to check the catalog
+    plugin_init = PLUGINS_DIR / "video_gen" / "fal" / "__init__.py"
+
+    agent_dir = Path(__file__).resolve().parents[2] / "agent"
+    spec = spec_from_file_location(
+        "agent.video_gen_provider",
+        agent_dir / "video_gen_provider.py",
+    )
+    mod = module_from_spec(spec)
+    sys.modules["agent.video_gen_provider"] = mod
+    spec.loader.exec_module(mod)
+
+    spec = spec_from_file_location("plugins.video_gen.fal", plugin_init)
+    plugin_mod = module_from_spec(spec)
+    sys.modules["plugins.video_gen.fal"] = plugin_mod
+    spec.loader.exec_module(plugin_mod)
+
+    hh = plugin_mod.FAL_FAMILIES["happy-horse"]
+    assert hh["text_endpoint"] == "alibaba/happy-horse/text-to-video"
+    assert hh["image_endpoint"] == "alibaba/happy-horse/image-to-video"
diff --git a/tests/tools/test_managed_modal_environment.py b/tests/tools/test_managed_modal_environment.py
index 8380e49058c..ccf00ca612a 100644
--- a/tests/tools/test_managed_modal_environment.py
+++ b/tests/tools/test_managed_modal_environment.py
@@ -1,4 +1,3 @@
-import json
 import sys
 import tempfile
 import threading
diff --git a/tests/tools/test_mcp_cancelled_error_propagation.py b/tests/tools/test_mcp_cancelled_error_propagation.py
index c0e91f31531..13636c3caac 100644
--- a/tests/tools/test_mcp_cancelled_error_propagation.py
+++ b/tests/tools/test_mcp_cancelled_error_propagation.py
@@ -20,7 +20,6 @@ from __future__ import annotations
 import asyncio
 from unittest.mock import patch
 
-import pytest
 
 
 async def _hanging_run(self, cfg):
diff --git a/tests/tools/test_mcp_client_cert.py b/tests/tools/test_mcp_client_cert.py
new file mode 100644
index 00000000000..57ffe8ad723
--- /dev/null
+++ b/tests/tools/test_mcp_client_cert.py
@@ -0,0 +1,521 @@
+"""Tests for mTLS client certificate config on MCP HTTP/SSE transports.
+
+Covers:
+
+1. ``_resolve_client_cert`` helper — string, tuple, encrypted-key, validation
+   errors, missing-file errors.
+
+2. HTTP (new SDK ``streamable_http_client``) path forwards ``cert=`` into the
+   user-owned ``httpx.AsyncClient``.
+
+3. SSE path forwards ``cert`` and ``ssl_verify`` via an ``httpx_client_factory``
+   without breaking the OAuth/headers/timeout passthrough.
+"""
+
+from __future__ import annotations
+
+import asyncio
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+
+# ---------------------------------------------------------------------------
+# _resolve_client_cert helper
+# ---------------------------------------------------------------------------
+
+
+class TestResolveClientCert:
+    def test_returns_none_when_unset(self):
+        from tools.mcp_tool import _resolve_client_cert
+
+        assert _resolve_client_cert("srv", {}) is None
+        assert _resolve_client_cert("srv", {"url": "https://x"}) is None
+
+    def test_string_form_single_pem(self, tmp_path):
+        from tools.mcp_tool import _resolve_client_cert
+
+        pem = tmp_path / "combined.pem"
+        pem.write_text("dummy")
+
+        result = _resolve_client_cert("srv", {"client_cert": str(pem)})
+        assert result == str(pem)
+
+    def test_string_cert_with_separate_key(self, tmp_path):
+        from tools.mcp_tool import _resolve_client_cert
+
+        cert = tmp_path / "client.crt"
+        key = tmp_path / "client.key"
+        cert.write_text("cert")
+        key.write_text("key")
+
+        result = _resolve_client_cert("srv", {
+            "client_cert": str(cert),
+            "client_key": str(key),
+        })
+        assert result == (str(cert), str(key))
+
+    def test_list_form_two_elements(self, tmp_path):
+        from tools.mcp_tool import _resolve_client_cert
+
+        cert = tmp_path / "client.crt"
+        key = tmp_path / "client.key"
+        cert.write_text("cert")
+        key.write_text("key")
+
+        result = _resolve_client_cert("srv", {
+            "client_cert": [str(cert), str(key)],
+        })
+        assert result == (str(cert), str(key))
+
+    def test_list_form_with_passphrase(self, tmp_path):
+        from tools.mcp_tool import _resolve_client_cert
+
+        cert = tmp_path / "client.crt"
+        key = tmp_path / "client.key"
+        cert.write_text("cert")
+        key.write_text("key")
+
+        result = _resolve_client_cert("srv", {
+            "client_cert": [str(cert), str(key), "passphrase"],
+        })
+        assert result == (str(cert), str(key), "passphrase")
+
+    def test_tilde_expansion(self, tmp_path, monkeypatch):
+        from tools.mcp_tool import _resolve_client_cert
+
+        monkeypatch.setenv("HOME", str(tmp_path))
+        pem = tmp_path / "client.pem"
+        pem.write_text("dummy")
+
+        result = _resolve_client_cert("srv", {"client_cert": "~/client.pem"})
+        assert result == str(pem)
+
+    def test_missing_file_raises(self, tmp_path):
+        from tools.mcp_tool import _resolve_client_cert
+
+        with pytest.raises(FileNotFoundError, match=r"srv.*client_cert.*not found"):
+            _resolve_client_cert("srv", {
+                "client_cert": str(tmp_path / "nope.pem"),
+            })
+
+    def test_missing_key_file_raises(self, tmp_path):
+        from tools.mcp_tool import _resolve_client_cert
+
+        cert = tmp_path / "client.crt"
+        cert.write_text("cert")
+
+        with pytest.raises(FileNotFoundError, match=r"srv.*client_key.*not found"):
+            _resolve_client_cert("srv", {
+                "client_cert": str(cert),
+                "client_key": str(tmp_path / "missing.key"),
+            })
+
+    def test_list_with_bad_length_raises(self, tmp_path):
+        from tools.mcp_tool import _resolve_client_cert
+
+        with pytest.raises(ValueError, match=r"list form must have 2 or 3"):
+            _resolve_client_cert("srv", {"client_cert": [str(tmp_path / "x")]})
+
+    def test_list_plus_client_key_rejected(self, tmp_path):
+        from tools.mcp_tool import _resolve_client_cert
+
+        cert = tmp_path / "client.crt"
+        key = tmp_path / "client.key"
+        cert.write_text("cert")
+        key.write_text("key")
+
+        with pytest.raises(ValueError, match=r"either client_cert as a list"):
+            _resolve_client_cert("srv", {
+                "client_cert": [str(cert), str(key)],
+                "client_key": str(key),
+            })
+
+    def test_non_string_path_rejected(self):
+        from tools.mcp_tool import _resolve_client_cert
+
+        with pytest.raises(ValueError, match=r"client_cert must be a non-empty string"):
+            _resolve_client_cert("srv", {"client_cert": 123})
+
+    def test_password_must_be_string(self, tmp_path):
+        from tools.mcp_tool import _resolve_client_cert
+
+        cert = tmp_path / "client.crt"
+        key = tmp_path / "client.key"
+        cert.write_text("cert")
+        key.write_text("key")
+
+        with pytest.raises(ValueError, match=r"key passphrase.*must be a string"):
+            _resolve_client_cert("srv", {
+                "client_cert": [str(cert), str(key), 42],
+            })
+
+
+# ---------------------------------------------------------------------------
+# HTTP transport — cert forwarded into httpx.AsyncClient
+# ---------------------------------------------------------------------------
+
+
+class TestHTTPClientCert:
+    def test_cert_forwarded_to_async_client(self, tmp_path):
+        """When client_cert is set, the new-SDK HTTP path passes ``cert=``
+        into ``httpx.AsyncClient``."""
+        from tools.mcp_tool import MCPServerTask
+
+        cert = tmp_path / "client.pem"
+        cert.write_text("dummy")
+
+        server = MCPServerTask("remote")
+        captured: dict = {}
+
+        class DummyAsyncClient:
+            def __init__(self, **kwargs):
+                captured.update(kwargs)
+
+            async def __aenter__(self):
+                return self
+
+            async def __aexit__(self, *a):
+                return False
+
+        class DummyTransportCtx:
+            async def __aenter__(self):
+                return MagicMock(), MagicMock(), (lambda: None)
+
+            async def __aexit__(self, *a):
+                return False
+
+        class DummySession:
+            def __init__(self, *args, **kwargs):
+                pass
+
+            async def __aenter__(self):
+                return self
+
+            async def __aexit__(self, *a):
+                return False
+
+            async def initialize(self):
+                return None
+
+        async def _discover_tools(self):
+            self._shutdown_event.set()
+
+        async def _drive():
+            with patch("tools.mcp_tool._MCP_HTTP_AVAILABLE", True), \
+                 patch("tools.mcp_tool._MCP_NEW_HTTP", True), \
+                 patch("httpx.AsyncClient", DummyAsyncClient), \
+                 patch("tools.mcp_tool.streamable_http_client",
+                       return_value=DummyTransportCtx()), \
+                 patch("tools.mcp_tool.ClientSession", DummySession), \
+                 patch.object(MCPServerTask, "_discover_tools", _discover_tools):
+                await server._run_http({
+                    "url": "https://example.com/mcp",
+                    "client_cert": str(cert),
+                })
+
+        asyncio.run(_drive())
+        assert captured.get("cert") == str(cert)
+
+    def test_cert_tuple_forwarded(self, tmp_path):
+        """List/tuple form resolves to a tuple in ``cert=``."""
+        from tools.mcp_tool import MCPServerTask
+
+        cert = tmp_path / "client.crt"
+        key = tmp_path / "client.key"
+        cert.write_text("cert")
+        key.write_text("key")
+
+        server = MCPServerTask("remote")
+        captured: dict = {}
+
+        class DummyAsyncClient:
+            def __init__(self, **kwargs):
+                captured.update(kwargs)
+
+            async def __aenter__(self):
+                return self
+
+            async def __aexit__(self, *a):
+                return False
+
+        class DummyTransportCtx:
+            async def __aenter__(self):
+                return MagicMock(), MagicMock(), (lambda: None)
+
+            async def __aexit__(self, *a):
+                return False
+
+        class DummySession:
+            def __init__(self, *args, **kwargs):
+                pass
+
+            async def __aenter__(self):
+                return self
+
+            async def __aexit__(self, *a):
+                return False
+
+            async def initialize(self):
+                return None
+
+        async def _discover_tools(self):
+            self._shutdown_event.set()
+
+        async def _drive():
+            with patch("tools.mcp_tool._MCP_HTTP_AVAILABLE", True), \
+                 patch("tools.mcp_tool._MCP_NEW_HTTP", True), \
+                 patch("httpx.AsyncClient", DummyAsyncClient), \
+                 patch("tools.mcp_tool.streamable_http_client",
+                       return_value=DummyTransportCtx()), \
+                 patch("tools.mcp_tool.ClientSession", DummySession), \
+                 patch.object(MCPServerTask, "_discover_tools", _discover_tools):
+                await server._run_http({
+                    "url": "https://example.com/mcp",
+                    "client_cert": [str(cert), str(key)],
+                })
+
+        asyncio.run(_drive())
+        assert captured.get("cert") == (str(cert), str(key))
+
+    def test_no_cert_means_no_cert_kwarg(self):
+        """When client_cert is unset, ``cert`` is not passed to ``httpx.AsyncClient``
+        (matches SDK defaults)."""
+        from tools.mcp_tool import MCPServerTask
+
+        server = MCPServerTask("remote")
+        captured: dict = {}
+
+        class DummyAsyncClient:
+            def __init__(self, **kwargs):
+                captured.update(kwargs)
+
+            async def __aenter__(self):
+                return self
+
+            async def __aexit__(self, *a):
+                return False
+
+        class DummyTransportCtx:
+            async def __aenter__(self):
+                return MagicMock(), MagicMock(), (lambda: None)
+
+            async def __aexit__(self, *a):
+                return False
+
+        class DummySession:
+            def __init__(self, *args, **kwargs):
+                pass
+
+            async def __aenter__(self):
+                return self
+
+            async def __aexit__(self, *a):
+                return False
+
+            async def initialize(self):
+                return None
+
+        async def _discover_tools(self):
+            self._shutdown_event.set()
+
+        async def _drive():
+            with patch("tools.mcp_tool._MCP_HTTP_AVAILABLE", True), \
+                 patch("tools.mcp_tool._MCP_NEW_HTTP", True), \
+                 patch("httpx.AsyncClient", DummyAsyncClient), \
+                 patch("tools.mcp_tool.streamable_http_client",
+                       return_value=DummyTransportCtx()), \
+                 patch("tools.mcp_tool.ClientSession", DummySession), \
+                 patch.object(MCPServerTask, "_discover_tools", _discover_tools):
+                await server._run_http({"url": "https://example.com/mcp"})
+
+        asyncio.run(_drive())
+        assert "cert" not in captured
+
+    def test_missing_cert_file_surfaces_clear_error(self, tmp_path):
+        """A missing cert file fails fast with a server-scoped error message."""
+        from tools.mcp_tool import MCPServerTask
+
+        server = MCPServerTask("remote")
+
+        async def _drive():
+            with patch("tools.mcp_tool._MCP_HTTP_AVAILABLE", True), \
+                 patch("tools.mcp_tool._MCP_NEW_HTTP", True):
+                await server._run_http({
+                    "url": "https://example.com/mcp",
+                    "client_cert": str(tmp_path / "nope.pem"),
+                })
+
+        with pytest.raises(FileNotFoundError, match=r"remote.*client_cert.*not found"):
+            asyncio.run(_drive())
+
+
+# ---------------------------------------------------------------------------
+# SSE transport — cert + verify routed via httpx_client_factory
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def patch_sse_client():
+    """Replace ``sse_client`` with a MagicMock that records its kwargs.
+
+    Returns the captured kwargs dict so tests can assert how ``_run_http``
+    called it.
+    """
+    captured_kwargs: dict = {}
+
+    class _FakeStream:
+        def __init__(self):
+            self._read = AsyncMock()
+            self._write = AsyncMock()
+
+        async def __aenter__(self):
+            return (self._read, self._write)
+
+        async def __aexit__(self, *a):
+            return False
+
+    def fake_sse_client(**kwargs):
+        captured_kwargs.clear()
+        captured_kwargs.update(kwargs)
+        return _FakeStream()
+
+    class _FakeSession:
+        def __init__(self, *args, **kwargs):
+            pass
+
+        async def __aenter__(self):
+            mock_session = MagicMock()
+            mock_session.initialize = AsyncMock()
+            return mock_session
+
+        async def __aexit__(self, *a):
+            return False
+
+    with patch("tools.mcp_tool.sse_client", new=fake_sse_client), \
+         patch("tools.mcp_tool.ClientSession", new=_FakeSession):
+        yield captured_kwargs
+
+
+class TestSSEClientCert:
+    def test_no_factory_when_defaults(self, patch_sse_client):
+        """With no cert and ssl_verify=True (default), the SDK's own factory is
+        used — we don't inject one."""
+        from tools.mcp_tool import MCPServerTask
+
+        server = MCPServerTask("sse-test")
+        server._auth_type = ""
+        server._sampling = None
+
+        async def drive():
+            with patch.object(MCPServerTask, "_wait_for_lifecycle_event",
+                              new=AsyncMock(return_value="shutdown")), \
+                 patch.object(MCPServerTask, "_discover_tools", new=AsyncMock()):
+                try:
+                    await asyncio.wait_for(
+                        server._run_http({
+                            "url": "https://example.com/mcp/sse",
+                            "transport": "sse",
+                        }),
+                        timeout=2.0,
+                    )
+                except (asyncio.TimeoutError, StopAsyncIteration, Exception):
+                    pass
+
+        asyncio.run(drive())
+        assert "httpx_client_factory" not in patch_sse_client
+
+    def test_factory_injected_when_cert_set(self, patch_sse_client, tmp_path):
+        """With client_cert set, an httpx_client_factory is injected that
+        applies the cert (and follow_redirects=True to match the SDK)."""
+        from tools.mcp_tool import MCPServerTask
+
+        cert = tmp_path / "client.pem"
+        cert.write_text("dummy")
+
+        server = MCPServerTask("sse-test")
+        server._auth_type = ""
+        server._sampling = None
+
+        async def drive():
+            with patch.object(MCPServerTask, "_wait_for_lifecycle_event",
+                              new=AsyncMock(return_value="shutdown")), \
+                 patch.object(MCPServerTask, "_discover_tools", new=AsyncMock()):
+                try:
+                    await asyncio.wait_for(
+                        server._run_http({
+                            "url": "https://example.com/mcp/sse",
+                            "transport": "sse",
+                            "client_cert": str(cert),
+                        }),
+                        timeout=2.0,
+                    )
+                except (asyncio.TimeoutError, StopAsyncIteration, Exception):
+                    pass
+
+        asyncio.run(drive())
+
+        factory = patch_sse_client.get("httpx_client_factory")
+        assert factory is not None, "expected httpx_client_factory to be injected"
+
+        # Invoke the factory the way the SDK would; capture the resulting
+        # httpx.AsyncClient kwargs.
+        captured_client_kwargs: dict = {}
+
+        class DummyAsyncClient:
+            def __init__(self, **kwargs):
+                captured_client_kwargs.update(kwargs)
+
+        import httpx
+        with patch.object(httpx, "AsyncClient", DummyAsyncClient):
+            factory(headers={"x": "y"}, timeout=httpx.Timeout(30.0), auth=None)
+
+        assert captured_client_kwargs["cert"] == str(cert)
+        assert captured_client_kwargs["verify"] is True
+        assert captured_client_kwargs["follow_redirects"] is True
+        assert captured_client_kwargs["headers"] == {"x": "y"}
+
+    def test_factory_forwards_custom_ca_bundle(self, patch_sse_client, tmp_path):
+        """ssl_verify as a path is forwarded to the factory's httpx client."""
+        from tools.mcp_tool import MCPServerTask
+
+        ca_bundle = tmp_path / "ca.pem"
+        ca_bundle.write_text("dummy")
+
+        server = MCPServerTask("sse-test")
+        server._auth_type = ""
+        server._sampling = None
+
+        async def drive():
+            with patch.object(MCPServerTask, "_wait_for_lifecycle_event",
+                              new=AsyncMock(return_value="shutdown")), \
+                 patch.object(MCPServerTask, "_discover_tools", new=AsyncMock()):
+                try:
+                    await asyncio.wait_for(
+                        server._run_http({
+                            "url": "https://example.com/mcp/sse",
+                            "transport": "sse",
+                            "ssl_verify": str(ca_bundle),
+                        }),
+                        timeout=2.0,
+                    )
+                except (asyncio.TimeoutError, StopAsyncIteration, Exception):
+                    pass
+
+        asyncio.run(drive())
+
+        factory = patch_sse_client.get("httpx_client_factory")
+        assert factory is not None
+
+        captured_client_kwargs: dict = {}
+
+        class DummyAsyncClient:
+            def __init__(self, **kwargs):
+                captured_client_kwargs.update(kwargs)
+
+        import httpx
+        with patch.object(httpx, "AsyncClient", DummyAsyncClient):
+            factory(headers=None, timeout=None, auth=None)
+
+        assert captured_client_kwargs["verify"] == str(ca_bundle)
+        assert "cert" not in captured_client_kwargs
diff --git a/tests/tools/test_mcp_empty_error_message.py b/tests/tools/test_mcp_empty_error_message.py
index 6c04089f670..b518973085c 100644
--- a/tests/tools/test_mcp_empty_error_message.py
+++ b/tests/tools/test_mcp_empty_error_message.py
@@ -7,11 +7,7 @@ nothing after the colon, making debugging impossible.
 Fix: ``_exc_str()`` falls back to ``repr(exc)`` when ``str(exc)`` is empty.
 """
 
-import json
-from types import SimpleNamespace
-from unittest.mock import MagicMock, patch
 
-import pytest
 
 from tools.mcp_tool import _exc_str, _sanitize_error
 
diff --git a/tests/tools/test_mcp_image_content.py b/tests/tools/test_mcp_image_content.py
index ba60fdfecbd..fecce18f927 100644
--- a/tests/tools/test_mcp_image_content.py
+++ b/tests/tools/test_mcp_image_content.py
@@ -18,9 +18,7 @@ from __future__ import annotations
 
 import base64
 from types import SimpleNamespace
-from unittest.mock import patch
 
-import pytest
 
 
 def _png_bytes():
diff --git a/tests/tools/test_mcp_oauth.py b/tests/tools/test_mcp_oauth.py
index b858127cd07..e43bf0a1851 100644
--- a/tests/tools/test_mcp_oauth.py
+++ b/tests/tools/test_mcp_oauth.py
@@ -5,8 +5,7 @@ import os
 import stat
 import sys
 from io import BytesIO
-from pathlib import Path
-from unittest.mock import patch, MagicMock, AsyncMock
+from unittest.mock import patch, MagicMock
 
 import pytest
 
diff --git a/tests/tools/test_mcp_stability.py b/tests/tools/test_mcp_stability.py
index 163a05963e0..1dd76959854 100644
--- a/tests/tools/test_mcp_stability.py
+++ b/tests/tools/test_mcp_stability.py
@@ -1,12 +1,9 @@
 """Tests for MCP stability fixes — event loop handler, PID tracking, shutdown robustness."""
 
 import asyncio
-import os
 import signal
-import threading
 from unittest.mock import patch, MagicMock
 
-import pytest
 
 
 # ---------------------------------------------------------------------------
@@ -227,7 +224,7 @@ class TestMCPInitialConnectionRetry:
 
     def test_initial_connect_retry_succeeds_on_second_attempt(self):
         """Server succeeds after one transient initial failure."""
-        from tools.mcp_tool import MCPServerTask, _MAX_INITIAL_CONNECT_RETRIES
+        from tools.mcp_tool import MCPServerTask
 
         call_count = 0
 
diff --git a/tests/tools/test_mcp_tool.py b/tests/tools/test_mcp_tool.py
index b9a3cfcf8d9..e2575664748 100644
--- a/tests/tools/test_mcp_tool.py
+++ b/tests/tools/test_mcp_tool.py
@@ -5,7 +5,6 @@ All tests use mocks -- no real MCP servers or subprocesses are started.
 
 import asyncio
 import json
-import os
 import threading
 import time
 from types import SimpleNamespace
@@ -1770,7 +1769,7 @@ class TestConfigurableTimeouts:
 
     def test_timeout_passed_to_handler(self):
         """The tool handler uses the server's configured timeout."""
-        from tools.mcp_tool import _make_tool_handler, _servers, MCPServerTask
+        from tools.mcp_tool import _make_tool_handler, _servers
 
         mock_session = MagicMock()
         mock_session.call_tool = AsyncMock(
@@ -2225,8 +2224,6 @@ class TestUtilityToolRegistration:
 # SamplingHandler tests
 # ===========================================================================
 
-import math
-import time
 
 class _CompatType:
     def __init__(self, **kwargs):
diff --git a/tests/tools/test_mcp_tool_issue_948.py b/tests/tools/test_mcp_tool_issue_948.py
index c3e04220260..aefb32481df 100644
--- a/tests/tools/test_mcp_tool_issue_948.py
+++ b/tests/tools/test_mcp_tool_issue_948.py
@@ -1,10 +1,8 @@
 import asyncio
 import os
-import sys
 from types import SimpleNamespace
 from unittest.mock import AsyncMock, MagicMock, patch
 
-import pytest
 
 from tools.mcp_tool import MCPServerTask, _format_connect_error, _resolve_stdio_command, _MCP_AVAILABLE
 
@@ -34,6 +32,39 @@ def test_resolve_stdio_command_falls_back_to_hermes_node_bin(tmp_path):
     assert env["PATH"].split(os.pathsep)[0] == str(node_bin)
 
 
+def test_resolve_stdio_command_falls_back_to_usr_local_bin():
+    """When ``npx`` isn't on the filtered PATH and isn't under ``$HERMES_HOME/node/bin``
+    or ``~/.local/bin``, the resolver should still locate it at ``/usr/local/bin/npx``.
+
+    This is the canonical install location for Node on Linux from-source builds,
+    the upstream ``node:bookworm-slim`` image (which the Hermes Docker image
+    copies ``node + npm + corepack`` from since #4977), and macOS Homebrew on
+    Intel. Without this candidate, MCP servers run with an ``env.PATH`` that
+    omits ``/usr/local/bin`` (common when users hand-author PATH for sandboxing)
+    fail with ENOENT at ``execvp``.
+    """
+    target = os.path.join(os.sep, "usr", "local", "bin", "npx")
+
+    # Pretend ONLY the /usr/local/bin/npx candidate exists and is executable —
+    # the other candidates ($HERMES_HOME/node/bin/npx and ~/.local/bin/npx)
+    # should fail isfile() and the resolver must fall through to /usr/local/bin.
+    def _fake_isfile(path):
+        return path == target
+
+    def _fake_access(path, _mode):
+        return path == target
+
+    with patch("tools.mcp_tool.shutil.which", return_value=None), \
+         patch("tools.mcp_tool.os.path.isfile", side_effect=_fake_isfile), \
+         patch("tools.mcp_tool.os.access", side_effect=_fake_access):
+        command, env = _resolve_stdio_command("npx", {"PATH": "/opt/data/bin:/usr/bin:/bin"})
+
+    assert command == target
+    # /usr/local/bin must be prepended so npx's shebang (`/usr/bin/env node`)
+    # can find node in the same directory.
+    assert env["PATH"].split(os.pathsep)[0] == os.path.dirname(target)
+
+
 def test_resolve_stdio_command_respects_explicit_empty_path():
     seen_paths = []
 
diff --git a/tests/tools/test_mcp_tool_session_expired.py b/tests/tools/test_mcp_tool_session_expired.py
index 59601ba1c3d..b17e6484aab 100644
--- a/tests/tools/test_mcp_tool_session_expired.py
+++ b/tests/tools/test_mcp_tool_session_expired.py
@@ -12,8 +12,7 @@ affected MCP server failed until the gateway was manually restarted.
 """
 import json
 import threading
-import time
-from unittest.mock import AsyncMock, MagicMock
+from unittest.mock import MagicMock
 
 import pytest
 
diff --git a/tests/tools/test_mcp_utility_capability_gating.py b/tests/tools/test_mcp_utility_capability_gating.py
index 971711d75c4..aecee95cc04 100644
--- a/tests/tools/test_mcp_utility_capability_gating.py
+++ b/tests/tools/test_mcp_utility_capability_gating.py
@@ -29,7 +29,6 @@ from __future__ import annotations
 from types import SimpleNamespace
 from unittest.mock import MagicMock
 
-import pytest
 
 
 def _make_init_result(*, resources: bool, prompts: bool):
diff --git a/tests/tools/test_memory_tool.py b/tests/tools/test_memory_tool.py
index a081e71c690..f23deeff16a 100644
--- a/tests/tools/test_memory_tool.py
+++ b/tests/tools/test_memory_tool.py
@@ -8,7 +8,6 @@ from tools.memory_tool import (
     MemoryStore,
     memory_tool,
     _scan_memory_content,
-    ENTRY_DELIMITER,
     MEMORY_SCHEMA,
 )
 
diff --git a/tests/tools/test_modal_bulk_upload.py b/tests/tools/test_modal_bulk_upload.py
index e179e702aa2..4d69a8da594 100644
--- a/tests/tools/test_modal_bulk_upload.py
+++ b/tests/tools/test_modal_bulk_upload.py
@@ -4,8 +4,7 @@ import asyncio
 import base64
 import io
 import tarfile
-from pathlib import Path
-from unittest.mock import AsyncMock, MagicMock, patch
+from unittest.mock import AsyncMock, MagicMock
 
 import pytest
 
diff --git a/tests/tools/test_notify_on_complete.py b/tests/tools/test_notify_on_complete.py
index db086ef6717..84bf5f1f6b5 100644
--- a/tests/tools/test_notify_on_complete.py
+++ b/tests/tools/test_notify_on_complete.py
@@ -10,11 +10,9 @@ Covers:
 
 import json
 import os
-import queue
 import time
 import pytest
-from pathlib import Path
-from unittest.mock import MagicMock, patch
+from unittest.mock import patch
 
 from tools.process_registry import (
     ProcessRegistry,
diff --git a/tests/tools/test_process_registry.py b/tests/tools/test_process_registry.py
index 10e4421e5f0..bc1ec06d66c 100644
--- a/tests/tools/test_process_registry.py
+++ b/tests/tools/test_process_registry.py
@@ -7,14 +7,12 @@ import subprocess
 import sys
 import time
 import pytest
-from pathlib import Path
 from unittest.mock import MagicMock, patch
 
 from tools.environments.local import _HERMES_PROVIDER_ENV_FORCE_PREFIX
 from tools.process_registry import (
     ProcessRegistry,
     ProcessSession,
-    MAX_OUTPUT_CHARS,
     FINISHED_TTL_SECONDS,
     MAX_PROCESSES,
 )
@@ -563,9 +561,18 @@ class TestPopenLeakOnSetupFailure:
         def boom(*args, **kwargs):
             raise RuntimeError("Thread creation failed")
 
+        # proc.pid is a MagicMock-backed fake; os.getpgid(fake_pid) would query
+        # the real OS for an arbitrary PID. On a busy host that PID may exist,
+        # in which case spawn_local's primary cleanup path
+        # (os.killpg(os.getpgid(pid), SIGKILL)) succeeds against an UNRELATED
+        # real process group and proc.kill() is never reached — flaky failure,
+        # and a real risk of SIGKILLing an innocent process group. Force the
+        # ProcessLookupError fallback so the test deterministically exercises
+        # proc.kill() and never issues a real killpg.
         with patch("tools.process_registry._find_shell", return_value="/bin/bash"), \
              patch("subprocess.Popen", return_value=proc), \
              patch("threading.Thread", side_effect=boom), \
+             patch("os.getpgid", side_effect=ProcessLookupError), \
              patch.object(registry, "_write_checkpoint"):
             with pytest.raises(RuntimeError, match="Thread creation failed"):
                 registry.spawn_local("echo hello", cwd="/tmp")
@@ -590,9 +597,14 @@ class TestPopenLeakOnSetupFailure:
 
         fake_thread = MagicMock()
 
+        # See note in test_popen_killed_when_thread_creation_fails: force the
+        # ProcessLookupError fallback so cleanup deterministically calls
+        # proc.kill() instead of issuing a real os.killpg against whatever
+        # process group happens to own the fake PID on the host.
         with patch("tools.process_registry._find_shell", return_value="/bin/bash"), \
              patch("subprocess.Popen", return_value=proc), \
              patch("threading.Thread", return_value=fake_thread), \
+             patch("os.getpgid", side_effect=ProcessLookupError), \
              patch.object(registry, "_write_checkpoint", side_effect=OSError("disk full")):
             with pytest.raises(OSError, match="disk full"):
                 registry.spawn_local("echo hello", cwd="/tmp")
diff --git a/tests/tools/test_resolve_path.py b/tests/tools/test_resolve_path.py
index cd4d868961f..4c9cbe30ab6 100644
--- a/tests/tools/test_resolve_path.py
+++ b/tests/tools/test_resolve_path.py
@@ -4,7 +4,6 @@ import os
 from pathlib import Path
 from types import SimpleNamespace
 
-import pytest
 
 
 class TestResolvePath:
diff --git a/tests/tools/test_search_hidden_dirs.py b/tests/tools/test_search_hidden_dirs.py
index ac963ab1b71..0c214c1583a 100644
--- a/tests/tools/test_search_hidden_dirs.py
+++ b/tests/tools/test_search_hidden_dirs.py
@@ -13,7 +13,6 @@ Fix: _search_files (find) and _search_with_grep both now exclude hidden
 directories, matching ripgrep's default behavior.
 """
 
-import os
 import subprocess
 
 import pytest
diff --git a/tests/tools/test_send_message_tool.py b/tests/tools/test_send_message_tool.py
index 922a7d7bdc2..3a6ad11fdea 100644
--- a/tests/tools/test_send_message_tool.py
+++ b/tests/tools/test_send_message_tool.py
@@ -4,7 +4,6 @@ import asyncio
 import json
 import os
 import sys
-from pathlib import Path
 from types import SimpleNamespace
 from unittest.mock import AsyncMock, MagicMock, patch
 
@@ -40,7 +39,6 @@ from tools.send_message_tool import (
 # and provide a thin ``_send_discord(token, ...)`` shim that mirrors the
 # pre-migration signature so the existing test bodies keep working.
 from plugins.platforms.discord.adapter import (
-    _DISCORD_CHANNEL_TYPE_PROBE_CACHE,
     _derive_forum_thread_name,
     _probe_is_forum_cached,
     _remember_channel_is_forum,
@@ -378,9 +376,12 @@ class TestSendMessageTool:
         )
 
     def test_media_tag_outside_allowed_roots_is_not_sent(self, tmp_path, monkeypatch):
-        # This test exercises the strict-allowlist path; disable recency trust
-        # so the freshly-written tmp_path file is not auto-accepted by the
-        # trust window. (Recency trust is covered in test_platform_base.py.)
+        # This test exercises the strict-allowlist path; force strict mode on
+        # and disable recency trust so the freshly-written tmp_path file is
+        # not auto-accepted by the trust window. (Recency trust is covered
+        # in test_platform_base.py. The public default flipped to non-strict
+        # in 2026-05; this test pins strict on explicitly.)
+        monkeypatch.setenv("HERMES_MEDIA_DELIVERY_STRICT", "1")
         monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_FILES", "0")
         config, telegram_cfg = _make_config()
         secret = tmp_path / "secret.pdf"
@@ -1514,7 +1515,6 @@ class TestSendMatrixUrlEncoding:
 
     def test_room_id_is_percent_encoded_in_url(self):
         """Matrix room IDs with ! and : are percent-encoded in the PUT URL."""
-        import aiohttp
 
         mock_resp = MagicMock()
         mock_resp.status = 200
@@ -1891,10 +1891,6 @@ class TestForumProbeCache:
         discord_adapter._DISCORD_CHANNEL_TYPE_PROBE_CACHE.clear()
 
     def test_cache_round_trip(self):
-        from plugins.platforms.discord.adapter import (
-            _probe_is_forum_cached,
-            _remember_channel_is_forum,
-        )
         assert _probe_is_forum_cached("xyz") is None
         _remember_channel_is_forum("xyz", True)
         assert _probe_is_forum_cached("xyz") is True
diff --git a/tests/tools/test_signal_media.py b/tests/tools/test_signal_media.py
index ee483c08193..6d1bc2112eb 100644
--- a/tests/tools/test_signal_media.py
+++ b/tests/tools/test_signal_media.py
@@ -2,7 +2,6 @@
 
 import asyncio
 import sys
-from pathlib import Path
 from types import ModuleType
 from unittest.mock import MagicMock, AsyncMock, patch
 
diff --git a/tests/tools/test_skill_env_passthrough.py b/tests/tools/test_skill_env_passthrough.py
index b4999d83e59..fe15488fa10 100644
--- a/tests/tools/test_skill_env_passthrough.py
+++ b/tests/tools/test_skill_env_passthrough.py
@@ -1,8 +1,6 @@
 """Test that skill_view registers required env vars in the passthrough registry."""
 
 import json
-import os
-from pathlib import Path
 from unittest.mock import patch
 
 import pytest
diff --git a/tests/tools/test_skill_improvements.py b/tests/tools/test_skill_improvements.py
index 6e781309f2c..08ca970a469 100644
--- a/tests/tools/test_skill_improvements.py
+++ b/tests/tools/test_skill_improvements.py
@@ -1,9 +1,6 @@
 """Tests for skill fuzzy patching via tools.fuzzy_match."""
 
 import json
-import os
-from pathlib import Path
-from unittest.mock import patch
 
 import pytest
 
diff --git a/tests/tools/test_skill_manager_tool.py b/tests/tools/test_skill_manager_tool.py
index 33efbb98ae8..e7e5e4a78b2 100644
--- a/tests/tools/test_skill_manager_tool.py
+++ b/tests/tools/test_skill_manager_tool.py
@@ -12,8 +12,6 @@ from tools.skill_manager_tool import (
     _validate_category,
     _validate_frontmatter,
     _validate_file_path,
-    _find_skill,
-    _resolve_skill_dir,
     _create_skill,
     _edit_skill,
     _patch_skill,
@@ -21,8 +19,6 @@ from tools.skill_manager_tool import (
     _write_file,
     _remove_file,
     skill_manage,
-    VALID_NAME_RE,
-    ALLOWED_SUBDIRS,
     MAX_NAME_LENGTH,
 )
 
diff --git a/tests/tools/test_skill_provenance.py b/tests/tools/test_skill_provenance.py
index 8cbecc000bc..6c1aedef771 100644
--- a/tests/tools/test_skill_provenance.py
+++ b/tests/tools/test_skill_provenance.py
@@ -2,7 +2,6 @@
 
 import contextvars
 
-import pytest
 
 
 
diff --git a/tests/tools/test_skill_size_limits.py b/tests/tools/test_skill_size_limits.py
index c94ba02e81d..6468d6bda30 100644
--- a/tests/tools/test_skill_size_limits.py
+++ b/tests/tools/test_skill_size_limits.py
@@ -6,15 +6,11 @@ Hand-placed and hub-installed skills have no hard limit.
 """
 
 import json
-import os
-from pathlib import Path
-from unittest.mock import patch
 
 import pytest
 
 from tools.skill_manager_tool import (
     MAX_SKILL_CONTENT_CHARS,
-    MAX_SKILL_FILE_BYTES,
     _validate_content_size,
     skill_manage,
 )
diff --git a/tests/tools/test_skill_usage.py b/tests/tools/test_skill_usage.py
index 8251e609993..ad306b9c512 100644
--- a/tests/tools/test_skill_usage.py
+++ b/tests/tools/test_skill_usage.py
@@ -339,7 +339,7 @@ def test_agent_created_skips_archive_and_hub_dirs(skills_home):
 # ---------------------------------------------------------------------------
 
 def test_archive_skill_moves_directory(skills_home):
-    from tools.skill_usage import archive_skill, get_record, STATE_ARCHIVED
+    from tools.skill_usage import archive_skill, get_record
     skills_dir = skills_home / "skills"
     skill_dir = _write_skill(skills_dir, "old-skill")
     assert skill_dir.exists()
diff --git a/tests/tools/test_skill_view_traversal.py b/tests/tools/test_skill_view_traversal.py
index 55d84d8c3f3..426ebb111b7 100644
--- a/tests/tools/test_skill_view_traversal.py
+++ b/tests/tools/test_skill_view_traversal.py
@@ -6,7 +6,6 @@ reading arbitrary files (e.g., ~/.hermes/.env) via path traversal.
 
 import json
 import pytest
-from pathlib import Path
 from unittest.mock import patch
 
 from tools.skills_tool import skill_view
diff --git a/tests/tools/test_skills_ast_audit.py b/tests/tools/test_skills_ast_audit.py
index c70d6a1f41c..a9de3d57cb9 100644
--- a/tests/tools/test_skills_ast_audit.py
+++ b/tests/tools/test_skills_ast_audit.py
@@ -1,7 +1,6 @@
 """Tests for tools.skills_ast_audit — opt-in AST diagnostic scanner."""
 
 import sys
-from pathlib import Path
 
 from tools.skills_ast_audit import ast_scan_path, format_ast_report
 
diff --git a/tests/tools/test_skills_guard.py b/tests/tools/test_skills_guard.py
index 524da52baa8..015a48ef597 100644
--- a/tests/tools/test_skills_guard.py
+++ b/tests/tools/test_skills_guard.py
@@ -1,7 +1,5 @@
 """Tests for tools/skills_guard.py - security scanner for skills."""
 
-import os
-import stat
 import tempfile
 from pathlib import Path
 
@@ -33,8 +31,6 @@ from tools.skills_guard import (
     _resolve_trust_level,
     _check_structure,
     _unicode_char_name,
-    INSTALL_POLICY,
-    INVISIBLE_CHARS,
     MAX_FILE_COUNT,
     MAX_SINGLE_FILE_KB,
 )
@@ -54,6 +50,14 @@ class TestResolveTrustLevel:
         assert _resolve_trust_level("anthropics/skills") == "trusted"
         assert _resolve_trust_level("openai/skills/some-skill") == "trusted"
 
+    def test_nvidia_skills_is_trusted(self):
+        # NVIDIA/skills ships NVIDIA-verified skills with detached OMS
+        # signatures and governance skill cards. It's wired through the
+        # same trust path as the OpenAI / Anthropic / HuggingFace taps.
+        assert _resolve_trust_level("NVIDIA/skills") == "trusted"
+        assert _resolve_trust_level("NVIDIA/skills/aiq-deploy") == "trusted"
+        assert _resolve_trust_level("skills-sh/NVIDIA/skills/cuopt") == "trusted"
+
     def test_trusted_repo_sibling_prefixes_are_not_trusted(self):
         assert _resolve_trust_level("openai/skills-evil") == "community"
         assert _resolve_trust_level("anthropics/skills-foo/frontend-design") == "community"
diff --git a/tests/tools/test_skills_hub.py b/tests/tools/test_skills_hub.py
index 22406a8bacd..ec2f108072a 100644
--- a/tests/tools/test_skills_hub.py
+++ b/tests/tools/test_skills_hub.py
@@ -1,7 +1,6 @@
 """Tests for tools/skills_hub.py — source adapters, lock file, taps, dedup logic."""
 
 import json
-from pathlib import Path
 from unittest.mock import patch, MagicMock
 
 import httpx
@@ -71,6 +70,143 @@ class TestParseFrontmatterQuick:
         assert fm == {}
 
 
+# ---------------------------------------------------------------------------
+# GitHubSource skills.sh.json grouping sidecar (category support)
+# ---------------------------------------------------------------------------
+
+
+class TestSkillsShGroupings:
+    """Parsing + stamping of the skills.sh.json grouping sidecar.
+
+    A tap can ship a repo-root ``skills.sh.json`` declaring category
+    groupings; we flatten it to {skill_name: title} and stamp the title onto
+    each SkillMeta's ``extra["category"]``. This is the generic cross-ecosystem
+    mechanism behind NVIDIA-style categorization — not NVIDIA-specific.
+    """
+
+    def test_parse_basic_groupings(self):
+        content = json.dumps({
+            "$schema": "https://skills.sh/schemas/skills.sh.schema.json",
+            "groupings": [
+                {"title": "Inference AI", "skills": ["dynamo-router", "dynamo-recipe"]},
+                {"title": "Decision Optimization", "skills": ["cuopt-developer"]},
+            ],
+        })
+        mapping = GitHubSource._parse_skillsh_groupings(content)
+        assert mapping == {
+            "dynamo-router": "Inference AI",
+            "dynamo-recipe": "Inference AI",
+            "cuopt-developer": "Decision Optimization",
+        }
+
+    def test_parse_invalid_json_returns_none(self):
+        assert GitHubSource._parse_skillsh_groupings("not json{{") is None
+
+    def test_parse_non_dict_returns_none(self):
+        assert GitHubSource._parse_skillsh_groupings("[1, 2, 3]") is None
+
+    def test_parse_missing_groupings_returns_none(self):
+        assert GitHubSource._parse_skillsh_groupings('{"foo": 1}') is None
+
+    def test_parse_empty_groupings_returns_empty_map(self):
+        assert GitHubSource._parse_skillsh_groupings('{"groupings": []}') == {}
+
+    def test_parse_tolerates_malformed_group(self):
+        # A group missing its skills list is skipped; the valid one survives.
+        content = json.dumps({"groupings": [
+            {"title": "X"},                              # no skills -> skipped
+            {"skills": ["a"]},                           # no title -> skipped
+            {"title": "Y", "skills": ["b", 5, None]},    # only valid string members kept
+        ]})
+        assert GitHubSource._parse_skillsh_groupings(content) == {"b": "Y"}
+
+    def test_parse_first_grouping_wins_on_duplicate(self):
+        content = json.dumps({"groupings": [
+            {"title": "First", "skills": ["dup"]},
+            {"title": "Second", "skills": ["dup"]},
+        ]})
+        assert GitHubSource._parse_skillsh_groupings(content) == {"dup": "First"}
+
+    def test_get_groupings_caches_per_repo(self):
+        auth = MagicMock()
+        src = GitHubSource(auth=auth)
+        content = json.dumps({"groupings": [{"title": "T", "skills": ["s"]}]})
+        with patch.object(src, "_fetch_file_content", return_value=content) as mock_fetch:
+            first = src._get_skillsh_groupings("acme/skills")
+            second = src._get_skillsh_groupings("acme/skills")
+        assert first == {"s": "T"}
+        assert second == {"s": "T"}
+        # Second call must hit the per-repo cache, not GitHub again.
+        mock_fetch.assert_called_once_with("acme/skills", "skills.sh.json")
+
+    def test_get_groupings_no_sidecar_returns_none_and_caches(self):
+        auth = MagicMock()
+        src = GitHubSource(auth=auth)
+        with patch.object(src, "_fetch_file_content", return_value=None) as mock_fetch:
+            assert src._get_skillsh_groupings("acme/skills") is None
+            assert src._get_skillsh_groupings("acme/skills") is None
+        mock_fetch.assert_called_once()
+
+    def test_list_skills_stamps_category_from_sidecar(self):
+        auth = MagicMock()
+        src = GitHubSource(auth=auth)
+
+        meta = SkillMeta(
+            name="cuopt-developer", description="d", source="github",
+            identifier="NVIDIA/skills/skills/cuopt-developer", trust_level="trusted",
+        )
+        contents = [{"type": "dir", "name": "cuopt-developer"}]
+        groupings = {"cuopt-developer": "Decision Optimization"}
+
+        resp = MagicMock()
+        resp.status_code = 200
+        resp.json.return_value = contents
+
+        with patch.object(src, "_read_cache", return_value=None), \
+             patch.object(src, "_write_cache"), \
+             patch.object(src, "_get_skillsh_groupings", return_value=groupings), \
+             patch.object(src, "inspect", return_value=meta), \
+             patch("tools.skills_hub.httpx.get", return_value=resp):
+            skills = src._list_skills_in_repo("NVIDIA/skills", "skills/")
+
+        assert len(skills) == 1
+        assert skills[0].extra["category"] == "Decision Optimization"
+
+    def test_list_skills_no_sidecar_leaves_extra_empty(self):
+        auth = MagicMock()
+        src = GitHubSource(auth=auth)
+
+        meta = SkillMeta(
+            name="foo", description="d", source="github",
+            identifier="acme/skills/skills/foo", trust_level="community",
+        )
+        resp = MagicMock()
+        resp.status_code = 200
+        resp.json.return_value = [{"type": "dir", "name": "foo"}]
+
+        with patch.object(src, "_read_cache", return_value=None), \
+             patch.object(src, "_write_cache"), \
+             patch.object(src, "_get_skillsh_groupings", return_value=None), \
+             patch.object(src, "inspect", return_value=meta), \
+             patch("tools.skills_hub.httpx.get", return_value=resp):
+            skills = src._list_skills_in_repo("acme/skills", "skills/")
+
+        assert len(skills) == 1
+        assert "category" not in skills[0].extra
+
+    def test_meta_to_dict_roundtrip_preserves_extra(self):
+        meta = SkillMeta(
+            name="x", description="d", source="github",
+            identifier="acme/skills/x", trust_level="trusted",
+            extra={"category": "Inference AI"},
+        )
+        d = GitHubSource._meta_to_dict(meta)
+        assert d["extra"] == {"category": "Inference AI"}
+        # Round-trips back through the cache deserialization path.
+        restored = SkillMeta(**d)
+        assert restored.extra == {"category": "Inference AI"}
+
+
 # ---------------------------------------------------------------------------
 # GitHubSource.trust_level_for
 # ---------------------------------------------------------------------------
@@ -103,6 +239,36 @@ class TestTrustLevelFor:
         # No path part — still resolves repo correctly
         assert result in {"trusted", "community"}
 
+    def test_nvidia_skills_tap_is_registered_and_trusted(self):
+        # Invariant: every trusted repo in TRUSTED_REPOS that we want
+        # browseable/searchable through `hermes skills browse` must also
+        # appear as a default tap on GitHubSource. Without the tap, the
+        # repo's skills don't show up in search results or the docs-site
+        # Skills Hub page even though the trust level is correct.
+        from tools.skills_guard import TRUSTED_REPOS
+
+        assert "NVIDIA/skills" in TRUSTED_REPOS
+        tap_repos = {tap["repo"] for tap in GitHubSource.DEFAULT_TAPS}
+        assert "NVIDIA/skills" in tap_repos
+
+        src = self._source()
+        assert src.trust_level_for("NVIDIA/skills/aiq-deploy") == "trusted"
+
+    def test_browseable_trusted_repos_have_taps(self):
+        # General invariant covering all current and future trusted repos
+        # that publish under a single `skills/`-style path. openai/skills
+        # is the deliberate exception — it has two taps (`.curated/` and
+        # `.system/`) — so we just assert membership not path equality.
+        from tools.skills_guard import TRUSTED_REPOS
+
+        tap_repos = {tap["repo"] for tap in GitHubSource.DEFAULT_TAPS}
+        for repo in TRUSTED_REPOS:
+            assert repo in tap_repos, (
+                f"Trusted repo {repo!r} is in TRUSTED_REPOS but missing "
+                "from GitHubSource.DEFAULT_TAPS — its skills will not be "
+                "browsable via `hermes skills browse`."
+            )
+
 
 # ---------------------------------------------------------------------------
 # SkillsShSource
@@ -472,6 +638,68 @@ class TestSkillsShSource:
         requested_urls = [call.args[0] for call in mock_get.call_args_list]
         assert root_url not in requested_urls
 
+    @patch("tools.skills_hub._write_index_cache")
+    @patch("tools.skills_hub._read_index_cache", return_value=None)
+    @patch("tools.skills_hub.httpx.get")
+    def test_empty_query_walks_sitemap_not_homepage(
+        self, mock_get, _mock_read_cache, _mock_write_cache,
+    ):
+        """Empty query must walk the full sitemap.
+
+        Regression for skills.sh shipping ~858/20000 skills: the previous
+        empty-query path scraped the homepage's featured strip (~200 entries),
+        and build_skills_index.py supplemented it with 28 popular keyword
+        searches to drag the count to ~850. The sitemap walker hits the
+        full ~20k catalog in one pass.
+        """
+        index_xml = """<?xml version="1.0" encoding="UTF-8"?>
+<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+  <sitemap><loc>https://www.skills.sh/sitemap-misc.xml</loc></sitemap>
+  <sitemap><loc>https://www.skills.sh/sitemap-skills-1.xml</loc></sitemap>
+  <sitemap><loc>https://www.skills.sh/sitemap-skills-2.xml</loc></sitemap>
+</sitemapindex>"""
+        skills_1_xml = """<?xml version="1.0" encoding="UTF-8"?>
+<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+  <url><loc>https://www.skills.sh/anthropics/skills/frontend-design</loc></url>
+  <url><loc>https://www.skills.sh/anthropics/skills/pdf</loc></url>
+  <url><loc>https://www.skills.sh/vercel-labs/agent-skills/react-best-practices</loc></url>
+</urlset>"""
+        skills_2_xml = """<?xml version="1.0" encoding="UTF-8"?>
+<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+  <url><loc>https://www.skills.sh/microsoft/azure-skills/azure-ai</loc></url>
+  <url><loc>https://www.skills.sh/anthropics/skills/frontend-design</loc></url>
+</urlset>"""
+
+        def side_effect(url, *args, **kwargs):
+            resp = MagicMock(status_code=200)
+            if url.endswith("/sitemap.xml"):
+                resp.text = index_xml
+            elif "sitemap-skills-1" in url:
+                resp.text = skills_1_xml
+            elif "sitemap-skills-2" in url:
+                resp.text = skills_2_xml
+            else:
+                resp.status_code = 404
+                resp.text = ""
+            return resp
+
+        mock_get.side_effect = side_effect
+
+        results = self._source().search("", limit=0)
+
+        # 4 unique skills (the frontend-design dup across sitemaps collapsed).
+        assert len(results) == 4
+        identifiers = {r.identifier for r in results}
+        assert identifiers == {
+            "skills-sh/anthropics/skills/frontend-design",
+            "skills-sh/anthropics/skills/pdf",
+            "skills-sh/vercel-labs/agent-skills/react-best-practices",
+            "skills-sh/microsoft/azure-skills/azure-ai",
+        }
+        # Homepage was NOT fetched — the sitemap path is taken on empty query.
+        urls_called = [call.args[0] for call in mock_get.call_args_list]
+        assert not any(u == "https://skills.sh" or u == "https://skills.sh/" for u in urls_called)
+
 
 class TestFindSkillInRepoTree:
     """Tests for GitHubSource._find_skill_in_repo_tree."""
diff --git a/tests/tools/test_skills_hub_clawhub.py b/tests/tools/test_skills_hub_clawhub.py
index 2b2863498a3..6b45d081d09 100644
--- a/tests/tools/test_skills_hub_clawhub.py
+++ b/tests/tools/test_skills_hub_clawhub.py
@@ -298,6 +298,58 @@ class TestClawHubSource(unittest.TestCase):
         self.assertIsNone(bundle)
         self.assertEqual(mock_get.call_count, 3)
 
+    @patch("tools.skills_hub._write_index_cache")
+    @patch("tools.skills_hub._read_index_cache", return_value=None)
+    @patch("tools.skills_hub.httpx.get")
+    def test_search_empty_query_paginates_full_catalog(
+        self, mock_get, _mock_read_cache, _mock_write_cache
+    ):
+        """Empty query must walk the cursor-paginated catalog.
+
+        Regression for the silent 200-skill truncation: ClawHub's listing
+        endpoint caps any single page at 200 items + returns a `nextCursor`.
+        The build_skills_index.py crawler calls `search("", limit=N)` with a
+        large N to dump the full catalog. Before the fix, that hit a single
+        unpaginated request and silently dropped 99% of the catalog.
+        """
+        # Three pages: 200 + 200 + 50 items, then no cursor → stop.
+        page_calls = {"n": 0}
+        pages = [
+            {
+                "items": [{"slug": f"a-skill-{i}", "displayName": f"A {i}"} for i in range(200)],
+                "nextCursor": "cursor-page-2",
+            },
+            {
+                "items": [{"slug": f"b-skill-{i}", "displayName": f"B {i}"} for i in range(200)],
+                "nextCursor": "cursor-page-3",
+            },
+            {
+                "items": [{"slug": f"c-skill-{i}", "displayName": f"C {i}"} for i in range(50)],
+                "nextCursor": None,
+            },
+        ]
+
+        def side_effect(url, *args, **kwargs):
+            if url.endswith("/skills"):
+                idx = page_calls["n"]
+                page_calls["n"] += 1
+                if idx < len(pages):
+                    return _MockResponse(status_code=200, json_data=pages[idx])
+                return _MockResponse(status_code=200, json_data={"items": []})
+            return _MockResponse(status_code=404, json_data={})
+
+        mock_get.side_effect = side_effect
+
+        results = self.src.search("", limit=10_000)
+
+        # 200 + 200 + 50 = 450 unique skills, all retrieved via cursor pagination.
+        self.assertEqual(len(results), 450)
+        self.assertEqual(page_calls["n"], 3, "expected exactly 3 cursor-paginated pages")
+        identifiers = {meta.identifier for meta in results}
+        self.assertIn("a-skill-0", identifiers)
+        self.assertIn("b-skill-199", identifiers)
+        self.assertIn("c-skill-49", identifiers)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/tests/tools/test_skills_sync.py b/tests/tools/test_skills_sync.py
index d0bee8eb78c..1813f4c50e7 100644
--- a/tests/tools/test_skills_sync.py
+++ b/tests/tools/test_skills_sync.py
@@ -15,8 +15,6 @@ from tools.skills_sync import (
     sync_skills,
     reset_bundled_skill,
     restore_official_optional_skill,
-    MANIFEST_FILE,
-    SKILLS_DIR,
 )
 
 
diff --git a/tests/tools/test_ssh_bulk_upload.py b/tests/tools/test_ssh_bulk_upload.py
index afad54cf4f4..a2fa82e6c47 100644
--- a/tests/tools/test_ssh_bulk_upload.py
+++ b/tests/tools/test_ssh_bulk_upload.py
@@ -2,7 +2,6 @@
 
 import os
 import subprocess
-from pathlib import Path
 from unittest.mock import MagicMock, patch
 
 import pytest
diff --git a/tests/tools/test_stage2_hook_puid_pgid.py b/tests/tools/test_stage2_hook_puid_pgid.py
new file mode 100644
index 00000000000..ee45ebfba89
--- /dev/null
+++ b/tests/tools/test_stage2_hook_puid_pgid.py
@@ -0,0 +1,86 @@
+"""Contract test: the s6-overlay stage2 hook accepts PUID/PGID as aliases for
+HERMES_UID/HERMES_GID.
+
+Regression guard for #15290.  NAS platforms (UGOS, Synology, unRAID) bind-mount
+/opt/data from a host directory owned by the user's own UID and expect the
+LinuxServer.io PUID/PGID convention.  Without the alias those vars are silently
+ignored, the s6-setuidgid drop lands on UID 10000, and the runtime cannot read
+the volume.  HERMES_UID/HERMES_GID must still take precedence when both are
+set.
+
+The s6-overlay rework moved bootstrap from docker/entrypoint.sh (now a shim)
+to docker/stage2-hook.sh, which is installed as /etc/cont-init.d/01-hermes-setup
+by the Dockerfile.  This test targets the post-rework location.
+"""
+from __future__ import annotations
+
+import os
+import shutil
+import subprocess
+from pathlib import Path
+
+import pytest
+
+REPO_ROOT = Path(__file__).resolve().parents[2]
+STAGE2_HOOK = REPO_ROOT / "docker" / "stage2-hook.sh"
+
+
+@pytest.fixture(scope="module")
+def stage2_text() -> str:
+    if not STAGE2_HOOK.exists():
+        pytest.skip("docker/stage2-hook.sh not present in this checkout")
+    return STAGE2_HOOK.read_text()
+
+
+def _alias_lines(text: str) -> list[str]:
+    """The stage2 hook lines that resolve HERMES_UID/HERMES_GID from aliases."""
+    return [
+        line.strip()
+        for line in text.splitlines()
+        if line.strip().startswith(("HERMES_UID=", "HERMES_GID="))
+    ]
+
+
+def test_stage2_hook_resolves_puid_pgid_aliases(stage2_text: str) -> None:
+    alias_lines = _alias_lines(stage2_text)
+    assert any("PUID" in line for line in alias_lines), (
+        "docker/stage2-hook.sh must resolve HERMES_UID from a PUID alias; see #15290"
+    )
+    assert any("PGID" in line for line in alias_lines), (
+        "docker/stage2-hook.sh must resolve HERMES_GID from a PGID alias; see #15290"
+    )
+
+
+def _resolve(stage2_text: str, env: dict[str, str]) -> str:
+    """Run the stage2 hook's alias-resolution lines in isolation and report the
+    resolved ``HERMES_UID:HERMES_GID`` pair."""
+    bash = shutil.which("bash")
+    if bash is None:
+        pytest.skip("bash not available")
+    script = "\n".join(_alias_lines(stage2_text))
+    script += '\necho "${HERMES_UID:-}:${HERMES_GID:-}"\n'
+    proc = subprocess.run(
+        [bash, "-ec", script],
+        env={"PATH": os.environ.get("PATH", "")} | env,
+        capture_output=True,
+        text=True,
+    )
+    assert proc.returncode == 0, proc.stderr
+    return proc.stdout.strip()
+
+
+def test_puid_pgid_populate_hermes_uid_gid(stage2_text: str) -> None:
+    assert _resolve(stage2_text, {"PUID": "1000", "PGID": "10"}) == "1000:10"
+
+
+def test_hermes_uid_gid_take_precedence_over_aliases(stage2_text: str) -> None:
+    resolved = _resolve(
+        stage2_text,
+        {"HERMES_UID": "2000", "HERMES_GID": "2001", "PUID": "1000", "PGID": "10"},
+    )
+    assert resolved == "2000:2001"
+
+
+def test_no_uid_vars_leaves_values_empty(stage2_text: str) -> None:
+    # An empty resolution means the stage2 hook keeps the default hermes user.
+    assert _resolve(stage2_text, {}) == ":"
diff --git a/tests/tools/test_symlink_prefix_confusion.py b/tests/tools/test_symlink_prefix_confusion.py
index c0a7cd7c55f..05a9e281cd1 100644
--- a/tests/tools/test_symlink_prefix_confusion.py
+++ b/tests/tools/test_symlink_prefix_confusion.py
@@ -6,7 +6,6 @@ for 'axolotl/' because the string prefix matched. Now uses
 Path.is_relative_to() which handles directory boundaries correctly.
 """
 
-import os
 import pytest
 from pathlib import Path
 
diff --git a/tests/tools/test_sync_back_backends.py b/tests/tools/test_sync_back_backends.py
index 97bec17e28a..0f808512ee7 100644
--- a/tests/tools/test_sync_back_backends.py
+++ b/tests/tools/test_sync_back_backends.py
@@ -3,7 +3,7 @@
 import asyncio
 import subprocess
 from pathlib import Path
-from unittest.mock import AsyncMock, MagicMock, call, patch
+from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
 
diff --git a/tests/tools/test_terminal_compound_background.py b/tests/tools/test_terminal_compound_background.py
index d8922bcf556..eeef435772e 100644
--- a/tests/tools/test_terminal_compound_background.py
+++ b/tests/tools/test_terminal_compound_background.py
@@ -12,7 +12,6 @@ The rewriter fixes this by wrapping the tail in a brace group —
 the current shell. No subshell fork, no wait.
 """
 
-import pytest
 
 from tools.terminal_tool import _rewrite_compound_background as rewrite
 
diff --git a/tests/tools/test_terminal_config_env_sync.py b/tests/tools/test_terminal_config_env_sync.py
index 1aecea0cd7c..16131843417 100644
--- a/tests/tools/test_terminal_config_env_sync.py
+++ b/tests/tools/test_terminal_config_env_sync.py
@@ -224,3 +224,39 @@ def test_docker_env_is_bridged_everywhere():
     assert "docker_env" in _gateway_env_map_keys()
     assert "docker_env" in _save_config_env_sync_keys()
     assert "TERMINAL_DOCKER_ENV" in _terminal_tool_env_var_names()
+
+
+def test_docker_persist_across_processes_is_bridged_everywhere():
+    """Regression pin for the cross-process container reuse toggle.
+
+    ``terminal.docker_persist_across_processes`` (issue #20561) controls
+    whether ``DockerEnvironment.__init__`` probes for and reuses an existing
+    labeled container at startup, and whether ``cleanup()`` removes the
+    container on Hermes exit or just stops it (keeping it for the next
+    process).  Same four-bridge invariant as docker_run_as_host_user /
+    docker_env / docker_mount_cwd_to_workspace — drift between any of the
+    four sites means ``terminal.docker_persist_across_processes: false`` in
+    config.yaml silently does nothing for that entry point, leaving the
+    user unable to opt out of the documented "ONE long-lived container
+    shared across sessions" behavior.
+    """
+    assert "docker_persist_across_processes" in _cli_env_map_keys()
+    assert "docker_persist_across_processes" in _gateway_env_map_keys()
+    assert "docker_persist_across_processes" in _save_config_env_sync_keys()
+    assert "TERMINAL_DOCKER_PERSIST_ACROSS_PROCESSES" in _terminal_tool_env_var_names()
+
+
+def test_docker_orphan_reaper_is_bridged_everywhere():
+    """Regression pin for the startup orphan reaper toggle (issue #20561).
+
+    ``terminal.docker_orphan_reaper`` controls whether Hermes sweeps stale
+    Exited containers from prior SIGKILL'd processes at startup.  Same
+    four-site bridge invariant — drift means
+    ``terminal.docker_orphan_reaper: false`` silently does nothing for one
+    entry point, and the reaper either runs when the operator disabled it
+    or fails to run when they enabled it.
+    """
+    assert "docker_orphan_reaper" in _cli_env_map_keys()
+    assert "docker_orphan_reaper" in _gateway_env_map_keys()
+    assert "docker_orphan_reaper" in _save_config_env_sync_keys()
+    assert "TERMINAL_DOCKER_ORPHAN_REAPER" in _terminal_tool_env_var_names()
diff --git a/tests/tools/test_terminal_foreground_timeout_cap.py b/tests/tools/test_terminal_foreground_timeout_cap.py
index 54848f62924..0e9893cbad1 100644
--- a/tests/tools/test_terminal_foreground_timeout_cap.py
+++ b/tests/tools/test_terminal_foreground_timeout_cap.py
@@ -4,7 +4,6 @@ Ensures that foreground commands with timeout > FOREGROUND_MAX_TIMEOUT
 are rejected with an error suggesting background=true.
 """
 import json
-import os
 from unittest.mock import patch, MagicMock
 
 
@@ -123,7 +122,7 @@ class TestForegroundTimeoutCap:
         Only the model's explicit timeout parameter triggers rejection,
         not the user's configured default.
         """
-        from tools.terminal_tool import terminal_tool, FOREGROUND_MAX_TIMEOUT
+        from tools.terminal_tool import terminal_tool
 
         # User configured TERMINAL_TIMEOUT=900 in their env
         with patch("tools.terminal_tool._get_env_config",
diff --git a/tests/tools/test_terminal_requirements.py b/tests/tools/test_terminal_requirements.py
index a557dcd9f20..a2c1f00e12f 100644
--- a/tests/tools/test_terminal_requirements.py
+++ b/tests/tools/test_terminal_requirements.py
@@ -1,7 +1,6 @@
 import importlib
 import logging
 
-import pytest
 
 terminal_tool_module = importlib.import_module("tools.terminal_tool")
 
@@ -165,7 +164,7 @@ def test_modal_backend_managed_mode_does_not_fall_back_to_direct(monkeypatch, ca
 
     assert ok is False
     assert any(
-        "paid Nous subscription is required" in record.getMessage()
+        "Nous Tool Gateway access is not currently available" in record.getMessage()
         for record in caplog.records
     )
 
@@ -183,6 +182,6 @@ def test_modal_backend_managed_mode_without_feature_flag_logs_clear_error(monkey
 
     assert ok is False
     assert any(
-        "paid Nous subscription is required" in record.getMessage()
+        "Nous Tool Gateway access is not currently available" in record.getMessage()
         for record in caplog.records
     )
diff --git a/tests/tools/test_tirith_security.py b/tests/tools/test_tirith_security.py
index 6c771c6d482..4229ae82c6b 100644
--- a/tests/tools/test_tirith_security.py
+++ b/tests/tools/test_tirith_security.py
@@ -1,8 +1,10 @@
 """Tests for the tirith security scanning subprocess wrapper."""
 
+import io
 import json
 import os
 import subprocess
+import tarfile
 import time
 from unittest.mock import MagicMock, patch
 
@@ -716,6 +718,89 @@ class TestCosignVerification:
         assert mock_cosign.called  # cosign was invoked
 
 
+class TestInstallArchiveMemberValidation:
+    def _write_archive(self, tmp_path, member: tarfile.TarInfo, data: bytes | None = None):
+        archive = tmp_path / "tirith-aarch64-apple-darwin.tar.gz"
+        checksums = tmp_path / "checksums.txt"
+        with tarfile.open(archive, "w:gz") as tar:
+            if data is None:
+                tar.addfile(member)
+            else:
+                tar.addfile(member, io.BytesIO(data))
+        checksums.write_text(
+            "ignored  tirith-aarch64-apple-darwin.tar.gz\n",
+            encoding="utf-8",
+        )
+        return archive, checksums
+
+    def _download_side_effect(self, archive, checksums):
+        def _download(url, dest, timeout=10):
+            del timeout
+            if url.endswith(".tar.gz"):
+                with open(archive, "rb") as src, open(dest, "wb") as dst:
+                    dst.write(src.read())
+                return
+            if url.endswith("checksums.txt"):
+                with open(checksums, "rb") as src, open(dest, "wb") as dst:
+                    dst.write(src.read())
+                return
+            raise AssertionError(f"unexpected download URL: {url}")
+
+        return _download
+
+    @patch("tools.tirith_security._verify_checksum", return_value=True)
+    @patch("tools.tirith_security.shutil.which", return_value=None)
+    @patch("tools.tirith_security._detect_target", return_value="aarch64-apple-darwin")
+    def test_install_extracts_regular_tirith_member(self, mock_target, mock_which,
+                                                    mock_checksum, tmp_path, monkeypatch):
+        """A valid regular-file tirith member is installed as a plain file."""
+        del mock_target, mock_which, mock_checksum
+        from tools.tirith_security import _install_tirith
+
+        payload = b"#!/bin/sh\nexit 0\n"
+        member = tarfile.TarInfo("bin/tirith")
+        member.mode = 0o755
+        member.size = len(payload)
+        archive, checksums = self._write_archive(tmp_path, member, payload)
+
+        hermes_home = tmp_path / "hermes-home"
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+        with patch("tools.tirith_security._download_file",
+                   side_effect=self._download_side_effect(archive, checksums)):
+            path, reason = _install_tirith(log_failures=False)
+
+        assert reason == ""
+        assert path == str(hermes_home / "bin" / "tirith")
+        assert os.path.isfile(path)
+        assert not os.path.islink(path)
+        with open(path, "rb") as f:
+            assert f.read() == payload
+
+    @patch("tools.tirith_security._verify_checksum", return_value=True)
+    @patch("tools.tirith_security.shutil.which", return_value=None)
+    @patch("tools.tirith_security._detect_target", return_value="aarch64-apple-darwin")
+    def test_install_rejects_non_regular_tirith_member(self, mock_target, mock_which,
+                                                       mock_checksum, tmp_path, monkeypatch):
+        """Symlink or hardlink tar members must not be installed as tirith."""
+        del mock_target, mock_which, mock_checksum
+        from tools.tirith_security import _install_tirith
+
+        member = tarfile.TarInfo("bin/tirith")
+        member.type = tarfile.SYMTYPE
+        member.linkname = "/bin/sh"
+        archive, checksums = self._write_archive(tmp_path, member)
+
+        hermes_home = tmp_path / "hermes-home"
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+        with patch("tools.tirith_security._download_file",
+                   side_effect=self._download_side_effect(archive, checksums)):
+            path, reason = _install_tirith(log_failures=False)
+
+        assert path is None
+        assert reason == "binary_not_regular_file"
+        assert not os.path.lexists(hermes_home / "bin" / "tirith")
+
+
 # ---------------------------------------------------------------------------
 # Background install / non-blocking startup (P2)
 # ---------------------------------------------------------------------------
@@ -963,7 +1048,7 @@ class TestDiskFailureMarker:
 
     def test_cosign_missing_disk_marker_allows_retry(self):
         """Disk marker with cosign_missing reason allows retry when cosign appears."""
-        from tools.tirith_security import _resolve_tirith_path, _INSTALL_FAILED
+        from tools.tirith_security import _resolve_tirith_path
         _tirith_mod._resolved_path = None
 
         # _is_install_failed_on_disk sees "cosign_missing" + cosign on PATH → returns False
diff --git a/tests/tools/test_tool_backend_helpers.py b/tests/tools/test_tool_backend_helpers.py
index 014b25c827f..03bb7f20d7a 100644
--- a/tests/tools/test_tool_backend_helpers.py
+++ b/tests/tools/test_tool_backend_helpers.py
@@ -16,10 +16,12 @@ from unittest.mock import patch
 
 import pytest
 
+from hermes_cli.nous_account import NousPaidServiceAccessInfo, NousPortalAccountInfo
 from tools.tool_backend_helpers import (
     coerce_modal_mode,
     has_direct_modal_credentials,
     managed_nous_tools_enabled,
+    nous_tool_gateway_unavailable_message,
     normalize_browser_cloud_provider,
     normalize_modal_mode,
     prefers_gateway,
@@ -40,42 +42,93 @@ class TestManagedNousToolsEnabled:
 
     def test_disabled_when_not_logged_in(self, monkeypatch):
         monkeypatch.setattr(
-            "hermes_cli.auth.get_nous_auth_status",
-            lambda: {},
+            "hermes_cli.nous_account.get_nous_portal_account_info",
+            lambda: NousPortalAccountInfo(logged_in=False, source="none", fresh=False),
         )
         assert managed_nous_tools_enabled() is False
 
     def test_disabled_for_free_tier(self, monkeypatch):
         monkeypatch.setattr(
-            "hermes_cli.auth.get_nous_auth_status",
-            lambda: {"logged_in": True},
-        )
-        monkeypatch.setattr(
-            "hermes_cli.models.check_nous_free_tier",
-            lambda: True,
+            "hermes_cli.nous_account.get_nous_portal_account_info",
+            lambda: NousPortalAccountInfo(
+                logged_in=True,
+                source="jwt",
+                fresh=False,
+                paid_service_access=False,
+            ),
         )
         assert managed_nous_tools_enabled() is False
 
     def test_enabled_for_paid_subscriber(self, monkeypatch):
         monkeypatch.setattr(
-            "hermes_cli.auth.get_nous_auth_status",
-            lambda: {"logged_in": True},
-        )
-        monkeypatch.setattr(
-            "hermes_cli.models.check_nous_free_tier",
-            lambda: False,
+            "hermes_cli.nous_account.get_nous_portal_account_info",
+            lambda: NousPortalAccountInfo(
+                logged_in=True,
+                source="jwt",
+                fresh=False,
+                paid_service_access=True,
+            ),
         )
         assert managed_nous_tools_enabled() is True
 
+    def test_force_fresh_is_forwarded(self, monkeypatch):
+        calls = []
+
+        def fake_account_info(*, force_fresh=False):
+            calls.append(force_fresh)
+            return NousPortalAccountInfo(
+                logged_in=True,
+                source="account_api",
+                fresh=True,
+                paid_service_access=True,
+            )
+
+        monkeypatch.setattr(
+            "hermes_cli.nous_account.get_nous_portal_account_info",
+            fake_account_info,
+        )
+
+        assert managed_nous_tools_enabled(force_fresh=True) is True
+        assert calls == [True]
+
     def test_returns_false_on_exception(self, monkeypatch):
         """Should never crash — returns False on any exception."""
         monkeypatch.setattr(
-            "hermes_cli.auth.get_nous_auth_status",
+            "hermes_cli.nous_account.get_nous_portal_account_info",
             _raise_import,
         )
         assert managed_nous_tools_enabled() is False
 
 
+class TestNousToolGatewayUnavailableMessage:
+    def test_uses_entitlement_reason_for_logged_in_user(self, monkeypatch):
+        monkeypatch.setattr(
+            "hermes_cli.nous_account.get_nous_portal_account_info",
+            lambda force_fresh=False: NousPortalAccountInfo(
+                logged_in=True,
+                source="account_api",
+                fresh=True,
+                paid_service_access=False,
+                portal_base_url="https://portal.example.test",
+                paid_service_access_info=NousPaidServiceAccessInfo(
+                    allowed=False,
+                    reason="no_usable_credits",
+                    has_active_subscription=True,
+                    active_subscription_is_paid=True,
+                    subscription_credits_remaining=0,
+                    purchased_credits_remaining=0,
+                    total_usable_credits=0,
+                ),
+            ),
+        )
+
+        message = nous_tool_gateway_unavailable_message("managed image generation")
+
+        assert "credits are exhausted" in message
+        assert "managed image generation" in message
+        assert "https://portal.example.test/billing" in message
+
+
 # ---------------------------------------------------------------------------
 # normalize_browser_cloud_provider
 # ---------------------------------------------------------------------------
@@ -189,6 +242,20 @@ class TestHasDirectModalCredentials:
         with patch.object(Path, "home", return_value=tmp_path):
             assert has_direct_modal_credentials() is True
 
+    def test_home_dir_permission_denied(self, monkeypatch):
+        """PermissionError on Path.home() should not crash (issue #33525)."""
+        monkeypatch.delenv("MODAL_TOKEN_ID", raising=False)
+        monkeypatch.delenv("MODAL_TOKEN_SECRET", raising=False)
+        with patch.object(Path, "home", side_effect=PermissionError("denied")):
+            assert has_direct_modal_credentials() is False
+
+    def test_home_dir_permission_denied_with_env_vars(self, monkeypatch):
+        """PermissionError on Path.home() should not prevent env var detection."""
+        monkeypatch.setenv("MODAL_TOKEN_ID", "id-123")
+        monkeypatch.setenv("MODAL_TOKEN_SECRET", "sec-456")
+        with patch.object(Path, "home", side_effect=PermissionError("denied")):
+            assert has_direct_modal_credentials() is True
+
 
 # ---------------------------------------------------------------------------
 # prefers_gateway
diff --git a/tests/tools/test_tool_result_storage.py b/tests/tools/test_tool_result_storage.py
index 17b6815c1d1..0d80581dc2a 100644
--- a/tests/tools/test_tool_result_storage.py
+++ b/tests/tools/test_tool_result_storage.py
@@ -5,7 +5,6 @@ from unittest.mock import MagicMock, patch
 
 from tools.budget_config import (
     DEFAULT_RESULT_SIZE_CHARS,
-    DEFAULT_TURN_BUDGET_CHARS,
     DEFAULT_PREVIEW_SIZE_CHARS,
     BudgetConfig,
 )
diff --git a/tests/tools/test_tool_search.py b/tests/tools/test_tool_search.py
new file mode 100644
index 00000000000..9c8c8a33c17
--- /dev/null
+++ b/tests/tools/test_tool_search.py
@@ -0,0 +1,538 @@
+"""Tests for tools/tool_search.py — progressive tool disclosure.
+
+Coverage targets — these mirror the issues called out in the OpenClaw tool
+search report. Every test that names an OpenClaw issue is the regression
+guard that would have caught that specific failure mode.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import sys
+from typing import List, Dict, Any
+
+import pytest
+
+
+_REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
+if _REPO_ROOT not in sys.path:
+    sys.path.insert(0, _REPO_ROOT)
+
+
+def _td(name: str, description: str = "", properties: Dict[str, Any] | None = None) -> Dict[str, Any]:
+    return {
+        "type": "function",
+        "function": {
+            "name": name,
+            "description": description,
+            "parameters": {
+                "type": "object",
+                "properties": properties or {},
+            },
+        },
+    }
+
+
+# ---------------------------------------------------------------------------
+# Config parsing
+# ---------------------------------------------------------------------------
+
+
+class TestConfigParsing:
+    def test_default_when_missing(self):
+        from tools.tool_search import ToolSearchConfig
+        cfg = ToolSearchConfig.from_raw(None)
+        assert cfg.enabled == "auto"
+        assert cfg.threshold_pct == 10.0
+
+    def test_bool_true_maps_to_auto(self):
+        from tools.tool_search import ToolSearchConfig
+        cfg = ToolSearchConfig.from_raw(True)
+        assert cfg.enabled == "auto"
+
+    def test_bool_false_maps_to_off(self):
+        from tools.tool_search import ToolSearchConfig
+        cfg = ToolSearchConfig.from_raw(False)
+        assert cfg.enabled == "off"
+
+    def test_explicit_on(self):
+        from tools.tool_search import ToolSearchConfig
+        cfg = ToolSearchConfig.from_raw({"enabled": "on"})
+        assert cfg.enabled == "on"
+
+    def test_invalid_enabled_falls_back_to_auto(self):
+        from tools.tool_search import ToolSearchConfig
+        cfg = ToolSearchConfig.from_raw({"enabled": "maybe"})
+        assert cfg.enabled == "auto"
+
+    def test_threshold_clamped(self):
+        from tools.tool_search import ToolSearchConfig
+        cfg = ToolSearchConfig.from_raw({"threshold_pct": 150})
+        assert cfg.threshold_pct == 100.0
+        cfg = ToolSearchConfig.from_raw({"threshold_pct": -5})
+        assert cfg.threshold_pct == 0.0
+
+    def test_search_limits_clamped(self):
+        from tools.tool_search import ToolSearchConfig
+        cfg = ToolSearchConfig.from_raw({
+            "search_default_limit": 999,
+            "max_search_limit": 999,
+        })
+        assert cfg.max_search_limit == 50
+        assert cfg.search_default_limit <= cfg.max_search_limit
+
+
+# ---------------------------------------------------------------------------
+# Classification — the hard invariant: core tools NEVER defer.
+# ---------------------------------------------------------------------------
+
+
+class TestClassification:
+    def test_core_tools_never_defer(self):
+        """The critical invariant from the OpenClaw report."""
+        from tools.tool_search import is_deferrable_tool_name
+        # Sample of core tools from _HERMES_CORE_TOOLS.
+        for core_name in ["terminal", "read_file", "write_file", "patch",
+                          "search_files", "todo", "memory", "browser_navigate",
+                          "web_search", "session_search", "clarify",
+                          "execute_code", "delegate_task", "send_message"]:
+            assert not is_deferrable_tool_name(core_name), (
+                f"Core tool '{core_name}' must NEVER be deferrable"
+            )
+
+    def test_bridge_tools_never_defer(self):
+        from tools.tool_search import is_deferrable_tool_name, BRIDGE_TOOL_NAMES
+        for name in BRIDGE_TOOL_NAMES:
+            assert not is_deferrable_tool_name(name)
+
+    def test_unknown_tool_not_deferrable(self):
+        """Defensive: a tool name we cannot resolve to a registry entry must
+        not be claimed as deferrable. This protects against the OpenClaw
+        cron regression where unresolved tools were silently dropped."""
+        from tools.tool_search import is_deferrable_tool_name
+        assert not is_deferrable_tool_name("xx_definitely_not_a_tool_xx")
+
+    def test_classify_keeps_unknown_in_visible(self):
+        """A tool we can't classify stays visible — never silently dropped.
+
+        This is the OpenClaw #84141 regression guard (cron lost ``exec``
+        because it wasn't in the catalog).
+        """
+        from tools.tool_search import classify_tools
+        # Build a tool def for something we don't have a registry entry for.
+        defs = [_td("xx_unknown_tool", "Unknown tool")]
+        visible, deferrable = classify_tools(defs)
+        names = {(td.get("function") or {}).get("name") for td in visible}
+        assert "xx_unknown_tool" in names
+        assert deferrable == []
+
+
+# ---------------------------------------------------------------------------
+# Token estimation + threshold gate
+# ---------------------------------------------------------------------------
+
+
+class TestThresholdGate:
+    def test_off_never_activates(self):
+        from tools.tool_search import ToolSearchConfig, should_activate
+        cfg = ToolSearchConfig.from_raw({"enabled": "off"})
+        assert not should_activate(cfg, deferrable_tokens=1_000_000, context_length=200_000)
+
+    def test_zero_deferrable_never_activates(self):
+        from tools.tool_search import ToolSearchConfig, should_activate
+        cfg = ToolSearchConfig.from_raw({"enabled": "on"})
+        assert not should_activate(cfg, deferrable_tokens=0, context_length=200_000)
+
+    def test_on_activates_with_any_deferrable(self):
+        from tools.tool_search import ToolSearchConfig, should_activate
+        cfg = ToolSearchConfig.from_raw({"enabled": "on"})
+        assert should_activate(cfg, deferrable_tokens=100, context_length=200_000)
+
+    def test_auto_below_threshold_does_not_activate(self):
+        from tools.tool_search import ToolSearchConfig, should_activate
+        cfg = ToolSearchConfig.from_raw({"enabled": "auto", "threshold_pct": 10})
+        # 5% of 200K = below 10% threshold
+        assert not should_activate(cfg, deferrable_tokens=10_000, context_length=200_000)
+
+    def test_auto_at_or_above_threshold_activates(self):
+        from tools.tool_search import ToolSearchConfig, should_activate
+        cfg = ToolSearchConfig.from_raw({"enabled": "auto", "threshold_pct": 10})
+        assert should_activate(cfg, deferrable_tokens=20_000, context_length=200_000)
+        assert should_activate(cfg, deferrable_tokens=50_000, context_length=200_000)
+
+    def test_auto_without_context_length_uses_20k_cutoff(self):
+        """Fallback cutoff used when the active model is unknown."""
+        from tools.tool_search import ToolSearchConfig, should_activate
+        cfg = ToolSearchConfig.from_raw({"enabled": "auto"})
+        assert not should_activate(cfg, deferrable_tokens=10_000, context_length=0)
+        assert should_activate(cfg, deferrable_tokens=25_000, context_length=0)
+
+    def test_token_estimate_proportional_to_schema_size(self):
+        from tools.tool_search import estimate_tokens_from_schemas
+        small = [_td("a", "x")]
+        big = [_td(f"name_{i}", f"description for tool {i} " * 20,
+                   {"q": {"type": "string", "description": "search query " * 10}})
+               for i in range(10)]
+        small_t = estimate_tokens_from_schemas(small)
+        big_t = estimate_tokens_from_schemas(big)
+        assert big_t > small_t * 10
+
+
+# ---------------------------------------------------------------------------
+# Retrieval (BM25 + substring fallback)
+# ---------------------------------------------------------------------------
+
+
+class TestRetrieval:
+    def _fake_catalog(self):
+        """Build a catalog directly without touching the registry."""
+        from tools.tool_search import CatalogEntry, _tokenize, _entry_search_text
+        defs = [
+            _td("github_create_issue", "Open a new issue in a GitHub repository",
+                {"title": {"type": "string"}, "body": {"type": "string"}}),
+            _td("github_search_repos", "Search GitHub for matching repositories",
+                {"query": {"type": "string"}}),
+            _td("slack_send_message", "Post a message into a Slack channel",
+                {"channel": {"type": "string"}, "text": {"type": "string"}}),
+            _td("calendar_create_event", "Add an event to the user's calendar",
+                {"title": {"type": "string"}, "start": {"type": "string"}}),
+        ]
+        catalog = []
+        for d in defs:
+            fn = d["function"]
+            e = CatalogEntry(
+                name=fn["name"], description=fn["description"],
+                schema=d, source="mcp", source_name="mcp-test",
+            )
+            e._tokens = _tokenize(_entry_search_text(d))
+            catalog.append(e)
+        return catalog
+
+    def test_search_finds_relevant_tool(self):
+        from tools.tool_search import search_catalog
+        hits = search_catalog(self._fake_catalog(), "create a github issue", limit=3)
+        names = [h.name for h in hits]
+        assert names[0] == "github_create_issue"
+
+    def test_search_returns_empty_for_irrelevant_query(self):
+        from tools.tool_search import search_catalog
+        hits = search_catalog(self._fake_catalog(), "asdf qwerty foobar", limit=3)
+        assert hits == []
+
+    def test_search_substring_fallback(self):
+        """Even when no BM25 hit, a literal substring of the tool name returns."""
+        from tools.tool_search import search_catalog
+        hits = search_catalog(self._fake_catalog(), "calendar", limit=3)
+        assert any("calendar" in h.name for h in hits)
+
+    def test_search_respects_limit(self):
+        from tools.tool_search import search_catalog
+        hits = search_catalog(self._fake_catalog(), "github", limit=1)
+        assert len(hits) <= 1
+
+
+# ---------------------------------------------------------------------------
+# Assembly — the full passthrough/activate decision.
+# ---------------------------------------------------------------------------
+
+
+class TestAssembly:
+    def test_no_deferrable_returns_unchanged(self):
+        """Pure-core toolset: pass-through, no bridge tools added."""
+        from tools.tool_search import assemble_tool_defs, ToolSearchConfig
+        defs = [_td("terminal", "Run shell"), _td("read_file", "Read a file")]
+        result = assemble_tool_defs(
+            defs,
+            context_length=200_000,
+            config=ToolSearchConfig.from_raw({"enabled": "on"}),
+        )
+        assert not result.activated
+        assert {t["function"]["name"] for t in result.tool_defs} == {"terminal", "read_file"}
+
+    def test_below_threshold_returns_unchanged(self):
+        """Tiny deferrable surface: don't bother."""
+        from tools.tool_search import assemble_tool_defs, ToolSearchConfig
+        # _td renders to ~80 chars / 20 tokens. 3 of them = ~60 tokens.
+        # 10% of 200K = 20K. Way below.
+        defs = [_td("unknown_tool_a"), _td("unknown_tool_b"), _td("unknown_tool_c")]
+        result = assemble_tool_defs(
+            defs,
+            context_length=200_000,
+            config=ToolSearchConfig.from_raw({"enabled": "auto", "threshold_pct": 10}),
+        )
+        assert not result.activated
+        names = {(t.get("function") or {}).get("name") for t in result.tool_defs}
+        assert "tool_search" not in names
+
+    def test_idempotent_when_bridge_already_present(self):
+        from tools.tool_search import assemble_tool_defs, ToolSearchConfig, BRIDGE_TOOL_NAMES
+        defs = [_td("terminal", "Run shell"), _td("tool_search", "old")]
+        result = assemble_tool_defs(
+            defs,
+            context_length=200_000,
+            config=ToolSearchConfig.from_raw({"enabled": "off"}),
+        )
+        names = [(t["function"]["name"]) for t in result.tool_defs]
+        # The pre-existing tool_search was stripped (it would be re-injected if
+        # activation happened; here it didn't).
+        assert "tool_search" not in names
+
+
+# ---------------------------------------------------------------------------
+# Bridge dispatch
+# ---------------------------------------------------------------------------
+
+
+class TestBridgeDispatch:
+    def test_tool_search_requires_query(self):
+        from tools.tool_search import dispatch_tool_search
+        result = dispatch_tool_search({}, current_tool_defs=[])
+        assert "error" in json.loads(result)
+
+    def test_tool_describe_requires_name(self):
+        from tools.tool_search import dispatch_tool_describe
+        result = dispatch_tool_describe({}, current_tool_defs=[])
+        assert "error" in json.loads(result)
+
+    def test_tool_describe_rejects_non_deferrable(self):
+        """If the model asks to describe a core tool, refuse — it's already
+        in the visible list."""
+        from tools.tool_search import dispatch_tool_describe
+        result = dispatch_tool_describe(
+            {"name": "terminal"}, current_tool_defs=[_td("terminal", "Run shell")],
+        )
+        assert "error" in json.loads(result)
+
+    def test_resolve_underlying_call_parses_object_args(self):
+        from tools.tool_search import resolve_underlying_call
+        name, args, err = resolve_underlying_call({
+            "name": "unknown_xxx",
+            "arguments": {"foo": "bar"},
+        })
+        # Will fail classification because unknown_xxx isn't deferrable.
+        assert err is not None
+
+    def test_resolve_underlying_call_parses_json_string_args(self):
+        """Some models emit ``arguments`` as a JSON string instead of object."""
+        from tools.tool_search import resolve_underlying_call
+        # Use a name that won't classify (so we don't depend on registry),
+        # but exercise the JSON parse path.
+        _, _, err = resolve_underlying_call({
+            "name": "fake",
+            "arguments": '{"a": 1}',
+        })
+        # err is about classification, but the parse worked (it would have
+        # failed earlier with "not valid JSON" otherwise).
+        assert "not valid JSON" not in (err or "")
+
+    def test_resolve_underlying_call_rejects_bad_json(self):
+        from tools.tool_search import resolve_underlying_call
+        _, _, err = resolve_underlying_call({
+            "name": "fake",
+            "arguments": "{this is not json",
+        })
+        assert err is not None
+        assert "JSON" in err
+
+    def test_resolve_underlying_call_rejects_recursion(self):
+        """tool_call cannot invoke tool_call itself."""
+        from tools.tool_search import resolve_underlying_call, TOOL_CALL_NAME
+        name, args, err = resolve_underlying_call({
+            "name": TOOL_CALL_NAME,
+            "arguments": {},
+        })
+        assert err is not None
+        assert "bridge tool" in err.lower()
+
+
+# ---------------------------------------------------------------------------
+# End-to-end via the real handle_function_call (smoke test).
+# ---------------------------------------------------------------------------
+
+
+class TestHandleFunctionCallIntegration:
+    def test_tool_search_dispatch_through_handle_function_call(self):
+        """The dispatcher recognizes the bridge tool by name."""
+        import model_tools
+        result = model_tools.handle_function_call(
+            function_name="tool_search",
+            function_args={"query": "nothing matches this"},
+        )
+        parsed = json.loads(result)
+        # Without a real registry, the matches will be empty, but the
+        # dispatch path completed without error.
+        assert "matches" in parsed or "error" in parsed
+
+
+class TestRegression_OpenClawCron84141:
+    """Regression guard for the OpenClaw cron-tool-loss class of bug.
+
+    OpenClaw #84141: ``toolsAllow: ["exec"]`` on an isolated cron turn
+    resulted in the agent receiving only ``sessions_send`` — the catalog
+    builder silently dropped the requested core tool.
+
+    Our defense: core tools are NEVER deferred. This test exercises the
+    full assembly pipeline with a mixed core+MCP toolset and asserts that
+    every core tool survives.
+    """
+
+    def test_core_tool_survives_alongside_many_mcp_tools(self):
+        from tools.tool_search import (
+            assemble_tool_defs, ToolSearchConfig, BRIDGE_TOOL_NAMES,
+            classify_tools,
+        )
+        # 1 core tool + 50 unknown/MCP-shaped tools (deferrable).
+        defs = [_td("terminal", "Run shell commands")]
+        # Pad with fake "deferrable" tools — without registry registration,
+        # classify_tools puts them in 'visible'. So instead, we just verify
+        # the core-tool side: terminal stays in visible regardless.
+        visible, deferrable = classify_tools(defs)
+        assert any(
+            (td.get("function") or {}).get("name") == "terminal"
+            for td in visible
+        ), "Core tool 'terminal' was wrongly classified as deferrable"
+
+        # Now force activation and check the resulting tool-defs list.
+        result = assemble_tool_defs(
+            defs,
+            context_length=200_000,
+            config=ToolSearchConfig.from_raw({"enabled": "on"}),
+        )
+        names = {(t.get("function") or {}).get("name") for t in result.tool_defs}
+        # terminal must be present; bridges are only added if there are
+        # deferrable tools to put behind them.
+        assert "terminal" in names
+
+    def test_unwrap_rejects_core_tool_attempt(self):
+        """Even if the model tries to invoke a core tool through tool_call,
+        we reject the call and tell the model to use it directly."""
+        from tools.tool_search import resolve_underlying_call
+        _, _, err = resolve_underlying_call({
+            "name": "terminal",
+            "arguments": {"command": "echo hi"},
+        })
+        assert err is not None
+        assert "not a deferrable" in err
+
+
+class TestRegression_ToolsetScoping:
+    """A restricted-toolset session must not see or invoke out-of-scope tools.
+
+    The bug: the bridge dispatch and the tool_executor unwrap read the
+    catalog from the *global* registry (get_tool_definitions with no
+    toolset scope = "start with everything"), so a session scoped to one
+    MCP server could tool_search the entire process registry and tool_call
+    any plugin tool it was never granted. registry.dispatch() has no
+    enabled_tools gate for non-execute_code tools, so the out-of-scope tool
+    actually ran.
+
+    The fix threads the session's enabled/disabled toolsets into the bridge
+    dispatch (model_tools.handle_function_call) and the executor unwrap
+    (agent.tool_executor), scoping both the searchable catalog and the
+    invocable set to the session's own toolsets.
+    """
+
+    @staticmethod
+    def _register(name, toolset):
+        from tools.registry import registry
+
+        def _handler(args, task_id=None, **kw):
+            return json.dumps({"ok": True, "tool": name})
+
+        registry.register(
+            name=name,
+            handler=_handler,
+            schema=_td(name, f"desc for {name}", {"repo": {"type": "string"}}),
+            toolset=toolset,
+        )
+
+    def test_search_catalog_is_scoped_to_session_toolsets(self):
+        import model_tools
+
+        for i in range(12):
+            self._register(f"mcp_scoped_gh_{i}", "mcp-scoped-gh")
+        self._register("scoped_oos_plugin", "scopedoosplugin")
+
+        # tool_search scoped to the github toolset must not count the
+        # out-of-scope plugin tool (or any of the host registry).
+        result = model_tools.handle_function_call(
+            function_name="tool_search",
+            function_args={"query": "mcp_scoped_gh", "limit": 5},
+            enabled_toolsets=["mcp-scoped-gh"],
+        )
+        parsed = json.loads(result)
+        assert parsed["total_available"] == 12, (
+            f"expected scoped catalog of 12, got {parsed['total_available']} "
+            "— catalog leaked tools outside the session's toolsets"
+        )
+        hit_names = {m["name"] for m in parsed["matches"]}
+        assert "scoped_oos_plugin" not in hit_names
+
+    def test_tool_call_rejects_out_of_scope_tool(self):
+        import model_tools
+
+        self._register("mcp_inscope_gh_op", "mcp-inscope-gh")
+        self._register("inscope_oos_plugin", "inscopeoosplugin")
+
+        # Out-of-scope plugin tool: rejected even though it is registered
+        # and deferrable in the global registry.
+        rejected = json.loads(model_tools.handle_function_call(
+            function_name="tool_call",
+            function_args={"name": "inscope_oos_plugin", "arguments": {}},
+            enabled_toolsets=["mcp-inscope-gh"],
+        ))
+        assert "error" in rejected
+        assert "not available in this session" in rejected["error"]
+
+        # In-scope tool: dispatches normally.
+        ok = json.loads(model_tools.handle_function_call(
+            function_name="tool_call",
+            function_args={"name": "mcp_inscope_gh_op", "arguments": {"repo": "a/b"}},
+            enabled_toolsets=["mcp-inscope-gh"],
+        ))
+        assert ok.get("ok") is True
+        assert ok.get("tool") == "mcp_inscope_gh_op"
+
+    def test_bridge_dispatch_does_not_pollute_global_resolved_names(self):
+        import model_tools
+
+        self._register("mcp_pollute_op_0", "mcp-pollute")
+        self._register("mcp_pollute_op_1", "mcp-pollute")
+
+        # Establish the scoped session global.
+        model_tools.get_tool_definitions(
+            enabled_toolsets=["mcp-pollute"], quiet_mode=True,
+        )
+        before = set(model_tools._last_resolved_tool_names)
+        assert "terminal" not in before
+
+        # A scoped tool_search call must not widen the process-global
+        # _last_resolved_tool_names to the whole registry (which would leak
+        # core/sandbox tools into execute_code's fallback).
+        model_tools.handle_function_call(
+            function_name="tool_search",
+            function_args={"query": "pollute"},
+            enabled_toolsets=["mcp-pollute"],
+        )
+        after = set(model_tools._last_resolved_tool_names)
+        assert "terminal" not in after, (
+            "bridge dispatch polluted _last_resolved_tool_names with "
+            "out-of-scope tools"
+        )
+
+    def test_scoped_deferrable_names_helper(self):
+        from tools.tool_search import scoped_deferrable_names
+
+        self._register("mcp_helper_op", "mcp-helper")
+        import model_tools
+        defs = model_tools.get_tool_definitions(
+            enabled_toolsets=["mcp-helper"],
+            quiet_mode=True,
+            skip_tool_search_assembly=True,
+        )
+        names = scoped_deferrable_names(defs)
+        assert "mcp_helper_op" in names
+        # core tools are never deferrable
+        assert "terminal" not in names
+
diff --git a/tests/tools/test_transcription.py b/tests/tools/test_transcription.py
index b7e399ca426..84f6c9679af 100644
--- a/tests/tools/test_transcription.py
+++ b/tests/tools/test_transcription.py
@@ -4,12 +4,10 @@ Tests cover provider selection, config loading, validation, and transcription
 dispatch.  All external dependencies (faster_whisper, openai) are mocked.
 """
 
-import json
 import os
 import tempfile
-from pathlib import Path
 from types import SimpleNamespace
-from unittest.mock import MagicMock, patch, mock_open
+from unittest.mock import MagicMock, patch
 
 import pytest
 
@@ -109,7 +107,6 @@ class TestValidateAudioFile:
         assert _validate_audio_file(str(f)) is None
 
     def test_too_large(self, tmp_path):
-        import stat as stat_mod
         f = tmp_path / "big.ogg"
         f.write_bytes(b"x")
         from tools.transcription_tools import _validate_audio_file, MAX_FILE_SIZE
@@ -292,7 +289,7 @@ class TestNormalizeLocalModel:
 
     def test_local_transcribe_normalises_model(self):
         """transcribe_audio with local provider must not pass 'whisper-1' to WhisperModel."""
-        import tempfile, os
+        import os
         from unittest.mock import MagicMock, patch
 
         with tempfile.NamedTemporaryFile(suffix=".ogg", delete=False) as f:
diff --git a/tests/tools/test_transcription_command_providers.py b/tests/tools/test_transcription_command_providers.py
index 6873b0389ea..749ab5e839c 100644
--- a/tests/tools/test_transcription_command_providers.py
+++ b/tests/tools/test_transcription_command_providers.py
@@ -19,15 +19,11 @@ identically on Linux, macOS, and Windows (with minor quoting differences).
 from __future__ import annotations
 
 import os
-import subprocess
 import sys
-import tempfile
 import wave
 from pathlib import Path
-from typing import Optional
 from unittest.mock import patch
 
-import pytest
 
 from tools.transcription_tools import (
     BUILTIN_STT_PROVIDERS,
@@ -39,12 +35,9 @@ from tools.transcription_tools import (
     _get_command_stt_timeout,
     _get_named_stt_provider_config,
     _has_any_command_stt_provider,
-    _is_command_stt_provider_config,
     _iter_command_stt_providers,
-    _quote_command_stt_placeholder,
     _render_command_stt_template,
     _resolve_command_stt_provider_config,
-    _shell_quote_context_stt,
     _transcribe_command_stt,
     transcribe_audio,
 )
diff --git a/tests/tools/test_transcription_dotenv_fallback.py b/tests/tools/test_transcription_dotenv_fallback.py
index 7f20c7a998e..6684e174fb6 100644
--- a/tests/tools/test_transcription_dotenv_fallback.py
+++ b/tests/tools/test_transcription_dotenv_fallback.py
@@ -101,12 +101,6 @@ class TestProviderSelectionGate:
             assert tt._get_provider({"enabled": True, "provider": "groq"}) == "groq"
 
     def test_explicit_mistral_sees_dotenv(self):
-        """Mistral STT is intentionally disabled (PyPI quarantine 2026-05-12).
-
-        Even with the dotenv key visible, explicit `provider: mistral` must
-        return "none" with a warning. Restore the previous behavior once
-        `mistralai` is un-quarantined on PyPI.
-        """
         from tools import transcription_tools as tt
 
         with patch.object(tt, "_HAS_FASTER_WHISPER", False), \
@@ -114,7 +108,7 @@ class TestProviderSelectionGate:
              patch.object(tt, "_has_local_command", return_value=False), \
              patch("hermes_cli.config.load_env",
                    return_value={"MISTRAL_API_KEY": "dotenv-secret"}):
-            assert tt._get_provider({"enabled": True, "provider": "mistral"}) == "none"
+            assert tt._get_provider({"enabled": True, "provider": "mistral"}) == "mistral"
 
     def test_explicit_xai_sees_dotenv(self):
         from tools import transcription_tools as tt
diff --git a/tests/tools/test_transcription_tools.py b/tests/tools/test_transcription_tools.py
index 236c89e7fa2..434971e9aac 100644
--- a/tests/tools/test_transcription_tools.py
+++ b/tests/tools/test_transcription_tools.py
@@ -1011,23 +1011,16 @@ class TestTranscribeMistral:
 # ============================================================================
 
 class TestGetProviderMistral:
-    """Mistral-specific provider selection tests.
-
-    Mistral STT is intentionally disabled in 2026-05-12+ while the
-    `mistralai` PyPI package is quarantined. These tests document that
-    explicit `provider: mistral` always returns "none" with a warning, and
-    that auto-detect skips mistral entirely.
-    """
+    """Mistral-specific provider selection tests."""
 
     def test_mistral_when_key_and_sdk_available(self, monkeypatch):
-        """Even with key + SDK, explicit mistral returns 'none' (disabled)."""
         monkeypatch.setenv("MISTRAL_API_KEY", "test-key")
         with patch("tools.transcription_tools._HAS_MISTRAL", True):
             from tools.transcription_tools import _get_provider
-            assert _get_provider({"provider": "mistral"}) == "none"
+            assert _get_provider({"provider": "mistral"}) == "mistral"
 
     def test_mistral_explicit_no_key_returns_none(self, monkeypatch):
-        """Explicit mistral with no key returns none."""
+        """Explicit mistral with no key returns none — no cross-provider fallback."""
         monkeypatch.delenv("MISTRAL_API_KEY", raising=False)
         with patch("tools.transcription_tools._HAS_MISTRAL", True):
             from tools.transcription_tools import _get_provider
@@ -1040,23 +1033,18 @@ class TestGetProviderMistral:
             from tools.transcription_tools import _get_provider
             assert _get_provider({"provider": "mistral"}) == "none"
 
-    def test_auto_detect_skips_mistral(self, monkeypatch):
-        """Auto-detect intentionally skips mistral (quarantine workaround).
-
-        With no other provider available but MISTRAL_API_KEY set, the result
-        must be 'none' — mistral is no longer in the auto-detect chain.
-        """
+    def test_auto_detect_mistral_after_openai(self, monkeypatch):
+        """Auto-detect: mistral is tried after openai when both are unavailable."""
         monkeypatch.delenv("GROQ_API_KEY", raising=False)
         monkeypatch.delenv("VOICE_TOOLS_OPENAI_KEY", raising=False)
         monkeypatch.delenv("OPENAI_API_KEY", raising=False)
-        monkeypatch.delenv("XAI_API_KEY", raising=False)
         monkeypatch.setenv("MISTRAL_API_KEY", "test-key")
         with patch("tools.transcription_tools._HAS_FASTER_WHISPER", False), \
              patch("tools.transcription_tools._has_local_command", return_value=False), \
              patch("tools.transcription_tools._HAS_OPENAI", False), \
              patch("tools.transcription_tools._HAS_MISTRAL", True):
             from tools.transcription_tools import _get_provider
-            assert _get_provider({}) == "none"
+            assert _get_provider({}) == "mistral"
 
     def test_auto_detect_openai_preferred_over_mistral(self, monkeypatch):
         """Auto-detect: openai is preferred over mistral (both paid, openai more common)."""
@@ -1330,13 +1318,8 @@ class TestGetProviderXAI:
             from tools.transcription_tools import _get_provider
             assert _get_provider({}) == "xai"
 
-    def test_auto_detect_mistral_skipped_xai_wins(self, monkeypatch):
-        """Auto-detect skips mistral entirely (quarantine) — xai wins.
-
-        Even with MISTRAL_API_KEY set, mistral is no longer in the
-        auto-detect chain. xai is the next-best fallback when the
-        local/groq/openai chain is unavailable.
-        """
+    def test_auto_detect_mistral_preferred_over_xai(self, monkeypatch):
+        """Auto-detect: mistral is preferred over xai."""
         monkeypatch.setenv("MISTRAL_API_KEY", "test-key")
         monkeypatch.setenv("XAI_API_KEY", "xai-test")
         monkeypatch.delenv("GROQ_API_KEY", raising=False)
@@ -1347,7 +1330,7 @@ class TestGetProviderXAI:
              patch("tools.transcription_tools._HAS_OPENAI", False), \
              patch("tools.transcription_tools._HAS_MISTRAL", True):
             from tools.transcription_tools import _get_provider
-            assert _get_provider({}) == "xai"
+            assert _get_provider({}) == "mistral"
 
     def test_auto_detect_no_key_returns_none(self, monkeypatch):
         """Auto-detect: xai skipped when no key is set."""
diff --git a/tests/tools/test_tts_command_providers.py b/tests/tools/test_tts_command_providers.py
index 583abcb588b..e3242274a00 100644
--- a/tests/tools/test_tts_command_providers.py
+++ b/tests/tools/test_tts_command_providers.py
@@ -14,10 +14,8 @@ differences) Windows.
 
 import json
 import os
-import subprocess
 import sys
 from pathlib import Path
-from typing import Optional
 from unittest.mock import patch
 
 import pytest
diff --git a/tests/tools/test_tts_max_text_length.py b/tests/tools/test_tts_max_text_length.py
index 38a763ea78c..49ae5ca2f4b 100644
--- a/tests/tools/test_tts_max_text_length.py
+++ b/tests/tools/test_tts_max_text_length.py
@@ -6,12 +6,9 @@ MiniMax allows 10000, and ElevenLabs allows 5000-40000 depending on model.
 """
 
 import json
-from unittest.mock import patch
 
-import pytest
 
 from tools.tts_tool import (
-    ELEVENLABS_MODEL_MAX_TEXT_LENGTH,
     FALLBACK_MAX_TEXT_LENGTH,
     PROVIDER_MAX_TEXT_LENGTH,
     _resolve_max_text_length,
diff --git a/tests/tools/test_tts_mistral.py b/tests/tools/test_tts_mistral.py
index 818a6c1d117..6e98946b6c0 100644
--- a/tests/tools/test_tts_mistral.py
+++ b/tests/tools/test_tts_mistral.py
@@ -162,34 +162,27 @@ class TestGenerateMistralTts:
 
 
 class TestTtsDispatcherMistral:
-    def test_dispatcher_returns_disabled_error(
+    def test_dispatcher_routes_to_mistral(
         self, tmp_path, mock_mistral_module, monkeypatch
     ):
-        """Mistral TTS is intentionally disabled (PyPI quarantine 2026-05-12).
-
-        The dispatcher must short-circuit with a clear status message before
-        attempting any SDK import, even when MISTRAL_API_KEY is set and a
-        mock SDK is wired in. Restore routing once `mistralai` is
-        un-quarantined on PyPI.
-        """
         import json
 
         from tools.tts_tool import text_to_speech_tool
 
         monkeypatch.setenv("MISTRAL_API_KEY", "test-key")
+        mock_mistral_module.audio.speech.complete.return_value = MagicMock(
+            audio_data=base64.b64encode(b"audio").decode()
+        )
 
         output_path = str(tmp_path / "out.mp3")
         with patch("tools.tts_tool._load_tts_config", return_value={"provider": "mistral"}):
             result = json.loads(text_to_speech_tool("Hello", output_path=output_path))
 
-        assert result["success"] is False
-        assert "temporarily disabled" in result["error"]
-        assert "quarantined" in result["error"]
-        # SDK must not have been called.
-        mock_mistral_module.audio.speech.complete.assert_not_called()
+        assert result["success"] is True
+        assert result["provider"] == "mistral"
+        mock_mistral_module.audio.speech.complete.assert_called_once()
 
     def test_dispatcher_returns_error_when_sdk_not_installed(self, tmp_path, monkeypatch):
-        """Same disabled message regardless of SDK presence."""
         import json
 
         from tools.tts_tool import text_to_speech_tool
@@ -203,7 +196,7 @@ class TestTtsDispatcherMistral:
             )
 
         assert result["success"] is False
-        assert "temporarily disabled" in result["error"]
+        assert "mistralai" in result["error"]
 
 
 class TestCheckTtsRequirementsMistral:
diff --git a/tests/tools/test_tts_piper.py b/tests/tools/test_tts_piper.py
index ef7330a18c9..c30b26dc9b9 100644
--- a/tests/tools/test_tts_piper.py
+++ b/tests/tools/test_tts_piper.py
@@ -7,7 +7,6 @@ without requiring the ``piper-tts`` package to actually be installed
 """
 
 import json
-import os
 import sys
 from pathlib import Path
 from unittest.mock import MagicMock, patch
diff --git a/tests/tools/test_video_analyze.py b/tests/tools/test_video_analyze.py
index 62987d96b20..1294ab8f558 100644
--- a/tests/tools/test_video_analyze.py
+++ b/tests/tools/test_video_analyze.py
@@ -2,20 +2,14 @@
 
 import asyncio
 import json
-import os
-from pathlib import Path
-from typing import Awaitable
 from unittest.mock import AsyncMock, MagicMock, patch
 
-import pytest
 
 from tools.vision_tools import (
     _detect_video_mime_type,
     _video_to_base64_data_url,
     _handle_video_analyze,
     _MAX_VIDEO_BASE64_BYTES,
-    _VIDEO_MIME_TYPES,
-    _VIDEO_SIZE_WARN_BYTES,
     video_analyze_tool,
     VIDEO_ANALYZE_SCHEMA,
 )
diff --git a/tests/tools/test_video_generation_tool_surface_matrix.py b/tests/tools/test_video_generation_tool_surface_matrix.py
index 3dc3257fc58..edd39b550e8 100644
--- a/tests/tools/test_video_generation_tool_surface_matrix.py
+++ b/tests/tools/test_video_generation_tool_surface_matrix.py
@@ -16,7 +16,7 @@ from __future__ import annotations
 import asyncio
 import json
 import types
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List
 
 import pytest
 import yaml
@@ -46,6 +46,18 @@ def matrix_env(tmp_path, monkeypatch):
         fal_calls.append({"endpoint": endpoint, "arguments": arguments})
         return {"video": {"url": f"https://fake-fal/{endpoint.replace('/','_')}.mp4"}}
     fake_fal.subscribe = _subscribe  # type: ignore
+
+    class _FalHandle:
+        def __init__(self, result):
+            self._result = result
+        def get(self):
+            return self._result
+
+    def _submit(endpoint, arguments=None, headers=None):
+        fal_calls.append({"endpoint": endpoint, "arguments": arguments})
+        return _FalHandle({"video": {"url": f"https://fake-fal/{endpoint.replace('/','_')}.mp4"}})
+    fake_fal.submit = _submit  # type: ignore
+
     monkeypatch.setitem(__import__("sys").modules, "fal_client", fake_fal)
 
     # httpx stub for xAI
diff --git a/tests/tools/test_vision_native_fast_path.py b/tests/tools/test_vision_native_fast_path.py
index 1df3003e5cd..9916ca369d5 100644
--- a/tests/tools/test_vision_native_fast_path.py
+++ b/tests/tools/test_vision_native_fast_path.py
@@ -11,10 +11,8 @@ from __future__ import annotations
 import asyncio
 import base64
 import json
-from pathlib import Path
 from unittest.mock import patch
 
-import pytest
 
 from tools.vision_tools import (
     _build_native_vision_tool_result,
@@ -211,3 +209,57 @@ class TestHandleVisionAnalyzeFastPath:
 
         assert not (isinstance(result, dict) and result.get("_multimodal") is True), \
             "Fast path fired for unknown provider; should have fallen through"
+
+    def test_supports_vision_override_bypasses_provider_allowlist(self, tmp_path):
+        """supports_vision=true enables the fast path on an unlisted provider."""
+        img = tmp_path / "x.png"
+        img.write_bytes(_TINY_PNG)
+
+        async def _aux_sentinel(*args, **kwargs):
+            return '{"sentinel": "aux-path"}'
+
+        from agent.auxiliary_client import set_runtime_main, clear_runtime_main
+        set_runtime_main("brand-new-provider", "llava-v1.6")
+        try:
+            with patch(
+                "hermes_cli.config.load_config",
+                return_value={"model": {"supports_vision": True}},
+            ), patch(
+                "tools.vision_tools.vision_analyze_tool", side_effect=_aux_sentinel,
+            ) as mock_aux:
+                coro = _handle_vision_analyze({"image_url": str(img), "question": "?"})
+                result = asyncio.get_event_loop().run_until_complete(coro)
+        finally:
+            clear_runtime_main()
+
+        assert isinstance(result, dict) and result.get("_multimodal") is True
+        mock_aux.assert_not_called()
+
+    def test_text_mode_wins_over_supports_vision_override(self, tmp_path):
+        """Explicit text routing blocks the fast path even with supports_vision."""
+        img = tmp_path / "x.png"
+        img.write_bytes(_TINY_PNG)
+
+        async def _aux_sentinel(*args, **kwargs):
+            return '{"sentinel": "aux-path"}'
+
+        from agent.auxiliary_client import set_runtime_main, clear_runtime_main
+        set_runtime_main("brand-new-provider", "llava-v1.6")
+        try:
+            with patch(
+                "hermes_cli.config.load_config",
+                return_value={
+                    "agent": {"image_input_mode": "text"},
+                    "model": {"supports_vision": True},
+                },
+            ), patch(
+                "tools.vision_tools.vision_analyze_tool", side_effect=_aux_sentinel,
+            ) as mock_aux:
+                coro = _handle_vision_analyze({"image_url": str(img), "question": "?"})
+                result = asyncio.get_event_loop().run_until_complete(coro)
+        finally:
+            clear_runtime_main()
+
+        assert isinstance(result, str)
+        assert json.loads(result) == {"sentinel": "aux-path"}
+        mock_aux.assert_called_once()
diff --git a/tests/tools/test_vision_tools.py b/tests/tools/test_vision_tools.py
index d8977f84927..e3bff50d56f 100644
--- a/tests/tools/test_vision_tools.py
+++ b/tests/tools/test_vision_tools.py
@@ -1,6 +1,5 @@
 """Tests for tools/vision_tools.py — URL validation, type hints, error logging."""
 
-import asyncio
 import json
 import logging
 import os
diff --git a/tests/tools/test_voice_cli_integration.py b/tests/tools/test_voice_cli_integration.py
index babdb4e7383..f43eb97c96d 100644
--- a/tests/tools/test_voice_cli_integration.py
+++ b/tests/tools/test_voice_cli_integration.py
@@ -2,7 +2,6 @@
 state management, streaming TTS activation, voice message prefix, _vprint."""
 
 import ast
-import os
 import queue
 import threading
 from types import SimpleNamespace
diff --git a/tests/tools/test_voice_mode.py b/tests/tools/test_voice_mode.py
index 4f0b31d9905..2a2b77bae2e 100644
--- a/tests/tools/test_voice_mode.py
+++ b/tests/tools/test_voice_mode.py
@@ -265,6 +265,46 @@ class TestDetectAudioEnvironment:
         assert result["warnings"] == []
         assert any("container" in n.lower() for n in result.get("notices", []))
 
+    def test_docker_with_pipewire_remote_and_no_devices_allows_voice(self, monkeypatch):
+        """PIPEWIRE_REMOTE should bypass empty PortAudio device lists in Docker."""
+        monkeypatch.delenv("SSH_CLIENT", raising=False)
+        monkeypatch.delenv("SSH_TTY", raising=False)
+        monkeypatch.delenv("SSH_CONNECTION", raising=False)
+        monkeypatch.delenv("PULSE_SERVER", raising=False)
+        monkeypatch.setenv("PIPEWIRE_REMOTE", "/run/user/1000/pipewire-0")
+        monkeypatch.setattr("hermes_constants.is_container", lambda: True)
+
+        sd = MagicMock()
+        sd.query_devices.return_value = []
+        monkeypatch.setattr("tools.voice_mode._import_audio", lambda: (sd, MagicMock()))
+
+        from tools.voice_mode import detect_audio_environment
+        result = detect_audio_environment()
+
+        assert result["available"] is True
+        assert result["warnings"] == []
+        assert any("host audio forwarding" in n.lower() for n in result.get("notices", []))
+
+    def test_docker_with_pipewire_remote_and_query_failure_allows_voice(self, monkeypatch):
+        """PIPEWIRE_REMOTE should bypass PortAudio query failures in Docker."""
+        monkeypatch.delenv("SSH_CLIENT", raising=False)
+        monkeypatch.delenv("SSH_TTY", raising=False)
+        monkeypatch.delenv("SSH_CONNECTION", raising=False)
+        monkeypatch.delenv("PULSE_SERVER", raising=False)
+        monkeypatch.setenv("PIPEWIRE_REMOTE", "/run/user/1000/pipewire-0")
+        monkeypatch.setattr("hermes_constants.is_container", lambda: True)
+
+        sd = MagicMock()
+        sd.query_devices.side_effect = RuntimeError("boom")
+        monkeypatch.setattr("tools.voice_mode._import_audio", lambda: (sd, MagicMock()))
+
+        from tools.voice_mode import detect_audio_environment
+        result = detect_audio_environment()
+
+        assert result["available"] is True
+        assert result["warnings"] == []
+        assert any("host audio forwarding" in n.lower() for n in result.get("notices", []))
+
     def test_docker_without_audio_forwarding_blocks_voice(self, monkeypatch):
         """Docker without PULSE_SERVER/PIPEWIRE_REMOTE keeps blocking voice mode."""
         monkeypatch.delenv("SSH_CLIENT", raising=False)
@@ -904,7 +944,7 @@ class TestSilenceDetection:
         mock_stream = MagicMock()
         mock_sd.InputStream.return_value = mock_stream
 
-        from tools.voice_mode import AudioRecorder, SAMPLE_RATE
+        from tools.voice_mode import AudioRecorder
 
         recorder = AudioRecorder()
         # Use very short durations for testing
@@ -1282,7 +1322,7 @@ class TestSubprocessTimeoutKill:
     """Bug: proc.wait(timeout) raised TimeoutExpired but process was not killed."""
 
     def test_timeout_kills_process(self):
-        import subprocess, os
+        import subprocess
         proc = subprocess.Popen(["sleep", "600"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
         pid = proc.pid
         assert proc.poll() is None
diff --git a/tests/tools/test_watch_patterns.py b/tests/tools/test_watch_patterns.py
index b2d8677b362..3d64acd0657 100644
--- a/tests/tools/test_watch_patterns.py
+++ b/tests/tools/test_watch_patterns.py
@@ -11,7 +11,6 @@ Covers:
 """
 
 import json
-import queue
 import time
 import pytest
 from unittest.mock import patch
@@ -19,11 +18,8 @@ from unittest.mock import patch
 from tools.process_registry import (
     ProcessRegistry,
     ProcessSession,
-    WATCH_MIN_INTERVAL_SECONDS,
     WATCH_STRIKE_LIMIT,
     WATCH_GLOBAL_MAX_PER_WINDOW,
-    WATCH_GLOBAL_WINDOW_SECONDS,
-    WATCH_GLOBAL_COOLDOWN_SECONDS,
 )
 
 
diff --git a/tests/tools/test_web_providers.py b/tests/tools/test_web_providers.py
index c94b5134ca3..bd3cce8754a 100644
--- a/tests/tools/test_web_providers.py
+++ b/tests/tools/test_web_providers.py
@@ -29,7 +29,7 @@ class TestWebProviderABCs:
     in-tree ABCs at ``tools.web_providers.base`` (separate
     ``WebSearchProvider`` + ``WebExtractProvider``) were deleted in the
     same PR — providers now advertise capabilities via
-    ``supports_search() / supports_extract() / supports_crawl()`` flags.
+    ``supports_search() / supports_extract()`` flags.
     """
 
     def test_cannot_instantiate_abc_directly(self):
@@ -65,7 +65,6 @@ class TestWebProviderABCs:
         assert d.is_available() is True
         assert d.supports_search() is True
         assert d.supports_extract() is False  # default
-        assert d.supports_crawl() is False  # default
         assert d.search("test")["success"] is True
 
     def test_concrete_multi_capability_provider_works(self):
@@ -89,27 +88,19 @@ class TestWebProviderABCs:
             def supports_extract(self) -> bool:
                 return True
 
-            def supports_crawl(self) -> bool:
-                return True
-
             def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
                 return {"success": True, "data": {"web": []}}
 
             def extract(self, urls: List[str], **kwargs: Any) -> List[Dict[str, Any]]:
                 return [{"url": urls[0], "content": "x"}]
 
-            def crawl(self, url: str, **kwargs: Any) -> Dict[str, Any]:
-                return {"results": [{"url": url, "content": "x"}]}
-
         d = Dummy()
         assert d.supports_search() is True
         assert d.supports_extract() is True
-        assert d.supports_crawl() is True
         assert d.extract(["https://example.com"])[0]["url"] == "https://example.com"
-        assert d.crawl("https://example.com")["results"][0]["url"] == "https://example.com"
 
-    def test_search_only_provider_skips_extract_and_crawl(self):
-        """Search-only providers don't have to implement extract() / crawl()."""
+    def test_search_only_provider_skips_extract(self):
+        """Search-only providers don't have to implement extract()."""
         from agent.web_search_provider import WebSearchProvider
 
         class SearchOnly(WebSearchProvider):
@@ -130,13 +121,12 @@ class TestWebProviderABCs:
             def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
                 return {"success": True, "data": {"web": []}}
 
-        # Should instantiate fine — extract/crawl have default
-        # supports_*() returning False and aren't required to be
-        # overridden when not advertised.
+        # Should instantiate fine — extract has default supports_*()
+        # returning False and isn't required to be overridden when not
+        # advertised.
         s = SearchOnly()
         assert s.supports_search() is True
         assert s.supports_extract() is False
-        assert s.supports_crawl() is False
 
 
 # ---------------------------------------------------------------------------
@@ -305,7 +295,6 @@ class TestUnconfiguredErrorEnvelopeParity:
         """``web_search_tool`` with no creds returns ``{"error": "Error searching web: ..."}``
         — matching main's ``tool_error()`` envelope, not a per-result shape.
         """
-        import json
         from tools import web_tools
 
         self._clear_web_creds(monkeypatch)
@@ -322,24 +311,184 @@ class TestUnconfiguredErrorEnvelopeParity:
         # No per-result burying
         assert "results" not in result
 
-    def test_unconfigured_crawl_emits_top_level_error(self, monkeypatch):
-        """``web_crawl_tool`` with no creds returns ``{"success": False, "error": "web_crawl requires Firecrawl..."}``
-        — the dispatcher gates on ``provider.is_available()`` BEFORE
-        delegating to the plugin so pre-config errors don't get wrapped
-        into ``results[]``.
+
+class TestDispatchersTriggerPluginDiscovery:
+    """Regression tests for #27580: each web_*_tool dispatcher must
+    idempotently call ``_ensure_web_plugins_loaded()`` before consulting
+    ``agent.web_search_registry``.
+
+    Without this, a tool call from a context that hasn't already loaded
+    plugins (subprocess agent runs, delegate children, standalone scripts,
+    test paths that import the registry directly) sees an empty registry
+    and returns the misleading "No web extract provider configured" error
+    even when the user has both the config key set AND the API key
+    exported.
+
+    Mirrors :func:`tools.browser_tool._ensure_browser_plugins_loaded` —
+    every other plugin-backed dispatcher (image_gen, video_gen, browser,
+    skills) already does this.
+    """
+
+    def _clear_registry(self):
+        """Reset the web_search registry to empty and return a callback
+        that restores the original contents. Used in a try/finally so the
+        snapshot is restored even when the dispatcher under test raises."""
+        from agent import web_search_registry
+
+        with web_search_registry._lock:
+            original = dict(web_search_registry._providers)
+            web_search_registry._providers.clear()
+
+        def _restore():
+            with web_search_registry._lock:
+                web_search_registry._providers.clear()
+                web_search_registry._providers.update(original)
+
+        return _restore
+
+    def test_web_extract_tool_runs_discovery_before_registry_lookup(self, monkeypatch):
+        """``web_extract_tool`` must invoke ``_ensure_web_plugins_loaded()``
+        before looking up the configured backend so the registry is
+        populated even from cold-start subprocess contexts.
+
+        Without the fix, ``get_provider('firecrawl')`` returns ``None``
+        on a fresh process and the dispatcher emits "No web extract
+        provider configured" despite the user having both
+        ``web.extract_backend: firecrawl`` and ``FIRECRAWL_API_KEY`` set
+        (issue #27580).
         """
         import asyncio
         import json
+        from unittest.mock import MagicMock
+        from agent.web_search_provider import WebSearchProvider
+        from agent import web_search_registry
         from tools import web_tools
 
-        self._clear_web_creds(monkeypatch)
-        monkeypatch.setattr(web_tools, "_firecrawl_client", None, raising=False)
-        monkeypatch.setattr(web_tools, "_firecrawl_client_config", None, raising=False)
-        monkeypatch.setattr(web_tools, "_load_web_config", lambda: {})
+        restore = self._clear_registry()
+        try:
+            class FakeFirecrawl(WebSearchProvider):
+                @property
+                def name(self) -> str:
+                    return "firecrawl"
+
+                @property
+                def display_name(self) -> str:
+                    return "Fake Firecrawl"
+
+                def is_available(self) -> bool:
+                    return True
+
+                def supports_extract(self) -> bool:
+                    return True
+
+                async def extract(self, urls, format=None):
+                    return [
+                        {"url": u, "title": "", "content": "ok",
+                         "raw_content": "ok", "metadata": {}}
+                        for u in urls
+                    ]
+
+            # Simulate "plugin discovery loads the firecrawl plugin": the
+            # wrapped helper registers the provider, mirroring what
+            # ``plugins/web/firecrawl/__init__.py:register`` does at
+            # real-process startup. Wrapping with ``MagicMock`` lets us
+            # also assert the dispatcher actually invoked the hook — if
+            # a future refactor accidentally drops the call the regression
+            # would otherwise hide behind a still-populated registry.
+            def _register_fake() -> None:
+                if web_search_registry.get_provider("firecrawl") is None:
+                    web_search_registry.register_provider(FakeFirecrawl())
+
+            mock_hook = MagicMock(wraps=_register_fake)
+            # Patch the helper on ``tools.web_tools`` directly rather than the
+            # underlying ``hermes_cli.plugins._ensure_plugins_discovered`` so
+            # the test stays valid even if the import inside the helper is
+            # later moved to module scope or renamed.
+            monkeypatch.setattr(
+                web_tools, "_ensure_web_plugins_loaded", mock_hook
+            )
+            monkeypatch.setattr(
+                web_tools, "_load_web_config",
+                lambda: {"extract_backend": "firecrawl"},
+            )
+            # Sanity: registry IS empty before the tool call.
+            assert web_search_registry.get_provider("firecrawl") is None
+
+            result = json.loads(asyncio.run(
+                web_tools.web_extract_tool(
+                    ["https://example.com"],
+                    use_llm_processing=False,
+                )
+            ))
+
+            # The hook must have been called BEFORE the registry lookup —
+            # that is the invariant under regression test. Without the
+            # explicit ``.called`` assertion the test could pass if the
+            # registry were populated by some unrelated side effect.
+            assert mock_hook.called, (
+                "web_extract_tool must call _ensure_web_plugins_loaded() "
+                "before resolving the registry"
+            )
+            assert "No web extract provider configured" not in json.dumps(result)
+            assert web_search_registry.get_provider("firecrawl") is not None
+        finally:
+            restore()
+
+    def test_web_search_tool_runs_discovery_before_registry_lookup(self, monkeypatch):
+        """``web_search_tool`` must invoke ``_ensure_web_plugins_loaded()``
+        before the registry lookup for the same reason as the extract
+        path (issue #27580 root cause applies to all dispatchers).
+        """
+        import json
+        from unittest.mock import MagicMock
+        from agent.web_search_provider import WebSearchProvider
+        from agent import web_search_registry
+        from tools import web_tools
+
+        restore = self._clear_registry()
+        try:
+            class FakeBrave(WebSearchProvider):
+                @property
+                def name(self) -> str:
+                    return "brave-free"
+
+                @property
+                def display_name(self) -> str:
+                    return "Fake Brave"
+
+                def is_available(self) -> bool:
+                    return True
+
+                def supports_search(self) -> bool:
+                    return True
+
+                def search(self, query, limit=5):
+                    return {"success": True, "data": {"web": [
+                        {"title": "ok", "url": "https://x", "description": "",
+                         "position": 0}
+                    ]}}
+
+            def _register_fake() -> None:
+                if web_search_registry.get_provider("brave-free") is None:
+                    web_search_registry.register_provider(FakeBrave())
+
+            mock_hook = MagicMock(wraps=_register_fake)
+            monkeypatch.setattr(
+                web_tools, "_ensure_web_plugins_loaded", mock_hook
+            )
+            monkeypatch.setattr(
+                web_tools, "_load_web_config",
+                lambda: {"search_backend": "brave-free"},
+            )
+            assert web_search_registry.get_provider("brave-free") is None
+
+            result = json.loads(web_tools.web_search_tool("hello", limit=1))
+            assert mock_hook.called, (
+                "web_search_tool must call _ensure_web_plugins_loaded() "
+                "before resolving the registry"
+            )
+            assert "No web search provider configured" not in json.dumps(result)
+            assert web_search_registry.get_provider("brave-free") is not None
+        finally:
+            restore()
 
-        result = json.loads(asyncio.run(web_tools.web_crawl_tool("https://example.com", use_llm_processing=False)))
-        assert result.get("success") is False
-        assert "error" in result, f"expected top-level 'error' key, got {result}"
-        assert "web_crawl requires Firecrawl" in result["error"]
-        # Crucially: no per-page burying
-        assert "results" not in result
diff --git a/tests/tools/test_web_providers_brave_free.py b/tests/tools/test_web_providers_brave_free.py
index bd09dc5a4cd..a75b9d38e4f 100644
--- a/tests/tools/test_web_providers_brave_free.py
+++ b/tests/tools/test_web_providers_brave_free.py
@@ -8,7 +8,7 @@ Covers:
 - _is_backend_available("brave-free") integration
 - _get_backend() recognizes "brave-free" as a valid configured backend
 - check_web_api_key() includes brave-free in availability check
-- web_extract / web_crawl return search-only errors when brave-free is active
+- web_extract returns a search-only error when brave-free is active
 """
 from __future__ import annotations
 
@@ -238,7 +238,7 @@ class TestBraveFreeBackendWiring:
 
 
 # ---------------------------------------------------------------------------
-# brave-free is search-only: web_extract / web_crawl return clear errors
+# brave-free is search-only: web_extract returns a clear error
 # ---------------------------------------------------------------------------
 
 
@@ -269,23 +269,3 @@ class TestBraveFreeSearchOnlyErrors:
         assert result["success"] is False
         assert "search-only" in result["error"].lower()
         assert "brave" in result["error"].lower()
-
-    def test_web_crawl_returns_search_only_error(self, monkeypatch):
-        import asyncio
-        from tools import web_tools
-
-        monkeypatch.setattr(web_tools, "_load_web_config", lambda: {"backend": "brave-free"})
-        monkeypatch.setenv("BRAVE_SEARCH_API_KEY", "BSAkey123")
-        monkeypatch.setattr(web_tools, "_is_tool_gateway_ready", lambda: False)
-        monkeypatch.setattr(web_tools, "check_firecrawl_api_key", lambda: False)
-        monkeypatch.setattr(web_tools, "is_safe_url", lambda url: True)
-        monkeypatch.setattr(web_tools, "check_website_access", lambda url: None)
-        monkeypatch.setattr("tools.interrupt.is_interrupted", lambda: False, raising=False)
-
-        result_str = asyncio.get_event_loop().run_until_complete(
-            web_tools.web_crawl_tool("https://example.com")
-        )
-        result = json.loads(result_str)
-        assert result["success"] is False
-        assert "search-only" in result["error"].lower()
-        assert "brave" in result["error"].lower()
diff --git a/tests/tools/test_web_providers_ddgs.py b/tests/tools/test_web_providers_ddgs.py
index 465b608c90a..7919931614f 100644
--- a/tests/tools/test_web_providers_ddgs.py
+++ b/tests/tools/test_web_providers_ddgs.py
@@ -5,14 +5,13 @@ Covers:
 - DDGSWebSearchProvider.search() — happy path, missing package, runtime error
 - Result normalization (title, url, description, position)
 - _is_backend_available("ddgs") / _get_backend() integration
-- web_extract / web_crawl return search-only errors when ddgs is active
+- web_extract returns a search-only error when ddgs is active
 """
 from __future__ import annotations
 
 import json
 import sys
 import types
-from unittest.mock import MagicMock
 
 import pytest
 
@@ -209,7 +208,7 @@ class TestDDGSBackendWiring:
 
 
 # ---------------------------------------------------------------------------
-# ddgs is search-only: web_extract / web_crawl return clear errors
+# ddgs is search-only: web_extract returns a clear error
 # ---------------------------------------------------------------------------
 
 
@@ -240,23 +239,3 @@ class TestDDGSSearchOnlyErrors:
         assert result["success"] is False
         assert "search-only" in result["error"].lower()
         assert "duckduckgo" in result["error"].lower() or "ddgs" in result["error"].lower()
-
-    def test_web_crawl_returns_search_only_error(self, monkeypatch):
-        import asyncio
-        from tools import web_tools
-
-        monkeypatch.setattr(web_tools, "_load_web_config", lambda: {"backend": "ddgs"})
-        monkeypatch.setattr(web_tools, "_ddgs_package_importable", lambda: True)
-        monkeypatch.setattr(web_tools, "_is_tool_gateway_ready", lambda: False)
-        monkeypatch.setattr(web_tools, "check_firecrawl_api_key", lambda: False)
-        monkeypatch.setattr(web_tools, "is_safe_url", lambda url: True)
-        monkeypatch.setattr(web_tools, "check_website_access", lambda url: None)
-        monkeypatch.setattr("tools.interrupt.is_interrupted", lambda: False, raising=False)
-
-        result_str = asyncio.get_event_loop().run_until_complete(
-            web_tools.web_crawl_tool("https://example.com")
-        )
-        result = json.loads(result_str)
-        assert result["success"] is False
-        assert "search-only" in result["error"].lower()
-        assert "duckduckgo" in result["error"].lower() or "ddgs" in result["error"].lower()
diff --git a/tests/tools/test_web_providers_searxng.py b/tests/tools/test_web_providers_searxng.py
index 8a5247f7beb..31bbaeb47ca 100644
--- a/tests/tools/test_web_providers_searxng.py
+++ b/tests/tools/test_web_providers_searxng.py
@@ -12,7 +12,6 @@ Covers:
 from __future__ import annotations
 
 import json
-import os
 from unittest.mock import MagicMock, patch
 
 import pytest
@@ -296,7 +295,7 @@ class TestCheckWebApiKey:
 
 
 # ---------------------------------------------------------------------------
-# searxng-only: web_extract and web_crawl return clear errors
+# searxng-only: web_extract returns a clear error
 # ---------------------------------------------------------------------------
 
 
@@ -312,26 +311,6 @@ class TestSearXNGOnlyExtractCrawlErrors:
         from agent.web_search_registry import _reset_for_tests
         _reset_for_tests()
 
-    def test_web_crawl_searxng_returns_clear_error(self, monkeypatch):
-        import asyncio
-        from tools import web_tools
-
-        monkeypatch.setattr(web_tools, "_load_web_config", lambda: {"backend": "searxng"})
-        monkeypatch.setenv("SEARXNG_URL", "http://localhost:8080")
-        monkeypatch.setattr(web_tools, "_is_tool_gateway_ready", lambda: False)
-        monkeypatch.setattr(web_tools, "check_firecrawl_api_key", lambda: False)
-        monkeypatch.setattr(web_tools, "is_safe_url", lambda url: True)
-        monkeypatch.setattr(web_tools, "check_website_access", lambda url: None)
-        monkeypatch.setattr("tools.interrupt.is_interrupted", lambda: False, raising=False)
-
-        import json
-        result_str = asyncio.get_event_loop().run_until_complete(
-            web_tools.web_crawl_tool("https://example.com")
-        )
-        result = json.loads(result_str)
-        assert result["success"] is False
-        assert "search-only" in result["error"].lower() or "SearXNG" in result["error"]
-
     def test_web_extract_searxng_returns_clear_error(self, monkeypatch):
         import asyncio
         from tools import web_tools
@@ -342,7 +321,6 @@ class TestSearXNGOnlyExtractCrawlErrors:
         monkeypatch.setattr(web_tools, "is_safe_url", lambda url: True)
         monkeypatch.setattr("tools.interrupt.is_interrupted", lambda: False, raising=False)
 
-        import json
         result_str = asyncio.get_event_loop().run_until_complete(
             web_tools.web_extract_tool(["https://example.com"])
         )
diff --git a/tests/tools/test_web_providers_xai.py b/tests/tools/test_web_providers_xai.py
index d5a3deaf689..2a6f0c63b81 100644
--- a/tests/tools/test_web_providers_xai.py
+++ b/tests/tools/test_web_providers_xai.py
@@ -66,7 +66,6 @@ class TestXAIProviderIdentity:
         p = XAIWebSearchProvider()
         assert p.supports_search() is True
         assert p.supports_extract() is False
-        assert p.supports_crawl() is False
 
     def test_display_name(self):
         from plugins.web.xai.provider import XAIWebSearchProvider
diff --git a/tests/tools/test_web_tools_tavily.py b/tests/tools/test_web_tools_tavily.py
index b8034efa064..de820794965 100644
--- a/tests/tools/test_web_tools_tavily.py
+++ b/tests/tools/test_web_tools_tavily.py
@@ -3,8 +3,8 @@
 Coverage:
   _tavily_request() — API key handling, endpoint construction, error propagation.
   _normalize_tavily_search_results() — search response normalization.
-  _normalize_tavily_documents() — extract/crawl response normalization, failed_results.
-  web_search_tool / web_extract_tool / web_crawl_tool — Tavily dispatch paths.
+  _normalize_tavily_documents() — extract response normalization, failed_results.
+  web_search_tool / web_extract_tool — Tavily dispatch paths.
 """
 
 import json
@@ -225,62 +225,3 @@ class TestWebExtractTavily:
             assert len(result["results"]) == 1
             assert result["results"][0]["url"] == "https://example.com"
 
-
-# ─── web_crawl_tool (Tavily dispatch) ─────────────────────────────────────────
-
-class TestWebCrawlTavily:
-    """Test web_crawl_tool dispatch to Tavily."""
-
-    _register_providers = staticmethod(register_all_web_providers)
-
-    @pytest.fixture(autouse=True)
-    def _populate_web_registry(self):
-        self._register_providers()
-        yield
-        from agent.web_search_registry import _reset_for_tests
-        _reset_for_tests()
-
-    def test_crawl_dispatches_to_tavily(self):
-        mock_response = MagicMock()
-        mock_response.json.return_value = {
-            "results": [
-                {"url": "https://example.com/page1", "raw_content": "Page 1 content", "title": "Page 1"},
-                {"url": "https://example.com/page2", "raw_content": "Page 2 content", "title": "Page 2"},
-            ]
-        }
-        mock_response.raise_for_status = MagicMock()
-
-        with patch("tools.web_tools._get_backend", return_value="tavily"), \
-             patch.dict(os.environ, {"TAVILY_API_KEY": "tvly-test"}), \
-             patch("tools.web_tools.httpx.post", return_value=mock_response), \
-             patch("tools.web_tools.check_website_access", return_value=None), \
-             patch("tools.web_tools.is_safe_url", return_value=True), \
-             patch("tools.interrupt.is_interrupted", return_value=False):
-            from tools.web_tools import web_crawl_tool
-            result = json.loads(asyncio.get_event_loop().run_until_complete(
-                web_crawl_tool("https://example.com", use_llm_processing=False)
-            ))
-            assert "results" in result
-            assert len(result["results"]) == 2
-            assert result["results"][0]["title"] == "Page 1"
-
-    def test_crawl_sends_instructions(self):
-        """Instructions are included in the Tavily crawl payload."""
-        mock_response = MagicMock()
-        mock_response.json.return_value = {"results": []}
-        mock_response.raise_for_status = MagicMock()
-
-        with patch("tools.web_tools._get_backend", return_value="tavily"), \
-             patch.dict(os.environ, {"TAVILY_API_KEY": "tvly-test"}), \
-             patch("tools.web_tools.httpx.post", return_value=mock_response) as mock_post, \
-             patch("tools.web_tools.check_website_access", return_value=None), \
-             patch("tools.web_tools.is_safe_url", return_value=True), \
-             patch("tools.interrupt.is_interrupted", return_value=False):
-            from tools.web_tools import web_crawl_tool
-            asyncio.get_event_loop().run_until_complete(
-                web_crawl_tool("https://example.com", instructions="Find docs", use_llm_processing=False)
-            )
-            call_kwargs = mock_post.call_args
-            payload = call_kwargs.kwargs.get("json") or call_kwargs[1].get("json")
-            assert payload["instructions"] == "Find docs"
-            assert payload["url"] == "https://example.com"
diff --git a/tests/tools/test_website_policy.py b/tests/tools/test_website_policy.py
index 5a163b7dc9e..bfe222ef892 100644
--- a/tests/tools/test_website_policy.py
+++ b/tests/tools/test_website_policy.py
@@ -326,7 +326,6 @@ def test_browser_navigate_returns_policy_block(monkeypatch):
 
 def test_browser_navigate_allows_when_shared_file_missing(monkeypatch, tmp_path):
     """Missing shared blocklist files are warned and skipped, not fatal."""
-    from tools import browser_tool
 
     config_path = tmp_path / "config.yaml"
     config_path.write_text(
@@ -350,7 +349,7 @@ def test_browser_navigate_allows_when_shared_file_missing(monkeypatch, tmp_path)
 
 
 class TestWebToolPolicy:
-    """Tests that exercise web_extract_tool / web_crawl_tool with website-policy gates.
+    """Tests that exercise web_extract_tool with website-policy gates.
 
     These tests need the bundled web providers to be registered in the
     agent.web_search_registry so the tool dispatchers can find an active
@@ -376,8 +375,7 @@ class TestWebToolPolicy:
         monkeypatch.setattr(web_tools, "is_safe_url", lambda url: True)
         # The per-URL website-policy gate moved into the firecrawl plugin's
         # extract() during the web-provider migration. Patch it at the new
-        # location; the dispatcher-level gate (used by web_crawl_tool's
-        # pre-flight) still lives on tools.web_tools.
+        # location.
         monkeypatch.setattr(
             firecrawl_provider,
             "check_website_access",
@@ -445,96 +443,6 @@ class TestWebToolPolicy:
         assert result["results"][0]["content"] == ""
         assert result["results"][0]["blocked_by_policy"]["rule"] == "blocked.test"
 
-    @pytest.mark.asyncio
-    async def test_web_crawl_short_circuits_blocked_url(self, monkeypatch):
-        from tools import web_tools
-
-        # web_crawl_tool checks for Firecrawl env before website policy
-        monkeypatch.setenv("FIRECRAWL_API_KEY", "fake-key")
-        # Allow test URLs past SSRF check so website policy is what gets tested
-        monkeypatch.setattr(web_tools, "is_safe_url", lambda url: True)
-        # The dispatcher-level (seed-URL) policy gate still lives on web_tools.
-        # No per-page gate runs in this test because the dispatcher returns
-        # immediately when the seed is blocked, before delegating to the plugin.
-        monkeypatch.setattr(
-            web_tools,
-            "check_website_access",
-            lambda url: {
-                "host": "blocked.test",
-                "rule": "blocked.test",
-                "source": "config",
-                "message": "Blocked by website policy",
-            },
-        )
-        # If the dispatcher ever reaches the firecrawl plugin's crawl(), the test
-        # fails — pin the plugin module's client lookup so we'd notice.
-        from plugins.web.firecrawl import provider as firecrawl_provider
-        monkeypatch.setattr(
-            firecrawl_provider,
-            "_get_firecrawl_client",
-            lambda: pytest.fail("firecrawl plugin should not run for blocked crawl URL"),
-        )
-        monkeypatch.setattr("tools.interrupt.is_interrupted", lambda: False)
-
-        result = json.loads(await web_tools.web_crawl_tool("https://blocked.test", use_llm_processing=False))
-
-        assert result["results"][0]["url"] == "https://blocked.test"
-        assert result["results"][0]["blocked_by_policy"]["rule"] == "blocked.test"
-
-    @pytest.mark.asyncio
-    async def test_web_crawl_blocks_redirected_final_url(self, monkeypatch):
-        from tools import web_tools
-        from plugins.web.firecrawl import provider as firecrawl_provider
-
-        # Force the firecrawl plugin to be the active crawl provider.
-        monkeypatch.setenv("FIRECRAWL_API_KEY", "fake-key")
-        # Allow test URLs past SSRF check so website policy is what gets tested
-        monkeypatch.setattr(web_tools, "is_safe_url", lambda url: True)
-
-        def fake_check(url):
-            # Dispatcher seed-URL gate (web_tools.check_website_access call)
-            # and plugin per-page gate (firecrawl_provider.check_website_access
-            # call) both flow through this single fake_check.
-            if url == "https://allowed.test":
-                return None
-            if url == "https://blocked.test/final":
-                return {
-                    "host": "blocked.test",
-                    "rule": "blocked.test",
-                    "source": "config",
-                    "message": "Blocked by website policy",
-                }
-            pytest.fail(f"unexpected URL checked: {url}")
-
-        class FakeCrawlClient:
-            def crawl(self, url, **kwargs):
-                return {
-                    "data": [
-                        {
-                            "markdown": "secret crawl content",
-                            "metadata": {
-                                "title": "Redirected crawl page",
-                                "sourceURL": "https://blocked.test/final",
-                            },
-                        }
-                    ]
-                }
-
-        # After PR #25182 follow-up: per-page policy gate lives in
-        # plugins.web.firecrawl.provider.crawl(). Patch the gate + client at
-        # the plugin location. The dispatcher-level (seed) gate also reads
-        # web_tools.check_website_access — patch both.
-        monkeypatch.setattr(web_tools, "check_website_access", fake_check)
-        monkeypatch.setattr(firecrawl_provider, "check_website_access", fake_check)
-        monkeypatch.setattr(firecrawl_provider, "_get_firecrawl_client", lambda: FakeCrawlClient())
-        monkeypatch.setattr("tools.interrupt.is_interrupted", lambda: False)
-
-        result = json.loads(await web_tools.web_crawl_tool("https://allowed.test", use_llm_processing=False))
-
-        assert result["results"][0]["content"] == ""
-        assert result["results"][0]["error"] == "Blocked by website policy"
-        assert result["results"][0]["blocked_by_policy"]["rule"] == "blocked.test"
-
 
 def test_check_website_access_fails_open_on_malformed_config(tmp_path, monkeypatch):
     """Malformed config with default path should fail open (return None), not crash."""
diff --git a/tests/tools/test_windows_native_support.py b/tests/tools/test_windows_native_support.py
index f92ed22dff7..baba1985950 100644
--- a/tests/tools/test_windows_native_support.py
+++ b/tests/tools/test_windows_native_support.py
@@ -11,13 +11,11 @@ Windows runner.
 
 from __future__ import annotations
 
-import importlib
 import os
 import signal
-import subprocess
 import sys
 from pathlib import Path
-from unittest.mock import MagicMock, patch
+from unittest.mock import MagicMock
 
 import pytest
 
diff --git a/tests/tools/test_write_deny.py b/tests/tools/test_write_deny.py
index 02fca0eca13..6fe6c802ace 100644
--- a/tests/tools/test_write_deny.py
+++ b/tests/tools/test_write_deny.py
@@ -2,7 +2,6 @@
 
 import os
 
-import pytest
 from pathlib import Path
 from unittest.mock import patch
 
diff --git a/tests/tools/test_zombie_process_cleanup.py b/tests/tools/test_zombie_process_cleanup.py
index 8085d112318..e31e042fb20 100644
--- a/tests/tools/test_zombie_process_cleanup.py
+++ b/tests/tools/test_zombie_process_cleanup.py
@@ -9,10 +9,8 @@ import os
 import signal
 import subprocess
 import sys
-import time
 import threading
 
-import pytest
 
 
 def _spawn_sleep(seconds: float = 60) -> subprocess.Popen:
@@ -191,7 +189,7 @@ class TestGatewayCleanupWiring:
         """gateway stop() should call close() on all running agents."""
         import asyncio
         import threading
-        from unittest.mock import AsyncMock, MagicMock, patch
+        from unittest.mock import MagicMock, patch
 
         from gateway.run import GatewayRunner
 
diff --git a/tests/tui_gateway/test_entry_sys_path.py b/tests/tui_gateway/test_entry_sys_path.py
index e7f9e47cee0..15619d2a9fc 100644
--- a/tests/tui_gateway/test_entry_sys_path.py
+++ b/tests/tui_gateway/test_entry_sys_path.py
@@ -6,7 +6,6 @@ to shadow the installed utils module.  entry.py must sanitize sys.path before
 any non-stdlib import is resolved.
 """
 
-import importlib
 import os
 import sys
 from unittest.mock import patch
diff --git a/tests/tui_gateway/test_goal_command.py b/tests/tui_gateway/test_goal_command.py
index 050b36bc877..d06f5b8fbbd 100644
--- a/tests/tui_gateway/test_goal_command.py
+++ b/tests/tui_gateway/test_goal_command.py
@@ -44,11 +44,17 @@ def server(hermes_home):
     ):
         mod = importlib.import_module("tui_gateway.server")
         yield mod
+        # Reset module-level session state without re-importing. importlib.reload
+        # would re-register the module's atexit hooks (ThreadPoolExecutor
+        # shutdown, _shutdown_sessions); the duplicates race the stderr
+        # buffer at interpreter shutdown and surface as Fatal Python error:
+        # _enter_buffered_busy. Clearing the per-session dicts gives the
+        # next test a clean slate; _methods is NOT cleared because it's
+        # populated at module import time and re-registration only happens
+        # via reload (which we don't do).
         mod._sessions.clear()
         mod._pending.clear()
         mod._answers.clear()
-        mod._methods.clear()
-        importlib.reload(mod)
 
 
 @pytest.fixture()
diff --git a/tests/tui_gateway/test_protocol.py b/tests/tui_gateway/test_protocol.py
index 549a2da6337..2c20e77a126 100644
--- a/tests/tui_gateway/test_protocol.py
+++ b/tests/tui_gateway/test_protocol.py
@@ -30,11 +30,17 @@ def server():
         import importlib
         mod = importlib.import_module("tui_gateway.server")
         yield mod
+        # Reset module-level session state without re-importing. importlib.reload
+        # would re-register the module's atexit hooks (ThreadPoolExecutor
+        # shutdown, _shutdown_sessions); the duplicates race the stderr
+        # buffer at interpreter shutdown and surface as Fatal Python error:
+        # _enter_buffered_busy. Clearing the per-session dicts gives the
+        # next test a clean slate; _methods is NOT cleared because it's
+        # populated at module import time and re-registration only happens
+        # via reload (which we don't do).
         mod._sessions.clear()
         mod._pending.clear()
         mod._answers.clear()
-        mod._methods.clear()
-        importlib.reload(mod)
 
 
 @pytest.fixture()
diff --git a/tests/tui_gateway/test_review_summary_callback.py b/tests/tui_gateway/test_review_summary_callback.py
index 9fc7f54ddc6..2c6d3cbeb7c 100644
--- a/tests/tui_gateway/test_review_summary_callback.py
+++ b/tests/tui_gateway/test_review_summary_callback.py
@@ -11,7 +11,6 @@ transcript line.
 
 from __future__ import annotations
 
-import sys
 from unittest.mock import MagicMock, patch
 
 import pytest
@@ -34,11 +33,17 @@ def server():
 
         mod = importlib.import_module("tui_gateway.server")
         yield mod
+        # Reset module-level session state without re-importing. importlib.reload
+        # would re-register the module's atexit hooks (ThreadPoolExecutor
+        # shutdown, _shutdown_sessions); the duplicates race the stderr
+        # buffer at interpreter shutdown and surface as Fatal Python error:
+        # _enter_buffered_busy. Clearing the per-session dicts gives the
+        # next test a clean slate; _methods is NOT cleared because it's
+        # populated at module import time and re-registration only happens
+        # via reload (which we don't do).
         mod._sessions.clear()
         mod._pending.clear()
         mod._answers.clear()
-        mod._methods.clear()
-        importlib.reload(mod)
 
 
 def test_init_session_attaches_background_review_callback(server, monkeypatch):
@@ -49,7 +54,7 @@ def test_init_session_attaches_background_review_callback(server, monkeypatch):
     monkeypatch.setattr(server, "_SlashWorker", lambda *a, **kw: object())
     monkeypatch.setattr(server, "_wire_callbacks", lambda sid: None)
     monkeypatch.setattr(server, "_notify_session_boundary", lambda *a, **kw: None)
-    monkeypatch.setattr(server, "_session_info", lambda agent: {"model": "m"})
+    monkeypatch.setattr(server, "_session_info", lambda agent, session=None: {"model": "m"})
     monkeypatch.setattr(server, "_load_show_reasoning", lambda: False)
     monkeypatch.setattr(server, "_load_tool_progress_mode", lambda: "all")
 
@@ -101,7 +106,7 @@ def test_review_summary_callback_survives_agent_without_attribute(server, monkey
     monkeypatch.setattr(server, "_SlashWorker", lambda *a, **kw: object())
     monkeypatch.setattr(server, "_wire_callbacks", lambda sid: None)
     monkeypatch.setattr(server, "_notify_session_boundary", lambda *a, **kw: None)
-    monkeypatch.setattr(server, "_session_info", lambda agent: {"model": "m"})
+    monkeypatch.setattr(server, "_session_info", lambda agent, session=None: {"model": "m"})
     monkeypatch.setattr(server, "_load_show_reasoning", lambda: False)
     monkeypatch.setattr(server, "_load_tool_progress_mode", lambda: "all")
     monkeypatch.setattr(server, "_emit", lambda *a, **kw: None)
diff --git a/tools/approval.py b/tools/approval.py
index 6e282c98d59..1dbb6eb6e4f 100644
--- a/tools/approval.py
+++ b/tools/approval.py
@@ -367,6 +367,13 @@ DANGEROUS_PATTERNS = [
     # terminates all running agents mid-work.
     (r'\bhermes\s+gateway\s+(stop|restart)\b', "stop/restart hermes gateway (kills running agents)"),
     (r'\bhermes\s+update\b', "hermes update (restarts gateway, kills running agents)"),
+    # Docker container lifecycle — any user with docker.sock mounted (a common
+    # Docker Compose pattern) gives the agent the ability to restart/stop/kill
+    # containers without approval.  These are agent-initiated lifecycle operations
+    # that should always require user consent, just like `hermes gateway restart`
+    # already does for the gateway process.
+    (r'\bdocker\s+compose\s+(restart|stop|kill|down)\b', "docker compose restart/stop/kill/down (container lifecycle)"),
+    (r'\bdocker\s+(restart|stop|kill)\b', "docker restart/stop/kill (container lifecycle)"),
     # Gateway protection: never start gateway outside systemd management
     (r'gateway\s+run\b.*(&\s*$|&\s*;|\bdisown\b|\bsetsid\b)', "start gateway outside systemd (use 'systemctl --user restart hermes-gateway')"),
     (r'\bnohup\b.*gateway\s+run\b', "start gateway outside systemd (use 'systemctl --user restart hermes-gateway')"),
@@ -1050,6 +1057,107 @@ def _format_tirith_description(tirith_result: dict) -> str:
     return "Security scan — " + "; ".join(parts)
 
 
+def _await_gateway_decision(session_key: str, notify_cb, approval_data: dict,
+                            *, surface: str = "gateway") -> dict:
+    """Enqueue *approval_data*, notify the user, and block the calling agent
+    thread until the request is resolved or the gateway approval timeout
+    elapses — firing pre/post approval hooks and cleaning up the queue entry.
+
+    Shared by the terminal command guard (``check_all_command_guards``) and
+    the execute_code guard (``check_execute_code_guard``) so the fiddly
+    heartbeat-polling wait loop lives in one place.
+
+    Returns ``{"resolved": bool, "choice": str|None}`` on completion, or
+    ``{"resolved": False, "choice": None, "notify_failed": True}`` if the
+    notify callback raised.  Persistence of an approved choice and building
+    the final tool-facing result dict remain the caller's responsibility.
+    """
+    command = approval_data.get("command", "")
+    description = approval_data.get("description", "")
+    primary_key = approval_data.get("pattern_key", "")
+    all_keys = approval_data.get("pattern_keys", [primary_key])
+
+    entry = _ApprovalEntry(approval_data)
+    with _lock:
+        _gateway_queues.setdefault(session_key, []).append(entry)
+
+    def _drop_entry() -> None:
+        with _lock:
+            queue = _gateway_queues.get(session_key, [])
+            if entry in queue:
+                queue.remove(entry)
+            if not queue:
+                _gateway_queues.pop(session_key, None)
+
+    # Notify plugins that an approval is being requested. Fires before the
+    # gateway notify callback so observers get the event in real time.
+    _fire_approval_hook(
+        "pre_approval_request",
+        command=command,
+        description=description,
+        pattern_key=primary_key,
+        pattern_keys=list(all_keys),
+        session_key=session_key,
+        surface=surface,
+    )
+
+    # Notify the user (bridges sync agent thread → async gateway)
+    try:
+        notify_cb(approval_data)
+    except Exception as exc:
+        logger.warning("Gateway approval notify failed: %s", exc)
+        _drop_entry()
+        return {"resolved": False, "choice": None, "notify_failed": True}
+
+    # Block until the user responds or timeout (default 5 min). Poll in short
+    # slices so we can fire activity heartbeats every ~10s to the agent's
+    # inactivity tracker — otherwise the gateway watchdog kills the agent
+    # while the user is still responding. Mirrors _wait_for_process() cadence.
+    timeout = _get_approval_config().get("gateway_timeout", 300)
+    try:
+        timeout = int(timeout)
+    except (ValueError, TypeError):
+        timeout = 300
+
+    try:
+        from tools.environments.base import touch_activity_if_due
+    except Exception:  # pragma: no cover
+        touch_activity_if_due = None
+
+    _now = time.monotonic()
+    _deadline = _now + max(timeout, 0)
+    _activity_state = {"last_touch": _now, "start": _now}
+    resolved = False
+    while True:
+        _remaining = _deadline - time.monotonic()
+        if _remaining <= 0:
+            break
+        if entry.event.wait(timeout=min(1.0, _remaining)):
+            resolved = True
+            break
+        if touch_activity_if_due is not None:
+            touch_activity_if_due(_activity_state, "waiting for user approval")
+
+    _drop_entry()
+
+    choice = entry.result
+    # Normalize outcome for the post hook. Unresolved (timeout) and None both
+    # mean the user never responded; report that explicitly so plugins can
+    # distinguish timeout from explicit deny.
+    _outcome = "timeout" if not resolved else (choice if choice else "timeout")
+    _fire_approval_hook(
+        "post_approval_response",
+        command=command,
+        description=description,
+        pattern_key=primary_key,
+        pattern_keys=list(all_keys),
+        session_key=session_key,
+        surface=surface,
+        choice=_outcome,
+    )
+    return {"resolved": resolved, "choice": choice}
+
+
 def check_all_command_guards(command: str, env_type: str,
                              approval_callback=None) -> dict:
     """Run all pre-exec security checks and return a single approval decision.
@@ -1200,113 +1308,27 @@ def check_all_command_guards(command: str, env_type: str,
 
         if notify_cb is not None:
             # --- Blocking gateway approval (queue-based) ---
-            # Each call gets its own _ApprovalEntry so parallel subagents
-            # and execute_code threads can block concurrently.
+            # Block the agent thread until the user responds; the notify +
+            # heartbeat wait loop is shared with check_execute_code_guard via
+            # _await_gateway_decision().
             approval_data = {
                 "command": command,
                 "pattern_key": primary_key,
                 "pattern_keys": all_keys,
                 "description": combined_desc,
             }
-            entry = _ApprovalEntry(approval_data)
-            with _lock:
-                _gateway_queues.setdefault(session_key, []).append(entry)
-
-            # Notify plugins that an approval is being requested. Fires before
-            # the gateway notify callback so observers (e.g. macOS notifier
-            # plugins, audit logs, Slack alerts) get the event in real time.
-            _fire_approval_hook(
-                "pre_approval_request",
-                command=command,
-                description=combined_desc,
-                pattern_key=primary_key,
-                pattern_keys=list(all_keys),
-                session_key=session_key,
-                surface="gateway",
+            decision = _await_gateway_decision(
+                session_key, notify_cb, approval_data, surface="gateway"
             )
-
-            # Notify the user (bridges sync agent thread → async gateway)
-            try:
-                notify_cb(approval_data)
-            except Exception as exc:
-                logger.warning("Gateway approval notify failed: %s", exc)
-                with _lock:
-                    queue = _gateway_queues.get(session_key, [])
-                    if entry in queue:
-                        queue.remove(entry)
-                    if not queue:
-                        _gateway_queues.pop(session_key, None)
+            if decision.get("notify_failed"):
                 return {
                     "approved": False,
                     "message": "BLOCKED: Failed to send approval request to user. Do NOT retry.",
                     "pattern_key": primary_key,
                     "description": combined_desc,
                 }
-
-            # Block until the user responds or timeout (default 5 min).
-            # Poll in short slices so we can fire activity heartbeats every
-            # ~10s to the agent's inactivity tracker.  Without this, the
-            # blocking event.wait() never touches activity, and the
-            # gateway's inactivity watchdog (agent.gateway_timeout, default
-            # 1800s) kills the agent while the user is still responding to
-            # the approval prompt.  Mirrors the _wait_for_process() cadence
-            # in tools/environments/base.py.
-            timeout = _get_approval_config().get("gateway_timeout", 300)
-            try:
-                timeout = int(timeout)
-            except (ValueError, TypeError):
-                timeout = 300
-
-            try:
-                from tools.environments.base import touch_activity_if_due
-            except Exception:  # pragma: no cover
-                touch_activity_if_due = None
-
-            _now = time.monotonic()
-            _deadline = _now + max(timeout, 0)
-            _activity_state = {"last_touch": _now, "start": _now}
-            resolved = False
-            while True:
-                _remaining = _deadline - time.monotonic()
-                if _remaining <= 0:
-                    break
-                # 1s poll slice — the event is set immediately when the
-                # user responds, so slice length only controls heartbeat
-                # cadence, not user-visible responsiveness.
-                if entry.event.wait(timeout=min(1.0, _remaining)):
-                    resolved = True
-                    break
-                if touch_activity_if_due is not None:
-                    touch_activity_if_due(
-                        _activity_state, "waiting for user approval"
-                    )
-
-            # Clean up this entry from the queue
-            with _lock:
-                queue = _gateway_queues.get(session_key, [])
-                if entry in queue:
-                    queue.remove(entry)
-                if not queue:
-                    _gateway_queues.pop(session_key, None)
-
-            choice = entry.result
-            # Normalize outcome for the post hook. Unresolved (timeout) and
-            # None both mean the user never responded; report that explicitly
-            # so plugins can distinguish timeout from explicit deny.
-            _outcome = (
-                "timeout" if not resolved
-                else (choice if choice else "timeout")
-            )
-            _fire_approval_hook(
-                "post_approval_response",
-                command=command,
-                description=combined_desc,
-                pattern_key=primary_key,
-                pattern_keys=list(all_keys),
-                session_key=session_key,
-                surface="gateway",
-                choice=_outcome,
-            )
+            resolved = decision["resolved"]
+            choice = decision["choice"]
 
             if not resolved or choice is None or choice == "deny":
                 # Consent contract: silence is NOT consent, and an explicit
@@ -1430,5 +1452,173 @@ def check_all_command_guards(command: str, env_type: str,
             "user_approved": True, "description": combined_desc}
 
 
+def check_execute_code_guard(code: str, env_type: str) -> dict:
+    """Approve an execute_code script before its child process is spawned.
+
+    execute_code runs arbitrary local Python — the script can call
+    ``subprocess``, ``os.system``, ``ctypes``, or other process/file APIs
+    directly, none of which pass through ``terminal()`` /
+    ``DANGEROUS_PATTERNS``. In gateway/ask contexts we fail closed by approving
+    the script as a whole before it runs (#30882). Returns the same dict
+    contract as ``check_all_command_guards``.
+
+    Scope (documented limitation, #30882): in a purely local non-interactive
+    non-gateway session (no TTY, not gateway, not cron-deny) this returns
+    approved — matching the existing terminal auto-approve contract. The
+    hardline floor still blocks catastrophic ``terminal()`` commands the script
+    issues; running arbitrary code headlessly without any approval surface is
+    trusted-by-config (set a gateway/ask surface or ``approvals.cron_mode`` to
+    require approval).
+    """
+    pattern_key = "execute_code"
+    description = (
+        "execute_code script execution. The script can spawn subprocesses or "
+        "mutate files without passing through terminal command approval; "
+        "approval is one-shot for this run."
+    )
+
+    # Isolated backends already sandbox the child — matches the container skip
+    # in check_all_command_guards / check_dangerous_command.
+    if env_type in {"docker", "singularity", "modal", "daytona", "vercel_sandbox"}:
+        return {"approved": True, "message": None}
+
+    # --yolo or approvals.mode=off: bypass (session- or process-scoped).
+    approval_mode = _get_approval_mode()
+    if _YOLO_MODE_FROZEN or is_current_session_yolo_enabled() or approval_mode == "off":
+        return {"approved": True, "message": None}
+
+    is_gateway = _is_gateway_approval_context()
+    is_ask = env_var_enabled("HERMES_EXEC_ASK")
+
+    # Cron: no user is present to approve arbitrary code.
+    if env_var_enabled("HERMES_CRON_SESSION"):
+        if _get_cron_approval_mode() == "deny":
+            return {
+                "approved": False,
+                "message": (
+                    "BLOCKED: execute_code runs arbitrary local Python "
+                    "(including subprocess calls that bypass shell-string "
+                    "approval checks). Cron jobs run without a user present "
+                    "to approve it. Use normal tools instead, or set "
+                    "approvals.cron_mode: approve only if this cron profile "
+                    "is intentionally trusted."
+                ),
+                "pattern_key": pattern_key,
+                "description": description,
+                "outcome": "blocked",
+                "user_consent": False,
+            }
+        return {"approved": True, "message": None}
+
+    # Only gateway/ask contexts get the one-shot whole-script approval.
+    #   * CLI interactive: the script's terminal() calls are guarded per-call
+    #     (context now propagates into the RPC thread, #33057); a whole-script
+    #     prompt would fire on every execute_code call.
+    #   * Local non-interactive non-gateway: documented limitation above.
+    if not is_gateway and not is_ask:
+        return {"approved": True, "message": None}
+
+    session_key = get_current_session_key()
+    # Built only now (past the early-return gates) so the common non-approval
+    # paths don't pay to copy a potentially-large script into this string.
+    command = f"execute_code <<'PY'\n{code}\nPY"
+
+    # Smart mode: ask the aux LLM about the whole script. An APPROVE here only
+    # suppresses the redundant whole-script prompt; the per-call terminal()
+    # guards (restored by context propagation) still run independently.
+    if approval_mode == "smart":
+        verdict = _smart_approve(command, description)
+        if verdict == "approve":
+            logger.debug("Smart approval: auto-approved execute_code for session %s",
+                         session_key)
+            return {"approved": True, "message": None,
+                    "smart_approved": True, "description": description}
+        if verdict == "deny":
+            return {
+                "approved": False,
+                "message": ("BLOCKED by smart approval: execute_code script "
+                            "execution was assessed as genuinely dangerous. "
+                            "Do NOT retry."),
+                "smart_denied": True,
+                "pattern_key": pattern_key,
+                "description": description,
+                "outcome": "denied",
+                "user_consent": False,
+            }
+        # verdict == "escalate" → fall through to manual approval
+
+    notify_cb = None
+    with _lock:
+        notify_cb = _gateway_notify_cbs.get(session_key)
+
+    if notify_cb is None:
+        # No gateway callback registered (e.g. ask-mode without a notifier):
+        # surface a pending approval for backward compatibility.
+        submit_pending(session_key, {
+            "command": command,
+            "pattern_key": pattern_key,
+            "pattern_keys": [pattern_key],
+            "description": description,
+        })
+        return {
+            "approved": False,
+            "pattern_key": pattern_key,
+            "status": "pending_approval",
+            "approval_pending": True,
+            "command": command,
+            "description": description,
+            "message": (
+                f"⚠️ {description}. Asking the user for approval.\n\n"
+                f"**Code:**\n```python\n{code}\n```"
+            ),
+        }
+
+    approval_data = {
+        "command": command,
+        "pattern_key": pattern_key,
+        "pattern_keys": [pattern_key],
+        "description": description,
+    }
+    decision = _await_gateway_decision(
+        session_key, notify_cb, approval_data, surface="gateway"
+    )
+    if decision.get("notify_failed"):
+        return {
+            "approved": False,
+            "message": ("BLOCKED: Failed to send execute_code approval request "
+                        "to user. Do NOT retry."),
+            "pattern_key": pattern_key,
+            "description": description,
+            "outcome": "notify_failed",
+            "user_consent": False,
+        }
+
+    resolved = decision["resolved"]
+    choice = decision["choice"]
+
+    if not resolved or choice is None or choice == "deny":
+        reason = "timed out without user response" if not resolved else "denied by user"
+        addendum = " Silence is not consent." if not resolved else ""
+        return {
+            "approved": False,
+            "message": (
+                f"BLOCKED: execute_code script {reason}. The user has NOT "
+                f"consented to running this code. Do NOT retry, do NOT rephrase "
+                f"the script, and do NOT attempt the same outcome via a "
+                f"different tool.{addendum}"
+            ),
+            "pattern_key": pattern_key,
+            "description": description,
+            "outcome": "timeout" if not resolved else "denied",
+            "user_consent": False,
+        }
+
+    # Approved — one-shot only. Deliberately NO approve_session/approve_permanent:
+    # each execute_code script is distinct arbitrary code, so approval never
+    # persists to future scripts.
+    return {"approved": True, "message": None,
+            "user_approved": True, "description": description}
+
+
 # Load permanent allowlist from config on module import
 load_permanent_allowlist()
diff --git a/tools/browser_camofox.py b/tools/browser_camofox.py
index 45bf885def6..b920160bd67 100644
--- a/tools/browser_camofox.py
+++ b/tools/browser_camofox.py
@@ -18,6 +18,9 @@ Setup::
     docker run -p 9377:9377 -e CAMOFOX_PORT=9377 jo-inc/camofox-browser
 
 Then set ``CAMOFOX_URL=http://localhost:9377`` in ``~/.hermes/.env``.
+For Docker Camofox, optionally set ``CAMOFOX_REWRITE_LOOPBACK_URLS=true``
+so page URLs like ``http://127.0.0.1:3000`` are opened inside the
+container as ``http://host.docker.internal:3000``.
 """
 
 from __future__ import annotations
@@ -29,6 +32,7 @@ import os
 import threading
 import uuid
 from typing import Any, Dict, Optional
+from urllib.parse import SplitResult, urlsplit, urlunsplit
 
 import requests
 
@@ -159,6 +163,89 @@ def _adopt_existing_tab_enabled(camofox_cfg: Dict[str, Any]) -> bool:
     return bool(camofox_cfg.get("adopt_existing_tab"))
 
 
+def _loopback_rewrite_enabled(camofox_cfg: Dict[str, Any]) -> bool:
+    """Return whether loopback navigation URLs should be rewritten for Docker.
+
+    ``CAMOFOX_URL`` itself often points at a host-published Docker port such as
+    ``http://127.0.0.1:9377``.  That is correct for Hermes talking to the
+    Camofox control API, but a page URL like ``http://127.0.0.1:3000`` is opened
+    by the browser *inside* the Docker container.  In that context loopback
+    points at the container, not the host running the web app.
+
+    The rewrite is opt-in because non-Docker Camofox installs run the browser on
+    the host, where loopback URLs are already correct.
+    """
+    env_value = _env_flag("CAMOFOX_REWRITE_LOOPBACK_URLS")
+    if env_value is not None:
+        return env_value
+    return bool(camofox_cfg.get("rewrite_loopback_urls"))
+
+
+def _loopback_rewrite_host(camofox_cfg: Dict[str, Any]) -> str:
+    """Return the host alias used when rewriting loopback page URLs."""
+    return (
+        os.getenv("CAMOFOX_LOOPBACK_HOST_ALIAS", "").strip()
+        or str(camofox_cfg.get("loopback_host_alias") or "").strip()
+        or "host.docker.internal"
+    )
+
+
+def _is_loopback_hostname(hostname: Optional[str]) -> bool:
+    """Return True for localhost/127.0.0.0/8/::1-style hostnames."""
+    if not hostname:
+        return False
+    host = hostname.strip().strip("[]").lower()
+    if host in {"localhost", "localhost.localdomain"}:
+        return True
+    try:
+        import ipaddress
+
+        return ipaddress.ip_address(host).is_loopback
+    except ValueError:
+        return False
+
+
+def _rewrite_loopback_url_for_camofox(url: str) -> tuple[str, Optional[Dict[str, str]]]:
+    """Rewrite loopback page URLs for Docker-hosted Camofox, if configured.
+
+    Returns ``(rewritten_url, metadata)``.  ``metadata`` is present only when a
+    rewrite happened so the tool result can disclose the change to the model.
+    """
+    camofox_cfg = _get_camofox_config()
+    if not _loopback_rewrite_enabled(camofox_cfg):
+        return url, None
+
+    try:
+        parsed = urlsplit(url)
+    except ValueError:
+        return url, None
+
+    if parsed.scheme not in {"http", "https"} or not _is_loopback_hostname(parsed.hostname):
+        return url, None
+
+    alias = _loopback_rewrite_host(camofox_cfg)
+    if not alias:
+        return url, None
+
+    userinfo = ""
+    if parsed.username:
+        userinfo = parsed.username
+        if parsed.password:
+            userinfo += f":{parsed.password}"
+        userinfo += "@"
+    host_part = f"[{alias}]" if ":" in alias and not alias.startswith("[") else alias
+    port_part = f":{parsed.port}" if parsed.port else ""
+    rewritten = urlunsplit(
+        SplitResult(parsed.scheme, f"{userinfo}{host_part}{port_part}", parsed.path, parsed.query, parsed.fragment)
+    )
+    return rewritten, {
+        "from": parsed.hostname or "",
+        "to": alias,
+        "original_url": url,
+        "rewritten_url": rewritten,
+    }
+
+
 # ---------------------------------------------------------------------------
 # Session management
 # ---------------------------------------------------------------------------
@@ -336,23 +423,31 @@ def _delete(path: str, body: dict = None, timeout: int = _DEFAULT_TIMEOUT) -> di
 def camofox_navigate(url: str, task_id: Optional[str] = None) -> str:
     """Navigate to a URL via Camofox."""
     try:
+        browser_url, rewrite_info = _rewrite_loopback_url_for_camofox(url)
         session = _get_session(task_id)
         if not session["tab_id"]:
             # Create tab with the target URL directly
-            session = _ensure_tab(task_id, url)
-            data = {"ok": True, "url": url}
+            session = _ensure_tab(task_id, browser_url)
+            data = {"ok": True, "url": browser_url}
         else:
             # Navigate existing tab
             data = _post(
                 f"/tabs/{session['tab_id']}/navigate",
-                {"userId": session["user_id"], "url": url},
+                {"userId": session["user_id"], "url": browser_url},
                 timeout=60,
             )
         result = {
             "success": True,
-            "url": data.get("url", url),
+            "url": data.get("url", browser_url),
             "title": data.get("title", ""),
         }
+        if rewrite_info:
+            result["requested_url"] = url
+            result["url_rewrite"] = rewrite_info
+            result["warning"] = (
+                "Rewrote loopback URL for Docker-hosted Camofox: "
+                f"{rewrite_info['from']} -> {rewrite_info['to']}"
+            )
         vnc = get_vnc_url()
         if vnc:
             result["vnc_url"] = vnc
diff --git a/tools/browser_cdp_tool.py b/tools/browser_cdp_tool.py
index e2aae88308f..e13264767d9 100644
--- a/tools/browser_cdp_tool.py
+++ b/tools/browser_cdp_tool.py
@@ -257,7 +257,6 @@ def _browser_cdp_via_supervisor(
         )
 
     # Dispatch onto the supervisor's loop.
-    import asyncio as _asyncio
     loop = supervisor._loop  # type: ignore[attr-defined]
     if loop is None or not loop.is_running():
         return tool_error(
diff --git a/tools/browser_tool.py b/tools/browser_tool.py
index 5320d6adfdb..f7d4d7577b4 100644
--- a/tools/browser_tool.py
+++ b/tools/browser_tool.py
@@ -33,8 +33,8 @@ Environment Variables:
   requires Scale Plan (default: "false")
 - BROWSERBASE_KEEP_ALIVE: Enable keepAlive for session reconnection after disconnects,
   requires paid plan (default: "true")
-- BROWSERBASE_SESSION_TIMEOUT: Custom session timeout in milliseconds. Set to extend
-  beyond project default. Common values: 600000 (10min), 1800000 (30min) (default: none)
+- BROWSERBASE_SESSION_TIMEOUT: Custom session timeout in seconds (max 21600 = 6h).
+  Set to extend beyond project default. Common values: 600 (10min), 1800 (30min) (default: none)
 
 Usage:
     from tools.browser_tool import browser_navigate, browser_snapshot, browser_click
@@ -55,7 +55,6 @@ import json
 import logging
 import os
 import re
-import signal
 import subprocess
 import shutil
 import sys
@@ -63,7 +62,7 @@ import tempfile
 import threading
 import time
 import requests
-from typing import Dict, Any, Optional, List, Tuple
+from typing import Dict, Any, Optional, List, Tuple, Union
 from pathlib import Path
 from agent.auxiliary_client import call_llm
 from hermes_constants import get_hermes_home
@@ -1579,7 +1578,7 @@ BROWSER_TOOL_SCHEMAS = [
     },
     {
         "name": "browser_vision",
-        "description": "Take a screenshot of the current page and analyze it with vision AI. Use this when you need to visually understand what's on the page - especially useful for CAPTCHAs, visual verification challenges, complex layouts, or when the text snapshot doesn't capture important visual information. Returns both the AI analysis and a screenshot_path that you can share with the user by including MEDIA:<screenshot_path> in your response. Requires browser_navigate to be called first.",
+        "description": "Take a screenshot of the current page so you can inspect it visually. Use this when you need to understand what the page looks like - especially for CAPTCHAs, visual verification challenges, complex layouts, or cases where the text snapshot misses important visual information. When your active model has native vision, the screenshot is attached to your context directly and you inspect it on the next turn; otherwise Hermes falls back to an auxiliary vision model and returns a text analysis. Includes a screenshot_path that you can share with the user by including MEDIA:<screenshot_path> in your response. Requires browser_navigate to be called first.",
         "parameters": {
             "type": "object",
             "properties": {
@@ -3045,17 +3044,19 @@ def browser_get_images(task_id: Optional[str] = None) -> str:
         return json.dumps(_copy_fallback_warning(response, result), ensure_ascii=False)
 
 
-def browser_vision(question: str, annotate: bool = False, task_id: Optional[str] = None) -> str:
+def browser_vision(question: str, annotate: bool = False, task_id: Optional[str] = None) -> Union[str, Dict[str, Any]]:
     """
-    Take a screenshot of the current page and analyze it with vision AI.
+    Take a screenshot of the current page for visual inspection.
 
-    This tool captures what's visually displayed in the browser and sends it
-    to Gemini for analysis. Useful for understanding visual content that the
-    text-based snapshot may not capture (CAPTCHAs, verification challenges,
-    images, complex layouts, etc.).
+    Captures what's visually displayed in the browser. When the active model
+    supports native vision, the screenshot is attached directly to the
+    conversation so the model can inspect it on the next turn; otherwise Hermes
+    falls back to the auxiliary vision model and returns a text analysis. Useful
+    for visual content the text-based snapshot may not capture (CAPTCHAs,
+    verification challenges, images, complex layouts, etc.).
 
-    The screenshot is saved persistently and its file path is returned alongside
-    the analysis, so it can be shared with users via MEDIA:<path> in the response.
+    The screenshot is saved persistently and its file path is returned so it
+    can be shared with users via MEDIA:<path> in the response.
 
     Args:
         question: What you want to know about the page visually
@@ -3063,7 +3064,8 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
         task_id: Task identifier for session isolation
 
     Returns:
-        JSON string with vision analysis results and screenshot_path
+        A JSON string with vision analysis results and screenshot_path, or a
+        multimodal tool-result envelope carrying the screenshot and metadata.
     """
     if _is_camofox_mode():
         from tools.browser_camofox import camofox_vision
@@ -3188,6 +3190,34 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
         _screenshot_b64 = base64.b64encode(_screenshot_bytes).decode("ascii")
         data_url = f"data:image/png;base64,{_screenshot_b64}"
 
+        # Fast path: when native image routing is in effect for the active main
+        # model, attach the screenshot directly instead of describing it through
+        # an auxiliary vision LLM. The model inspects the pixels on its next
+        # turn — no aux call, no information loss. Consistent with vision_analyze.
+        from tools.vision_tools import (
+            _build_native_vision_tool_result,
+            _should_use_native_vision_fast_path,
+        )
+
+        if _should_use_native_vision_fast_path():
+            native_result = _build_native_vision_tool_result(
+                image_url=str(screenshot_path),
+                question=question,
+                image_data_url=data_url,
+                image_size_bytes=len(_screenshot_bytes),
+            )
+            meta = native_result.setdefault("meta", {})
+            meta["screenshot_path"] = str(screenshot_path)
+            if _lp_fallback_warning:
+                meta["fallback_warning"] = _lp_fallback_warning
+            if annotate and result.get("data", {}).get("annotations"):
+                meta["annotations"] = result["data"]["annotations"]
+            native_result["text_summary"] = (
+                f"{native_result.get('text_summary', '')} "
+                f"Screenshot path: {screenshot_path}"
+            ).strip()
+            return native_result
+
         vision_prompt = (
             f"You are analyzing a screenshot of a web browser.\n\n"
             f"User's question: {question}\n\n"
diff --git a/tools/code_execution_tool.py b/tools/code_execution_tool.py
index 19aee58c8db..40581e57f2d 100644
--- a/tools/code_execution_tool.py
+++ b/tools/code_execution_tool.py
@@ -35,7 +35,6 @@ import logging
 import os
 import platform
 import shlex
-import signal
 import socket
 import subprocess
 import sys
@@ -47,6 +46,8 @@ import uuid
 _IS_WINDOWS = platform.system() == "Windows"
 from typing import Any, Dict, List, Optional
 
+from tools.thread_context import propagate_context_to_thread
+
 # Availability gate.  On Windows we fall back to loopback TCP for the
 # sandbox RPC transport (AF_UNIX is unreliable on Windows Python) — see
 # ``_use_tcp_rpc`` in ``_execute_local`` below.  That makes execute_code
@@ -75,13 +76,30 @@ MAX_STDERR_BYTES = 10_000    # 10 KB
 
 # Environment variable scrubbing rules (shared between the local + remote
 # backends).  Secret-substring block is applied first; anything left must
-# match either a safe prefix or, on Windows, an OS-essential name.
+# match a safe prefix, the operational HERMES_ allowlist, or (on Windows) an
+# OS-essential name.
+#
+# NB: the broad "HERMES_" prefix was deliberately removed (#27303) — it leaked
+# HERMES_*-named config that lacks a secret substring (e.g. HERMES_BASE_URL,
+# HERMES_KANBAN_DB, HERMES_*_WEBHOOK).  The child only needs the few
+# location/profile vars in _HERMES_CHILD_ALLOWED below; HERMES_RPC_SOCKET /
+# HERMES_RPC_DIR / TZ / HOME are injected explicitly after scrubbing.
 _SAFE_ENV_PREFIXES = ("PATH", "HOME", "USER", "LANG", "LC_", "TERM",
                       "TMPDIR", "TMP", "TEMP", "SHELL", "LOGNAME",
-                      "XDG_", "PYTHONPATH", "VIRTUAL_ENV", "CONDA",
-                      "HERMES_")
+                      "XDG_", "PYTHONPATH", "VIRTUAL_ENV", "CONDA")
 _SECRET_SUBSTRINGS = ("KEY", "TOKEN", "SECRET", "PASSWORD", "CREDENTIAL",
-                      "PASSWD", "AUTH")
+                      "PASSWD", "AUTH", "DSN", "WEBHOOK")
+
+# Operational HERMES_* vars the child legitimately needs by exact name — these
+# are non-secret runtime-location flags (the same set hermes_cli treats as the
+# runtime location) that repo-root modules a sandbox script imports may read at
+# import time.  None match _SECRET_SUBSTRINGS.
+_HERMES_CHILD_ALLOWED = frozenset({
+    "HERMES_HOME",
+    "HERMES_PROFILE",
+    "HERMES_CONFIG",
+    "HERMES_ENV",
+})
 
 # Windows-only: a handful of variables are required by the OS/CRT itself.
 # Without them, even stdlib calls like ``socket.socket()`` fail with
@@ -120,9 +138,10 @@ def _scrub_child_env(source_env, is_passthrough=None, is_windows=None):
 
     Rules (order matters):
       1. Passthrough vars (skill- or config-declared) always pass.
-      2. Secret-substring names (KEY/TOKEN/etc.) are blocked.
+      2. Secret-substring names (KEY/TOKEN/DSN/WEBHOOK/etc.) are blocked.
       3. Names matching a safe prefix pass.
-      4. On Windows, a small OS-essential allowlist passes by exact name
+      4. Operational HERMES_* vars (_HERMES_CHILD_ALLOWED) pass by exact name.
+      5. On Windows, a small OS-essential allowlist passes by exact name
          — without these the child can't even create a socket or spawn a
          subprocess.
 
@@ -139,6 +158,14 @@ def _scrub_child_env(source_env, is_passthrough=None, is_windows=None):
         is_windows = _IS_WINDOWS
 
     scrubbed = {}
+    # Non-secret HERMES_* vars dropped by the tightened allowlist (#27303). The
+    # broad "HERMES_" prefix used to pass these through; now only the
+    # operational set does. The drop is intentional (those vars can carry
+    # config like HERMES_KANBAN_DB / HERMES_BASE_URL), but a sandbox script
+    # that imports a repo module reading one at import time would otherwise see
+    # it silently unset. Surface the drop once so the behavior change is
+    # diagnosable and points at the env_passthrough opt-in escape hatch.
+    _dropped_hermes = []
     for k, v in source_env.items():
         if is_passthrough(k):
             scrubbed[k] = v
@@ -148,8 +175,25 @@ def _scrub_child_env(source_env, is_passthrough=None, is_windows=None):
         if any(k.startswith(p) for p in _SAFE_ENV_PREFIXES):
             scrubbed[k] = v
             continue
+        if k in _HERMES_CHILD_ALLOWED:
+            scrubbed[k] = v
+            continue
         if is_windows and k.upper() in _WINDOWS_ESSENTIAL_ENV_VARS:
             scrubbed[k] = v
+            continue
+        if k.startswith("HERMES_"):
+            # Non-secret (secrets were already dropped above) and not in any
+            # allowlist — a deliberately-dropped HERMES_* var.
+            _dropped_hermes.append(k)
+    if _dropped_hermes:
+        logger.debug(
+            "execute_code: dropped %d non-allowlisted HERMES_* var(s) from the "
+            "sandbox child env (%s). This is intentional hardening (#27303); if "
+            "a sandbox script legitimately needs one, declare it via "
+            "env_passthrough in the skill/config so it passes by explicit opt-in.",
+            len(_dropped_hermes),
+            ", ".join(sorted(_dropped_hermes)),
+        )
     return scrubbed
 
 
@@ -888,9 +932,11 @@ def _execute_remote(
         _ship_file_to_remote(env, f"{sandbox_dir}/hermes_tools.py", tools_src)
         _ship_file_to_remote(env, f"{sandbox_dir}/script.py", code)
 
-        # Start RPC polling thread
+        # Wrapped so the thread inherits the turn's approval context + callbacks
+        # (see tools.thread_context) — else sandbox RPC tool calls lose approval
+        # routing (#33057).
         rpc_thread = threading.Thread(
-            target=_rpc_poll_loop,
+            target=propagate_context_to_thread(_rpc_poll_loop),
             args=(
                 env, f"{sandbox_dir}/rpc", effective_task_id,
                 tool_call_log, tool_call_counter, max_tool_calls,
@@ -1050,6 +1096,21 @@ def execute_code(
     # Dispatch: remote backends use file-based RPC, local uses UDS
     from tools.terminal_tool import _get_env_config
     env_type = _get_env_config()["env_type"]
+
+    # execute_code runs arbitrary Python (subprocess/os.system/...) that never
+    # passes through terminal()/DANGEROUS_PATTERNS, so guard the whole script
+    # here before either dispatch path spawns it. Runs synchronously in the
+    # caller (tool-executor) thread, which holds the session context (#30882).
+    from tools.approval import check_execute_code_guard
+    _guard = check_execute_code_guard(code, env_type)
+    if not _guard.get("approved", False):
+        return json.dumps({
+            "status": "error",
+            "error": _guard.get("message") or "execute_code blocked by approval guard.",
+            "tool_calls_made": 0,
+            "duration_seconds": 0,
+        }, ensure_ascii=False)
+
     if env_type != "local":
         return _execute_remote(code, task_id, enabled_tools)
 
@@ -1136,8 +1197,11 @@ def execute_code(
             os.chmod(sock_path, 0o600)
         server_sock.listen(1)
 
+        # Wrapped so the thread inherits the turn's approval context + callbacks
+        # (see tools.thread_context) — else gateway sandbox tool calls silently
+        # auto-approve dangerous commands (#33057, #30882).
         rpc_thread = threading.Thread(
-            target=_rpc_server_loop,
+            target=propagate_context_to_thread(_rpc_server_loop),
             args=(
                 server_sock, task_id, tool_call_log,
                 tool_call_counter, max_tool_calls, sandbox_tools,
diff --git a/tools/computer_use/cua_backend.py b/tools/computer_use/cua_backend.py
index ffdeeb2a388..714ae6d3260 100644
--- a/tools/computer_use/cua_backend.py
+++ b/tools/computer_use/cua_backend.py
@@ -22,13 +22,10 @@ import base64
 import json
 import logging
 import os
-import platform
 import re
 import shutil
-import subprocess
 import sys
 import threading
-from concurrent.futures import Future
 from typing import Any, Dict, List, Optional, Tuple
 
 from tools.computer_use.backend import (
@@ -81,10 +78,6 @@ def _is_macos() -> bool:
     return sys.platform == "darwin"
 
 
-def _is_arm_mac() -> bool:
-    return _is_macos() and platform.machine() == "arm64"
-
-
 def cua_driver_binary_available() -> bool:
     """True if `cua-driver` is on $PATH or HERMES_CUA_DRIVER_CMD resolves."""
     return bool(shutil.which(_CUA_DRIVER_CMD))
@@ -707,29 +700,3 @@ class CuaDriverBackend(ComputerUseBackend):
             message = data
         return ActionResult(ok=ok, action=name, message=message,
                             meta=data if isinstance(data, dict) else {})
-
-
-def _parse_element(d: Dict[str, Any]) -> UIElement:
-    bounds = d.get("bounds") or (0, 0, 0, 0)
-    if isinstance(bounds, dict):
-        bounds = (
-            int(bounds.get("x", 0)),
-            int(bounds.get("y", 0)),
-            int(bounds.get("w", bounds.get("width", 0))),
-            int(bounds.get("h", bounds.get("height", 0))),
-        )
-    elif isinstance(bounds, (list, tuple)) and len(bounds) == 4:
-        bounds = tuple(int(v) for v in bounds)
-    else:
-        bounds = (0, 0, 0, 0)
-    return UIElement(
-        index=int(d.get("index", 0)),
-        role=str(d.get("role", "") or ""),
-        label=str(d.get("label", "") or ""),
-        bounds=bounds,  # type: ignore[arg-type]
-        app=str(d.get("app", "") or ""),
-        pid=int(d.get("pid", 0) or 0),
-        window_id=int(d.get("windowId", 0) or 0),
-        attributes={k: v for k, v in d.items()
-                    if k not in {"index", "role", "label", "bounds", "app", "pid", "windowId"}},
-    )
diff --git a/tools/cronjob_tools.py b/tools/cronjob_tools.py
index 18c68a7ce91..ada4e4af006 100644
--- a/tools/cronjob_tools.py
+++ b/tools/cronjob_tools.py
@@ -7,7 +7,6 @@ Compatibility wrappers remain for direct Python callers and legacy tests.
 
 import json
 import logging
-import os
 import re
 import sys
 from pathlib import Path
@@ -23,7 +22,6 @@ sys.path.insert(0, str(Path(__file__).parent.parent))
 from cron.jobs import (
     AmbiguousJobReference,
     create_job,
-    get_job,
     list_jobs,
     parse_schedule,
     pause_job,
diff --git a/tools/env_probe.py b/tools/env_probe.py
new file mode 100644
index 00000000000..dfb715a9871
--- /dev/null
+++ b/tools/env_probe.py
@@ -0,0 +1,247 @@
+"""Local-environment toolchain probe for the system prompt.
+
+When the terminal backend is local (the agent's tools run on the same
+machine as Hermes itself), we surface a single deterministic line about
+Python tooling state so models don't have to discover it by hitting
+walls.  Common failure modes this addresses:
+
+* Hermes ships under one Python (e.g. 3.11 in a bundled venv) while the
+  user's login shell has a different one (e.g. 3.12 system).  ``pip``
+  resolved from PATH may not match ``python3 -m pip``.
+* The bundled-venv Python has no pip module installed → ``python3 -m
+  pip`` returns ``No module named pip``.
+* The system Python is PEP-668 externally-managed → naive
+  ``pip install`` fails with ``error: externally-managed-environment``.
+
+The probe is cheap (a handful of subprocess calls, ~50ms total),
+cached for the lifetime of the process, and emits **at most one
+short line** when something non-default is detected.  When the
+environment looks normal (python3+pip both present and matched, no
+PEP 668), it emits nothing — no token cost.
+
+Remote terminal backends (docker, modal, ssh, …) are skipped: the
+host's Python state is irrelevant when tools run inside a sandbox.
+The sandbox has its own existing probe (``_probe_remote_backend``)
+in ``agent/prompt_builder.py``.
+
+Toggle via ``agent.environment_probe`` in config.yaml (default True).
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+import shutil
+import subprocess
+import sys
+import threading
+from typing import Optional
+
+logger = logging.getLogger(__name__)
+
+# Module-level cache.  The probe result is deterministic for the
+# lifetime of the process — Python install state doesn't change
+# mid-session in any way that would matter for the system prompt.
+_CACHE_LOCK = threading.Lock()
+_CACHED_LINE: Optional[str] = None  # None = not probed yet; "" = probed, nothing to say.
+
+# Remote backends — keep in sync with agent/prompt_builder.py:_REMOTE_TERMINAL_BACKENDS.
+# Duplicated rather than imported to avoid a circular import (prompt_builder
+# imports nothing from tools).
+_REMOTE_BACKENDS = frozenset({
+    "docker", "singularity", "modal", "daytona", "ssh", "managed_modal",
+})
+
+
+def _run(cmd: list[str], timeout: float = 3.0) -> tuple[int, str, str]:
+    """Run a short subprocess.  Returns (returncode, stdout, stderr).
+
+    Failures (binary missing, timeout, OSError) return (-1, "", "<reason>").
+    """
+    try:
+        result = subprocess.run(
+            cmd,
+            capture_output=True,
+            text=True,
+            timeout=timeout,
+            check=False,
+        )
+        return result.returncode, (result.stdout or "").strip(), (result.stderr or "").strip()
+    except FileNotFoundError:
+        return -1, "", "not found"
+    except subprocess.TimeoutExpired:
+        return -1, "", "timeout"
+    except OSError as exc:
+        return -1, "", f"oserror: {exc}"
+
+
+def _python_version_of(binary: str) -> Optional[str]:
+    """Return a short version string like ``3.12.4`` for ``binary``, or None."""
+    if not shutil.which(binary):
+        return None
+    rc, out, err = _run([binary, "-c", "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}')"])
+    if rc == 0 and out:
+        return out
+    return None
+
+
+def _has_pip_module(binary: str) -> bool:
+    """True if ``<binary> -m pip --version`` succeeds."""
+    if not shutil.which(binary):
+        return False
+    rc, _out, _err = _run([binary, "-m", "pip", "--version"])
+    return rc == 0
+
+
+def _detect_pep668(binary: str) -> bool:
+    """True when ``<binary>``'s install location is PEP-668 externally-managed.
+
+    Looks for ``EXTERNALLY-MANAGED`` next to the stdlib (the marker file
+    Debian/Ubuntu drop in to gate naive ``pip install``).
+    """
+    if not shutil.which(binary):
+        return False
+    code = (
+        "import sys, os;"
+        "stdlib = os.path.dirname(os.__file__);"
+        "marker = os.path.join(stdlib, 'EXTERNALLY-MANAGED');"
+        "print('yes' if os.path.exists(marker) else 'no')"
+    )
+    rc, out, _err = _run([binary, "-c", code])
+    return rc == 0 and out.strip() == "yes"
+
+
+def _pip_python_version() -> Optional[str]:
+    """If ``pip`` is on PATH, return the Python version it's bound to.
+
+    ``pip --version`` output looks like::
+
+        pip 24.0 from /usr/lib/python3/dist-packages/pip (python 3.12)
+
+    Returns the parenthesised version (e.g. ``"3.12"``) or None.
+    """
+    if not shutil.which("pip"):
+        return None
+    rc, out, _err = _run(["pip", "--version"])
+    if rc != 0 or not out:
+        return None
+    # Parse trailing "(python X.Y)".
+    if "(python " in out and out.endswith(")"):
+        try:
+            tail = out.rsplit("(python ", 1)[1]
+            return tail[:-1].strip()
+        except (IndexError, AttributeError):
+            return None
+    return None
+
+
+def _build_probe_line() -> str:
+    """Build the one-liner.  Returns "" when nothing notable is detected.
+
+    Emit only when SOMETHING is off — the goal is to save the model from
+    hitting an avoidable wall, not to narrate a healthy environment.
+    """
+    # Bail out if a remote terminal backend is configured; the host's
+    # Python state isn't where the agent's tools run.
+    backend = (os.getenv("TERMINAL_ENV") or "local").strip().lower()
+    if backend in _REMOTE_BACKENDS:
+        return ""
+
+    py3_ver = _python_version_of("python3")
+    py_ver = _python_version_of("python")  # for systems with a `python` alias
+    py3_has_pip = _has_pip_module("python3") if py3_ver else False
+    pip_bound_to = _pip_python_version()
+    py3_pep668 = _detect_pep668("python3") if py3_ver else False
+    has_uv = shutil.which("uv") is not None
+
+    # If python3 exists, has pip, has uv (or no PEP 668), and there's no
+    # version mismatch between `pip` and `python3` → environment is
+    # clean enough to stay silent.  The model can discover details by
+    # running commands if it cares.
+    mismatch = bool(pip_bound_to and py3_ver and not py3_ver.startswith(pip_bound_to))
+    silent_conditions = (
+        py3_ver is not None
+        and py3_has_pip
+        and not mismatch
+        and (not py3_pep668 or has_uv)
+    )
+    if silent_conditions:
+        return ""
+
+    # Build a compact factual summary.  Keep it ONE line so it doesn't
+    # dominate the prompt; the model is good at parsing dense info.
+    bits: list[str] = []
+    if py3_ver:
+        py3_bit = f"python3={py3_ver}"
+        if not py3_has_pip:
+            py3_bit += " (no pip module)"
+        bits.append(py3_bit)
+    else:
+        bits.append("python3=missing")
+
+    if py_ver and py_ver != py3_ver:
+        bits.append(f"python={py_ver}")
+    elif not py_ver and py3_ver:
+        # Common on Debian/Ubuntu — call it out so the model doesn't
+        # type `python` and hit "command not found".
+        bits.append("python=missing (use python3)")
+
+    if pip_bound_to:
+        if mismatch:
+            bits.append(f"pip→python{pip_bound_to} (mismatch)")
+        elif not py3_has_pip:
+            # pip exists but `python3 -m pip` doesn't — the script
+            # works but the module path doesn't.
+            bits.append(f"pip→python{pip_bound_to}")
+    elif py3_has_pip:
+        # `pip` not on PATH but `python3 -m pip` works.
+        pass
+    else:
+        bits.append("pip=missing")
+
+    if py3_pep668:
+        bits.append("PEP 668=yes (use venv or uv)")
+
+    if has_uv:
+        bits.append("uv=installed")
+
+    if not bits:
+        return ""
+
+    return "Python toolchain: " + ", ".join(bits) + "."
+
+
+def get_environment_probe_line(*, force_refresh: bool = False) -> str:
+    """Return the cached probe line (building it on first call).
+
+    Returns "" when the environment is clean — the system prompt
+    assembler should drop the section in that case rather than
+    emit an empty heading.
+
+    ``force_refresh`` is for tests; real callers should never need it.
+    """
+    global _CACHED_LINE
+    if force_refresh:
+        with _CACHE_LOCK:
+            _CACHED_LINE = None
+
+    if _CACHED_LINE is not None:
+        return _CACHED_LINE
+
+    with _CACHE_LOCK:
+        if _CACHED_LINE is not None:  # raced
+            return _CACHED_LINE
+        try:
+            line = _build_probe_line()
+        except Exception as exc:  # never let probe failure block prompt build
+            logger.debug("env_probe failed: %s", exc)
+            line = ""
+        _CACHED_LINE = line
+        return line
+
+
+def _reset_cache_for_tests() -> None:
+    """Test helper — clear the cache between probe scenarios."""
+    global _CACHED_LINE
+    with _CACHE_LOCK:
+        _CACHED_LINE = None
diff --git a/tools/environments/base.py b/tools/environments/base.py
index 2666990bf18..618ea2bb922 100644
--- a/tools/environments/base.py
+++ b/tools/environments/base.py
@@ -524,8 +524,50 @@ class BaseEnvironment(ABC):
         # U+FFFD substitution rather than clobbering the whole buffer.
         decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
 
+        def _drain_iterable(stream):
+            # Fallback path: ``stream`` is not backed by a real OS file
+            # descriptor (no usable ``fileno()``).  This covers in-memory
+            # ProcessHandle adapters that expose stdout as a plain iterator of
+            # already-collected output (the legacy ``for line in proc.stdout``
+            # contract) rather than a live pipe.  Iterate it to EOF.  Without
+            # this, the drain thread would raise an unhandled exception and die
+            # silently, losing all of the process's output.
+            try:
+                for piece in stream:
+                    if piece is None:
+                        continue
+                    if isinstance(piece, bytes):
+                        output_chunks.append(decoder.decode(piece))
+                    else:
+                        output_chunks.append(str(piece))
+            except Exception:
+                pass
+            finally:
+                try:
+                    tail = decoder.decode(b"", final=True)
+                    if tail:
+                        output_chunks.append(tail)
+                except Exception:
+                    pass
+
         def _drain():
-            fd = proc.stdout.fileno()
+            # Resolve a real OS file descriptor up front.  Real subprocesses and
+            # the SDK ``_ThreadedProcessHandle`` (os.pipe-backed) both return an
+            # integer fd here.  Mocks / iterator-style stdout streams either lack
+            # ``fileno()`` entirely or return a non-integer — in that case fall
+            # back to draining the stream as an iterable instead of crashing the
+            # thread (issue: 'list_iterator' object has no attribute 'fileno').
+            stream = proc.stdout
+            if stream is None:
+                return
+            fileno = getattr(stream, "fileno", None)
+            try:
+                fd = fileno() if callable(fileno) else None
+            except Exception:
+                fd = None
+            if not isinstance(fd, int) or fd < 0:
+                _drain_iterable(stream)
+                return
             # select.select does NOT work on pipe fds on Windows (only sockets).
             # Use blocking os.read in a daemon thread instead — safe because
             # EOF arrives promptly when bash exits.
diff --git a/tools/environments/docker.py b/tools/environments/docker.py
index ed53cd07c41..8ec098083f1 100644
--- a/tools/environments/docker.py
+++ b/tools/environments/docker.py
@@ -12,6 +12,7 @@ import shutil
 import subprocess
 import sys
 import uuid
+from pathlib import Path
 from typing import Optional
 
 from tools.environments.base import BaseEnvironment, _popen_bash
@@ -98,6 +99,167 @@ def _load_hermes_env_vars() -> dict[str, str]:
         return {}
 
 
+# Docker label values must match [a-zA-Z0-9_.-] and stay ≤63 chars to round-trip
+# safely through `docker ps --filter label=key=value`. Profile and task names
+# can technically contain other characters; sanitize defensively.
+_LABEL_VALUE_OK_RE = re.compile(r"[^A-Za-z0-9_.-]")
+
+
+def _sanitize_label_value(value: str) -> str:
+    """Coerce *value* into a Docker label-safe form (alnum + ``_.-``, ≤63 chars).
+
+    Empty or all-invalid inputs collapse to ``"unknown"`` so the resulting
+    label is always queryable. Used at container-create time; never round-trip
+    a sanitized value back into application logic.
+    """
+    if not isinstance(value, str) or not value:
+        return "unknown"
+    cleaned = _LABEL_VALUE_OK_RE.sub("_", value)
+    cleaned = cleaned[:63] or "unknown"
+    return cleaned
+
+
+def _get_active_profile_name() -> str:
+    """Return the active Hermes profile name, or ``"default"`` on any error.
+
+    Resolved at container-create time so a single container is permanently
+    tagged with the profile that created it. Profile switches inside the
+    same process don't retroactively relabel running containers.
+    """
+    try:
+        from hermes_cli.profiles import get_active_profile_name
+
+        return get_active_profile_name() or "default"
+    except Exception:
+        return "default"
+
+
+def reap_orphan_containers(
+    *,
+    max_age_seconds: int = 600,
+    profile_filter: str | None = None,
+    docker_exe: str | None = None,
+) -> int:
+    """Remove stale hermes-tagged containers left behind by prior processes.
+
+    Targets containers that match all of:
+
+    * ``label=hermes-agent=1`` (created by this codebase)
+    * ``status=exited`` (running containers are NEVER reaped — they may
+      belong to a sibling Hermes process whose reuse path will pick them
+      up; killing them would crash the sibling mid-command)
+    * (optional) ``label=hermes-profile=<profile_filter>`` (sweep only the
+      caller's profile by default; a hermes process in profile A must not
+      tear down profile B's containers)
+    * ``State.FinishedAt`` older than *max_age_seconds* ago (so a sibling
+      process that just exited and is about to be replaced doesn't get
+      its container yanked out from under it)
+
+    Returns the number of containers removed. Best-effort: any failure
+    (docker daemon unreachable, slow inspect, parse error) is logged at
+    debug level and the function returns whatever it managed before the
+    failure. Safe to call repeatedly; idempotent.
+
+    Issue #20561 — this is the safety net for SIGKILL / OOM / crashed
+    terminal exits that bypass the ``atexit`` cleanup hook. Without it,
+    even with the cleanup-fix in the prior commit, a hard-killed Hermes
+    process leaves its container behind permanently because there's no
+    subsequent Hermes process scheduled to reuse that exact (task, profile)
+    pair.
+    """
+    docker = docker_exe or find_docker() or "docker"
+    filters = ["--filter", "label=hermes-agent=1", "--filter", "status=exited"]
+    if profile_filter:
+        filters.extend(["--filter", f"label=hermes-profile={_sanitize_label_value(profile_filter)}"])
+
+    try:
+        listing = subprocess.run(
+            [docker, "ps", "-a", *filters, "--format", "{{.ID}}"],
+            capture_output=True, text=True, timeout=15, check=False,
+        )
+    except (subprocess.TimeoutExpired, OSError) as e:
+        logger.debug("orphan reaper docker ps failed: %s", e)
+        return 0
+    if listing.returncode != 0:
+        logger.debug(
+            "orphan reaper docker ps returned %d: %s",
+            listing.returncode, listing.stderr.strip(),
+        )
+        return 0
+
+    candidate_ids = [ln.strip() for ln in listing.stdout.splitlines() if ln.strip()]
+    if not candidate_ids:
+        return 0
+
+    # Inspect each candidate to get FinishedAt; reap only those exited
+    # long enough ago.  Doing this per-container (rather than bulk inspect)
+    # keeps the failure blast radius to one container at a time.
+    import datetime
+    now = datetime.datetime.now(datetime.timezone.utc)
+    removed = 0
+    for cid in candidate_ids:
+        finished_at = _container_finished_at(docker, cid)
+        if finished_at is None:
+            # Couldn't determine age — be conservative and leave it alone.
+            continue
+        age = (now - finished_at).total_seconds()
+        if age < max_age_seconds:
+            continue
+        try:
+            result = subprocess.run(
+                [docker, "rm", "-f", cid],
+                capture_output=True, text=True, timeout=30,
+            )
+            if result.returncode == 0:
+                removed += 1
+                logger.info(
+                    "Reaped orphan container %s (exited %d seconds ago)",
+                    cid[:12], int(age),
+                )
+            else:
+                logger.debug(
+                    "docker rm -f %s failed: %s",
+                    cid[:12], result.stderr.strip(),
+                )
+        except (subprocess.TimeoutExpired, OSError) as e:
+            logger.debug("orphan reaper docker rm %s failed: %s", cid[:12], e)
+    return removed
+
+
+def _container_finished_at(docker_exe: str, container_id: str):
+    """Parse ``docker inspect`` FinishedAt for *container_id*.
+
+    Returns a timezone-aware datetime, or ``None`` if the field is missing,
+    unparseable, or the zero-value ``0001-01-01T00:00:00Z`` Docker emits
+    for never-finished containers. ``None`` means "don't reap" — the caller
+    leaves the container alone.
+    """
+    try:
+        result = subprocess.run(
+            [docker_exe, "inspect", "--format", "{{.State.FinishedAt}}", container_id],
+            capture_output=True, text=True, timeout=10, check=False,
+        )
+    except (subprocess.TimeoutExpired, OSError) as e:
+        logger.debug("orphan reaper docker inspect %s failed: %s", container_id[:12], e)
+        return None
+    if result.returncode != 0:
+        return None
+    raw = result.stdout.strip()
+    if not raw or raw.startswith("0001-01-01"):
+        return None
+    # Docker emits RFC3339 with nanoseconds (e.g. "2026-05-28T13:45:00.123456789Z").
+    # Python's fromisoformat handles microseconds but not nanoseconds; trim.
+    import re as _re
+    raw = _re.sub(r"(\.\d{6})\d+", r"\1", raw)
+    raw = raw.replace("Z", "+00:00")
+    try:
+        import datetime
+        return datetime.datetime.fromisoformat(raw)
+    except ValueError as e:
+        logger.debug("could not parse FinishedAt %r for %s: %s", raw, container_id[:12], e)
+        return None
+
+
 def find_docker() -> Optional[str]:
     """Locate the docker (or podman) CLI binary.
 
@@ -304,15 +466,18 @@ class DockerEnvironment(BaseEnvironment):
         auto_mount_cwd: bool = False,
         run_as_host_user: bool = False,
         extra_args: list = None,
+        persist_across_processes: bool = True,
     ):
         if cwd == "~":
             cwd = "/root"
         super().__init__(cwd=cwd, timeout=timeout)
         self._persistent = persistent_filesystem
+        self._persist_across_processes = persist_across_processes
         self._task_id = task_id
         self._forward_env = _normalize_forward_env_names(forward_env)
         self._env = _normalize_env_dict(env)
         self._container_id: Optional[str] = None
+        self._labels: dict[str, str] = {}
         logger.info(f"DockerEnvironment volumes: {volumes}")
         # Ensure volumes is a list (config.yaml could be malformed)
         if volumes is not None and not isinstance(volumes, list):
@@ -413,6 +578,22 @@ class DockerEnvironment(BaseEnvironment):
             )
 
             for mount_entry in get_credential_file_mounts():
+                src = Path(mount_entry["host_path"])
+                if src.is_dir():
+                    # Docker-in-Docker: Docker auto-created the source path as
+                    # a directory when it didn't exist on the host.  Mounting a
+                    # directory over a file destination causes exit 125.
+                    logger.warning(
+                        "Docker: skipping credential mount — source is a directory "
+                        "(likely Docker-in-Docker auto-creation): %s",
+                        src,
+                    )
+                    continue
+                if not src.is_file():
+                    logger.warning(
+                        "Docker: skipping credential mount — source not found: %s", src,
+                    )
+                    continue
                 volume_args.extend([
                     "-v",
                     f"{mount_entry['host_path']}:{mount_entry['container_path']}:ro",
@@ -426,6 +607,13 @@ class DockerEnvironment(BaseEnvironment):
             # Mount skill directories (local + external) so skill
             # scripts/templates are available inside the container.
             for skills_mount in get_skills_directory_mount():
+                src = Path(skills_mount["host_path"])
+                if not src.is_dir():
+                    logger.warning(
+                        "Docker: skipping skills mount — source is not a directory: %s",
+                        src,
+                    )
+                    continue
                 volume_args.extend([
                     "-v",
                     f"{skills_mount['host_path']}:{skills_mount['container_path']}:ro",
@@ -441,6 +629,13 @@ class DockerEnvironment(BaseEnvironment):
             # cached media from inside the container.  Read-only — the
             # container reads these but the host gateway manages writes.
             for cache_mount in get_cache_directory_mounts():
+                src = Path(cache_mount["host_path"])
+                if not src.is_dir():
+                    logger.warning(
+                        "Docker: skipping cache mount — source is not a directory: %s",
+                        src,
+                    )
+                    continue
                 volume_args.extend([
                     "-v",
                     f"{cache_mount['host_path']}:{cache_mount['container_path']}:ro",
@@ -506,25 +701,88 @@ class DockerEnvironment(BaseEnvironment):
 
         # Start the container directly via `docker run -d`.
         container_name = f"hermes-{uuid.uuid4().hex[:8]}"
-        run_cmd = [
-            self._docker_exe, "run", "-d",
-            "--init",           # tini/catatonit as PID 1 — reaps zombie children
-            "--name", container_name,
-            "-w", cwd,
-            *all_run_args,
-            image,
-            "sleep", "infinity",  # no fixed lifetime — idle reaper handles cleanup
+        # Labels make hermes-created containers identifiable to:
+        #   * the orphan reaper (`hermes-agent=1` for the global sweep filter)
+        #   * future cross-process reuse (`hermes-task-id`, `hermes-profile`)
+        #   * operators running `docker ps --filter label=hermes-agent=1`
+        # Values are limited to the safe character set defined by
+        # _sanitize_label_value(); the active Hermes profile is captured at
+        # container-start time and never changes for the container's lifetime.
+        profile_name = _sanitize_label_value(_get_active_profile_name())
+        task_label = _sanitize_label_value(task_id)
+        label_args = [
+            "--label", "hermes-agent=1",
+            "--label", f"hermes-task-id={task_label}",
+            "--label", f"hermes-profile={profile_name}",
         ]
-        logger.debug(f"Starting container: {' '.join(run_cmd)}")
-        result = subprocess.run(
-            run_cmd,
-            capture_output=True,
-            text=True,
-            timeout=120,  # image pull may take a while
-            check=True,
-        )
-        self._container_id = result.stdout.strip()
-        logger.info(f"Started container {container_name} ({self._container_id[:12]})")
+        self._labels = {
+            "hermes-agent": "1",
+            "hermes-task-id": task_label,
+            "hermes-profile": profile_name,
+        }
+
+        # Cross-process container reuse (issue #20561 — docs claim "ONE long-lived
+        # container shared across sessions").  If a prior Hermes process
+        # already started a container for this (task_id, profile) and it
+        # still exists, attach to it instead of starting a fresh one.  This
+        # restores the documented contract; opt out via
+        # ``terminal.docker_persist_across_processes: false``.
+        #
+        # Reuse matches on labels only — we deliberately do NOT compare image
+        # / mounts / resources.  Operators who need a fresh container after
+        # changing those settings should set ``docker_persist_across_processes:
+        # false`` (or run ``docker rm -f`` against the labeled container) to
+        # force a clean start.
+        reused = False
+        if persist_across_processes:
+            existing = self._find_reusable_container(task_label, profile_name)
+            if existing is not None:
+                container_id, state = existing
+                self._container_id = container_id
+                if state != "running":
+                    try:
+                        subprocess.run(
+                            [self._docker_exe, "start", container_id],
+                            capture_output=True,
+                            text=True,
+                            timeout=30,
+                            check=True,
+                        )
+                    except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e:
+                        logger.warning(
+                            "Failed to start existing container %s (state=%s): "
+                            "%s — falling back to a fresh container.",
+                            container_id[:12], state, e,
+                        )
+                        self._container_id = None
+                if self._container_id:
+                    logger.info(
+                        "Reusing container %s (task=%s, profile=%s, prior state=%s)",
+                        container_id[:12], task_label, profile_name, state,
+                    )
+                    reused = True
+
+        if not reused:
+            run_cmd = [
+                self._docker_exe, "run", "-d",
+                "--init",           # tini/catatonit as PID 1 — reaps zombie children
+                "--name", container_name,
+                *label_args,
+                "-w", cwd,
+                *all_run_args,
+                image,
+                "sleep", "infinity",  # no fixed lifetime — idle reaper handles cleanup
+            ]
+            logger.debug(f"Starting container: {' '.join(run_cmd)}")
+            result = subprocess.run(
+                run_cmd,
+                capture_output=True,
+                text=True,
+                timeout=120,  # image pull may take a while
+                check=True,
+            )
+            self._container_id = result.stdout.strip()
+            logger.info(f"Started container {container_name} ({self._container_id[:12]})")
 
         # Build the init-time env forwarding args (used only by init_session
         # to inject host env vars into the snapshot; subsequent commands get
@@ -629,31 +887,191 @@ class DockerEnvironment(BaseEnvironment):
         logger.debug("Docker --storage-opt support: %s", _storage_opt_ok)
         return _storage_opt_ok
 
-    def cleanup(self):
-        """Stop and remove the container. Bind-mount dirs persist if persistent=True."""
-        if self._container_id:
-            try:
-                # Stop in background so cleanup doesn't block
-                stop_cmd = (
-                    f"(timeout 60 {self._docker_exe} stop {self._container_id} || "
-                    f"{self._docker_exe} rm -f {self._container_id}) >/dev/null 2>&1 &"
-                )
-                subprocess.Popen(stop_cmd, shell=True)
-            except Exception as e:
-                logger.warning("Failed to stop container %s: %s", self._container_id, e)
+    def _find_reusable_container(self, task_label: str, profile_label: str) -> Optional[tuple[str, str]]:
+        """Look for an existing container labeled for this (task, profile).
 
+        Returns ``(container_id, state)`` on hit, ``None`` on miss / on any
+        failure (including ``docker ps`` itself failing). State is one of the
+        values Docker reports via ``{{.State}}`` — e.g. ``running``, ``exited``,
+        ``created``, ``paused``, ``restarting``, ``dead``. The caller decides
+        whether the state warrants ``docker start`` before reuse.
+
+        Restricted to the docker-stored label set this class creates; never
+        matches containers that happened to be named ``hermes-*`` but were
+        started by some other tool.
+        """
+        try:
+            result = subprocess.run(
+                [
+                    self._docker_exe, "ps", "-a",
+                    "--filter", "label=hermes-agent=1",
+                    "--filter", f"label=hermes-task-id={task_label}",
+                    "--filter", f"label=hermes-profile={profile_label}",
+                    "--format", "{{.ID}}\t{{.State}}",
+                ],
+                capture_output=True,
+                text=True,
+                timeout=10,
+                check=False,
+            )
+        except (subprocess.TimeoutExpired, OSError) as e:
+            logger.debug("docker ps probe failed: %s — will start a fresh container", e)
+            return None
+        if result.returncode != 0:
+            logger.debug(
+                "docker ps probe returned %d: %s — will start a fresh container",
+                result.returncode, result.stderr.strip(),
+            )
+            return None
+        lines = [ln.strip() for ln in result.stdout.splitlines() if ln.strip()]
+        if not lines:
+            return None
+        # Multiple matches are unusual (one (task, profile) should produce one
+        # container) but can happen if a previous Hermes process crashed
+        # mid-cleanup. Prefer a running one if present; otherwise pick the
+        # first listed. Stale duplicates get reaped by the orphan-reaper in a
+        # follow-up commit; we don't try to be heroic about them here.
+        running = None
+        first = None
+        for ln in lines:
+            parts = ln.split("\t", 1)
+            if len(parts) != 2:
+                continue
+            cid, state = parts[0], parts[1].lower()
+            if first is None:
+                first = (cid, state)
+            if state == "running" and running is None:
+                running = (cid, state)
+        return running or first
+
+    def cleanup(self, *, force_remove: bool = False):
+        """Tear down the container according to persist mode and *force_remove*.
+
+        Persist-mode (``persist_across_processes=True``, the default) leaves the
+        container **running** untouched. The docs promise "ONE long-lived
+        container shared across sessions" and stopping it on every Hermes exit
+        breaks that promise:
+
+        * Background processes inside the container (``npm run dev``, watchers,
+          long-running pytest) get killed every time the user runs ``/quit``.
+        * Every reuse requires ``docker start`` + waiting for the container to
+          come back up, adding 1–2s to the first tool call of the new session.
+        * The user-visible difference between "ONE long-lived container" and
+          "a new container that happens to share state" is exactly this:
+          processes survive in the former, die in the latter.
+
+        Resource reclamation for the persist-mode case lives in the
+        ``reap_orphan_containers()`` path (see issue #20561 commit 3): if no
+        Hermes process touches a labeled container for ``2 × lifetime_seconds``
+        it gets ``docker rm -f``'d at the next Hermes startup. That covers the
+        SIGKILL / OOM / abandoned-laptop cases without us needing to stop the
+        container on every graceful exit.
+
+        Opt-out mode (``persist_across_processes=False``) still does
+        ``docker stop`` + ``docker rm -f`` on every cleanup, matching the
+        pre-PR behavior for users who explicitly want per-process isolation.
+
+        ``force_remove=True`` overrides persist mode and always tears the
+        container down (``docker stop`` + ``docker rm -f``). This is the
+        explicit-teardown path for ``/reset``, ``cleanup_vm(task_id)``-driven
+        resets, or any caller that wants a guaranteed fresh container on next
+        ``DockerEnvironment(task_id=...)``. No current caller passes
+        ``force_remove=True``; the parameter is here so the explicit-teardown
+        semantics can be wired up later without changing this method's
+        signature.
+
+        Cleanup runs on a daemon thread with bounded ``subprocess.run`` calls
+        (not the racy ``Popen(... &)`` pattern from before PR #33645). The
+        atexit hook in ``tools/terminal_tool.py`` waits up to 15s for the
+        thread to finish before the interpreter exits, so ``docker stop`` /
+        ``docker rm`` actually completes when we do trigger it.
+        """
+        container_id = self._container_id
+        if not container_id:
+            # Still drop the bind-mount dirs if any were allocated and we're
+            # NOT in persist mode (persist mode preserves them).
             if not self._persistent:
-                # Also schedule removal (stop only leaves it as stopped)
-                try:
-                    subprocess.Popen(
-                        f"sleep 3 && {self._docker_exe} rm -f {self._container_id} >/dev/null 2>&1 &",
-                        shell=True,
-                    )
-                except Exception:
-                    pass
-            self._container_id = None
+                for d in (self._workspace_dir, self._home_dir):
+                    if d:
+                        shutil.rmtree(d, ignore_errors=True)
+            return
 
-        if not self._persistent:
+        # Decide what to actually do. Three cases:
+        #
+        #   force_remove=True             → stop + rm (explicit teardown)
+        #   persist_across_processes=True → no-op (leave container running)
+        #   persist_across_processes=False → stop + rm (per-process isolation)
+        #
+        # The persist-mode no-op is the issue-#20561 contract: the container
+        # outlives Hermes processes, processes inside it stay alive, and
+        # reuse on next startup is instant.
+        if force_remove:
+            should_stop = True
+            should_remove = True
+        elif self._persist_across_processes:
+            # No-op for the container. Drop the in-process handle so a fresh
+            # __init__ will re-probe via labels (and find the running
+            # container) instead of trying to reuse a stale Python reference.
+            self._container_id = None
+            return
+        else:
+            should_stop = True
+            should_remove = True
+
+        # Capture state needed by the worker before we null out the attrs —
+        # the worker thread can outlive ``self``.
+        docker_exe = self._docker_exe
+        log_id = container_id[:12]
+
+        def _do_cleanup() -> None:
+            if should_stop:
+                try:
+                    subprocess.run(
+                        [docker_exe, "stop", "-t", "10", container_id],
+                        capture_output=True, timeout=30,
+                    )
+                except (subprocess.TimeoutExpired, OSError) as e:
+                    logger.warning("docker stop %s timed out / failed: %s", log_id, e)
+            if should_remove:
+                try:
+                    subprocess.run(
+                        [docker_exe, "rm", "-f", container_id],
+                        capture_output=True, timeout=30,
+                    )
+                except (subprocess.TimeoutExpired, OSError) as e:
+                    logger.warning("docker rm -f %s failed: %s", log_id, e)
+
+        # Daemon thread: doesn't block interpreter exit (atexit returns
+        # promptly), but unlike the old ``Popen(... &)`` shell trick the
+        # Python-level join semantics let the thread actually run to
+        # completion if the interpreter is still alive. atexit registers
+        # ``_atexit_cleanup`` in terminal_tool.py which waits up to ~60s for
+        # outstanding cleanups, so most exits complete the work cleanly.
+        import threading
+        t = threading.Thread(target=_do_cleanup, daemon=True, name=f"hermes-cleanup-{log_id}")
+        t.start()
+        self._cleanup_thread = t
+        self._container_id = None
+
+        # Bind-mount dir teardown only runs when we actually removed the
+        # container (the dirs are the container's filesystem state; keeping
+        # them around with no container would orphan the data on disk).
+        if should_remove and not self._persistent:
             for d in (self._workspace_dir, self._home_dir):
                 if d:
                     shutil.rmtree(d, ignore_errors=True)
+
+    def wait_for_cleanup(self, timeout: float = 30.0) -> bool:
+        """Block up to *timeout* seconds for the cleanup worker thread.
+
+        Returns ``True`` if the thread finished (or no thread was started),
+        ``False`` on timeout. The atexit hook in terminal_tool.py calls this
+        on every active environment so docker stop/rm actually completes
+        before the Python process exits — without this, ``hermes /quit``
+        races the interpreter shutdown and leaves stopped containers behind.
+        """
+        thread = getattr(self, "_cleanup_thread", None)
+        if thread is None or not thread.is_alive():
+            return True
+        thread.join(timeout=timeout)
+        return not thread.is_alive()
diff --git a/tools/environments/local.py b/tools/environments/local.py
index 81d470f9b63..4cc65d80af5 100644
--- a/tools/environments/local.py
+++ b/tools/environments/local.py
@@ -75,6 +75,27 @@ def _resolve_safe_cwd(cwd: str) -> str:
 # Hermes-internal env vars that should NOT leak into terminal subprocesses.
 _HERMES_PROVIDER_ENV_FORCE_PREFIX = "_HERMES_FORCE_"
 
+# Hermes-managed AWS *inference* credentials for ``auth_type="aws_sdk"``
+# providers (Bedrock).  Scoped DELIBERATELY NARROW: this lists only the
+# Bedrock-specific bearer token, which is a Hermes inference secret exactly
+# analogous to ``OPENAI_API_KEY`` — nobody drives the ``aws``/``terraform``/
+# ``boto3`` toolchain off it, so stripping it from terminal/execute_code
+# subprocesses costs no user capability.
+#
+# The GENERAL AWS credential chain (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY,
+# AWS_SESSION_TOKEN, AWS_PROFILE, and the config/role pointers) is INTENTIONALLY
+# left inheritable.  Per SECURITY.md §3.2 the local terminal is the user's
+# trusted operator shell; the agent having the same general AWS access the
+# user's own shell has is the intended posture, not a leak.  Hard-blocklisting
+# those vars would (a) regress every user who runs aws/terraform/cdk/boto3 in
+# the agent terminal — not just Bedrock users, since the registry is iterated
+# unconditionally — and (b) be unrecoverable, because env_passthrough.py
+# refuses to re-allow anything in this blocklist (GHSA-rhgp-j443-p4rf).  See
+# issue #32314 discussion.
+_AWS_SDK_CREDENTIAL_ENV_VARS = frozenset({
+    "AWS_BEARER_TOKEN_BEDROCK",
+})
+
 
 def _build_provider_env_blocklist() -> frozenset:
     """Derive the blocklist from provider, tool, and gateway config."""
@@ -84,6 +105,8 @@ def _build_provider_env_blocklist() -> frozenset:
         from hermes_cli.auth import PROVIDER_REGISTRY
         for pconfig in PROVIDER_REGISTRY.values():
             blocked.update(pconfig.api_key_env_vars)
+            if pconfig.auth_type == "aws_sdk":
+                blocked.update(_AWS_SDK_CREDENTIAL_ENV_VARS)
             if pconfig.base_url_env_var:
                 blocked.add(pconfig.base_url_env_var)
     except ImportError:
@@ -316,7 +339,7 @@ def _make_run_env(env: dict) -> dict:
     # Inject ContextVar-based session vars into subprocess env.
     # ContextVars don't propagate to child processes, so we bridge them here.
     try:
-        from gateway.session_context import get_session_env, _UNSET, _VAR_MAP
+        from gateway.session_context import _UNSET, _VAR_MAP
         for var_name, var in _VAR_MAP.items():
             value = var.get()
             if value is not _UNSET and value:
diff --git a/tools/file_operations.py b/tools/file_operations.py
index e2f98278e6a..b27405c58d7 100644
--- a/tools/file_operations.py
+++ b/tools/file_operations.py
@@ -37,7 +37,6 @@ from tools.binary_extensions import BINARY_EXTENSIONS
 from agent.file_safety import (
     build_write_denied_paths,
     build_write_denied_prefixes,
-    get_safe_write_root as _shared_get_safe_write_root,
     is_write_denied as _shared_is_write_denied,
 )
 
@@ -114,17 +113,6 @@ def _normalize_line_endings(text: str, target: str) -> str:
     return text
 
 
-def _get_safe_write_root() -> Optional[str]:
-    """Return the resolved HERMES_WRITE_SAFE_ROOT path, or None if unset.
-
-    When set, all write_file/patch operations are constrained to this
-    directory tree.  Writes outside it are denied even if the target is
-    not on the static deny list.  Opt-in hardening for gateway/messaging
-    deployments that should only touch a workspace checkout.
-    """
-    return _shared_get_safe_write_root()
-
-
 def _is_write_denied(path: str) -> bool:
     """Return True if path is on the write deny list."""
     return _shared_is_write_denied(path)
diff --git a/tools/fuzzy_match.py b/tools/fuzzy_match.py
index ef6248494a4..b6991e7a24f 100644
--- a/tools/fuzzy_match.py
+++ b/tools/fuzzy_match.py
@@ -113,8 +113,29 @@ def fuzzy_find_and_replace(content: str, old_string: str, new_string: str,
             # old_string/new_string — e.g. LLM used 2-space indent but the
             # file is 4-space. Shift new_string by the indentation delta so
             # the replacement matches the file's actual indent pattern.
+            # LLMs frequently serialize tabs / carriage returns in JSON
+            # tool-call arguments as the two-character sequences ``\t`` and
+            # ``\r`` (backslash + letter) instead of the real control bytes.
+            # If we write new_string verbatim, the file ends up with literal
+            # backslash sequences where the surrounding code uses real tabs.
+            #
+            # Strategy: only unescape when the matched region of the file
+            # *actually contains* the corresponding real control character.
+            # That mirrors the region-based heuristic in
+            # ``_detect_escape_drift`` and keeps legitimate writes of the
+            # literal two-character string ``"\t"`` (e.g. patching Python
+            # source that contains a tab string literal in source text)
+            # untouched — those files have a backslash+t in the matched
+            # region, not a real tab, so we leave new_string alone.
+            #
+            # ``\n`` is intentionally excluded: newlines serialize correctly
+            # through JSON, and rewriting backslash-n would mangle escape
+            # sequences in source code constants far more often than help.
+            effective_new = _maybe_unescape_new_string(
+                new_string, content, matches,
+            )
             new_content = _apply_replacements(
-                content, matches, new_string,
+                content, matches, effective_new,
                 old_string=old_string if strategy_name != "exact" else None,
             )
             return new_content, len(matches), strategy_name, None
@@ -247,6 +268,42 @@ def _reindent_replacement(file_region: str, old_string: str, new_string: str) ->
     return "\n".join(out_lines)
 
 
+def _maybe_unescape_new_string(new_string: str,
+                               content: str,
+                               matches: List[Tuple[int, int]]) -> str:
+    """Conditionally unescape ``\\t``/``\\r`` in new_string.
+
+    LLMs frequently send the two-character sequences ``\\t`` (backslash + t)
+    and ``\\r`` (backslash + r) inside JSON tool-call arguments where they
+    meant a real tab or carriage-return byte. Writing the string verbatim
+    corrupts tab-indented files with literal backslash-letter pairs.
+
+    The unescape is only applied per-sequence when the *matched region of
+    the file* actually contains the corresponding control character — that
+    is, we only convert ``\\t`` -> tab when the file region we're replacing
+    contains a real tab byte. Files that legitimately contain the literal
+    two-character string ``"\\t"`` (e.g. a Python source line that defines
+    ``sep = "\\t"``) get a backslash+t in the matched region instead of a
+    tab, so we leave new_string alone.
+
+    ``\\n`` is intentionally excluded: newlines serialize correctly through
+    JSON and rewriting backslash-n would corrupt escape sequences in
+    string literals far more often than it would help.
+    """
+    # Cheap pre-check — bail out unless new_string actually contains one of
+    # the suspect sequences. Keeps the common case free.
+    if "\\t" not in new_string and "\\r" not in new_string:
+        return new_string
+
+    matched_regions = "".join(content[start:end] for start, end in matches)
+    out = new_string
+    if "\\t" in out and "\t" in matched_regions:
+        out = out.replace("\\t", "\t")
+    if "\\r" in out and "\r" in matched_regions:
+        out = out.replace("\\r", "\r")
+    return out
+
+
 def _apply_replacements(content: str, matches: List[Tuple[int, int]],
                         new_string: str, old_string: Optional[str] = None) -> str:
     """
diff --git a/tools/image_generation_tool.py b/tools/image_generation_tool.py
index 584f5e9fa1c..d3263eae8ad 100644
--- a/tools/image_generation_tool.py
+++ b/tools/image_generation_tool.py
@@ -66,6 +66,7 @@ from tools.managed_tool_gateway import resolve_managed_tool_gateway
 from tools.tool_backend_helpers import (
     fal_key_is_configured,
     managed_nous_tools_enabled,
+    nous_tool_gateway_unavailable_message,
     prefers_gateway,
 )
 
@@ -317,6 +318,54 @@ FAL_MODELS: Dict[str, Dict[str, Any]] = {
         },
         "upscale": False,
     },
+    # Krea 2 — Krea's first foundation image model, day-0 partner launch on
+    # fal (2026-05-27). Same model family as our direct ``plugins/image_gen/krea``
+    # backend, exposed here for users who prefer to bill through their
+    # existing FAL key / Nous Portal subscription rather than register
+    # directly with Krea.  Both variants share the same parameter schema —
+    # only model id, price, and recommended use case differ.
+    "fal-ai/krea/v2/medium/text-to-image": {
+        "display": "Krea 2 Medium",
+        "speed": "~15-25s",
+        "strengths": "Illustration, anime, painting, expressive/artistic styles",
+        "price": "$0.030 (text) / $0.035 (style refs)",
+        "size_style": "aspect_ratio",
+        # Krea natively accepts 1:1, 4:3, 3:2, 16:9, 2.35:1, 4:5, 2:3, 9:16 —
+        # we map our 3 abstract ratios to the closest match.
+        "sizes": {
+            "landscape": "16:9",
+            "square": "1:1",
+            "portrait": "9:16",
+        },
+        "defaults": {
+            "creativity": "medium",
+        },
+        "supports": {
+            "prompt", "aspect_ratio", "creativity", "seed",
+            "image_style_references",
+        },
+        "upscale": False,
+    },
+    "fal-ai/krea/v2/large/text-to-image": {
+        "display": "Krea 2 Large",
+        "speed": "~25-60s",
+        "strengths": "Photorealism, raw textured looks (motion blur, grain, film)",
+        "price": "$0.060 (text) / $0.065 (style refs)",
+        "size_style": "aspect_ratio",
+        "sizes": {
+            "landscape": "16:9",
+            "square": "1:1",
+            "portrait": "9:16",
+        },
+        "defaults": {
+            "creativity": "medium",
+        },
+        "supports": {
+            "prompt", "aspect_ratio", "creativity", "seed",
+            "image_style_references",
+        },
+        "upscale": False,
+    },
 }
 
 # Default model is the fastest reasonable option. Kept cheap and sub-1s.
@@ -404,12 +453,22 @@ def _submit_fal_request(model: str, arguments: Dict[str, Any]):
         # of a raw HTTP error from httpx.
         status = _extract_http_status(exc)
         if status is not None and 400 <= status < 500:
+            gateway_message = ""
+            if status in {401, 402, 403}:
+                gateway_message = (
+                    "\n\n"
+                    + nous_tool_gateway_unavailable_message(
+                        "managed FAL image generation",
+                        force_fresh=True,
+                    )
+                )
             raise ValueError(
                 f"Nous Subscription gateway rejected model '{model}' "
                 f"(HTTP {status}). This model may not yet be enabled on "
                 f"the Nous Portal's FAL proxy. Either:\n"
                 f"  • Set FAL_KEY in your environment to use FAL.ai directly, or\n"
                 f"  • Pick a different model via `hermes tools` → Image Generation."
+                f"{gateway_message}"
             ) from exc
         raise
 
@@ -719,6 +778,11 @@ def _build_no_backend_setup_message() -> str:
         )
     else:
         lines.append("  - FAL_KEY environment variable is not set")
+        gateway_message = nous_tool_gateway_unavailable_message(
+            "managed FAL image generation",
+        )
+        if gateway_message:
+            lines.append(f"  - {gateway_message}")
     lines.append("")
     lines.append("To enable image generation, do one of:")
     lines.append(
diff --git a/tools/kanban_tools.py b/tools/kanban_tools.py
index 29b5618e681..d3493f0f8e3 100644
--- a/tools/kanban_tools.py
+++ b/tools/kanban_tools.py
@@ -176,6 +176,90 @@ def _connect(board: Optional[str] = None):
     return kb, kb.connect(board=board)
 
 
+# ---------------------------------------------------------------------------
+# Runtime-activity → board-heartbeat bridge (#31752)
+# ---------------------------------------------------------------------------
+# When the agent ticks ``_touch_activity`` during normal work (between
+# tool calls, mid-stream chunks, etc.), we want the kanban board's
+# ``last_heartbeat_at`` columns to reflect that liveness so the dispatcher
+# watchdog (which reads ``tasks.last_heartbeat_at``, not the agent's
+# in-process timestamp) doesn't reclaim an actively-running worker as
+# stale. The model is not required to call the explicit ``kanban_heartbeat``
+# tool for this to work — that tool stays available for workers that want
+# to attach a note or pre-emptively extend a claim across a known-long op.
+#
+# Constraints:
+#   - Best-effort: never raise. The agent loop must not care if the bridge
+#     fails (board missing, DB locked, etc.).
+#   - Rate-limited to one DB write per 60s per-process; runtime activity
+#     can tick on every chunk/tool result and we don't need that resolution.
+#   - No-op outside dispatcher-spawned worker context (no ``HERMES_KANBAN_TASK``).
+#   - No durable note on these auto-heartbeats; that's reserved for the
+#     explicit tool which carries a model-supplied note.
+
+_AUTO_HEARTBEAT_MIN_INTERVAL_SECONDS = 60.0
+_auto_heartbeat_last_attempt: float = 0.0
+
+
+def heartbeat_current_worker_from_env() -> bool:
+    """Best-effort: extend the kanban claim + bump board heartbeat for the
+    current dispatcher-spawned worker, using identity from env vars.
+
+    Returns True if a write was attempted (whether or not it succeeded);
+    False if the call was skipped (not a kanban worker, rate-limited, or
+    swallowed exception). The boolean is informational — callers should
+    not branch on it.
+
+    Identity comes from:
+      * ``HERMES_KANBAN_TASK`` — task id (required; absence means no-op)
+      * ``HERMES_KANBAN_RUN_ID`` — pins the run row so we don't heartbeat
+        a stale run that may have already been reclaimed
+      * ``HERMES_KANBAN_CLAIM_LOCK`` — claim lock for ``heartbeat_claim``;
+        falls back to the default ``_claimer_id()`` for locally-driven
+        workers that never went through the dispatcher path
+
+    Rate-limited via the module-level ``_auto_heartbeat_last_attempt``
+    timestamp (monotonic clock); not thread-safe in the strict sense, but
+    the worst case is one extra DB write per race, which is harmless.
+    """
+    global _auto_heartbeat_last_attempt
+    tid = os.environ.get("HERMES_KANBAN_TASK")
+    if not tid:
+        return False
+    import time as _time
+    now = _time.monotonic()
+    if (now - _auto_heartbeat_last_attempt) < _AUTO_HEARTBEAT_MIN_INTERVAL_SECONDS:
+        return False
+    _auto_heartbeat_last_attempt = now
+    try:
+        kb, conn = _connect()
+        try:
+            claim_lock = os.environ.get("HERMES_KANBAN_CLAIM_LOCK")
+            try:
+                kb.heartbeat_claim(conn, tid, claimer=claim_lock)
+            except Exception:
+                logger.debug("auto-heartbeat: heartbeat_claim failed", exc_info=True)
+            run_id_raw = os.environ.get("HERMES_KANBAN_RUN_ID")
+            run_id: Optional[int]
+            try:
+                run_id = int(run_id_raw) if run_id_raw else None
+            except (TypeError, ValueError):
+                run_id = None
+            try:
+                kb.heartbeat_worker(conn, tid, note=None, expected_run_id=run_id)
+            except Exception:
+                logger.debug("auto-heartbeat: heartbeat_worker failed", exc_info=True)
+        finally:
+            try:
+                conn.close()
+            except Exception:
+                pass
+        return True
+    except Exception:
+        logger.debug("auto-heartbeat: bridge failed", exc_info=True)
+        return False
+
+
 def _ok(**fields: Any) -> str:
     return json.dumps({"ok": True, **fields})
 
diff --git a/tools/lazy_deps.py b/tools/lazy_deps.py
index 393397349d8..a0926a435c7 100644
--- a/tools/lazy_deps.py
+++ b/tools/lazy_deps.py
@@ -97,15 +97,16 @@ LAZY_DEPS: dict[str, tuple[str, ...]] = {
     # (see comment at top of [project.dependencies]). When bumping, update
     # both this map AND the corresponding extra in pyproject.toml.
     #
-    # NOTE: tts.mistral / stt.mistral entries are intentionally absent —
-    # the `mistralai` PyPI project is quarantined as of 2026-05-12 (Mini
-    # Shai-Hulud worm). Re-add when PyPI restores a clean release; see
-    # comment in pyproject.toml above the (removed) `mistral` extra for
-    # the full restoration checklist.
+    # mistralai pin tracks the `mistral` extra in pyproject.toml. PyPI
+    # quarantined the project 2026-05-12 (malicious 2.4.6, Mini Shai-Hulud);
+    # 2.4.6 was removed and clean releases resumed (2.4.7, 2.4.8). Voxtral
+    # STT + TTS share the same SDK.
+    "tts.mistral": ("mistralai==2.4.8",),
     "tts.edge": ("edge-tts==7.2.7",),
     "tts.elevenlabs": ("elevenlabs==1.59.0",),
 
     # ─── Speech-to-text providers ──────────────────────────────────────────
+    "stt.mistral": ("mistralai==2.4.8",),
     "stt.faster_whisper": (
         "faster-whisper==1.2.1",
         "sounddevice==0.5.5",
diff --git a/tools/mcp_tool.py b/tools/mcp_tool.py
index a42031463dd..593994caa09 100644
--- a/tools/mcp_tool.py
+++ b/tools/mcp_tool.py
@@ -422,6 +422,17 @@ def _resolve_stdio_command(command: str, env: dict) -> tuple[str, dict]:
             candidates = [
                 os.path.join(hermes_home, "node", "bin", resolved_command),
                 os.path.join(os.path.expanduser("~"), ".local", "bin", resolved_command),
+                # /usr/local/bin is the canonical install location for Node on
+                # Linux from-source builds, the upstream node:bookworm-slim
+                # image (which the Hermes Docker image copies node + npm +
+                # corepack from since #4977), and macOS Homebrew on Intel.
+                # Without this candidate, any MCP server configured with an
+                # env.PATH that omits /usr/local/bin (a common pattern when
+                # users hand-author PATH for sandboxing) fails with ENOENT
+                # at execvp, and a naive symlink workaround into the user's
+                # PATH only fails one layer deeper because npx's shebang
+                # re-execs /usr/bin/env node which needs the same directory.
+                os.path.join(os.sep, "usr", "local", "bin", resolved_command),
             ]
             for candidate in candidates:
                 if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
@@ -559,6 +570,78 @@ def _validate_remote_mcp_url(server_name: str, url: Any) -> str:
     return stripped
 
 
+def _resolve_client_cert(server_name: str, config: dict):
+    """Resolve the ``client_cert`` / ``client_key`` config for mTLS.
+
+    Returns whatever ``httpx``'s ``cert=`` parameter accepts, or ``None`` when
+    no client certificate is configured:
+
+      - ``None`` if neither ``client_cert`` nor ``client_key`` is set.
+      - A single absolute path string if ``client_cert`` is a string and
+        ``client_key`` is unset (PEM file with cert + key combined).
+      - A ``(cert_path, key_path)`` tuple when both are set, or when
+        ``client_cert`` is a 2-element list/tuple.
+      - A ``(cert_path, key_path, password)`` tuple when ``client_cert`` is
+        a 3-element list/tuple — the third element is the key passphrase.
+
+    User paths support ``~`` expansion. Missing files raise ``FileNotFoundError``
+    with a server-scoped message so the failure surfaces as a clear setup
+    error rather than an opaque TLS handshake error.
+    """
+    raw_cert = config.get("client_cert")
+    raw_key = config.get("client_key")
+
+    if raw_cert is None and raw_key is None:
+        return None
+
+    def _expand(path: Any, label: str) -> str:
+        if not isinstance(path, str) or not path.strip():
+            raise ValueError(
+                f"MCP server '{server_name}': {label} must be a non-empty "
+                f"string path (got {type(path).__name__})"
+            )
+        expanded = os.path.expanduser(path.strip())
+        if not os.path.isfile(expanded):
+            raise FileNotFoundError(
+                f"MCP server '{server_name}': {label} not found at "
+                f"{expanded!r}"
+            )
+        return expanded
+
+    # Tuple/list form for client_cert — (cert, key) or (cert, key, password).
+    if isinstance(raw_cert, (list, tuple)):
+        if raw_key is not None:
+            raise ValueError(
+                f"MCP server '{server_name}': specify either client_cert as "
+                f"a list [cert, key] OR client_cert + client_key, not both"
+            )
+        if len(raw_cert) == 2:
+            cert_path = _expand(raw_cert[0], "client_cert[0]")
+            key_path = _expand(raw_cert[1], "client_cert[1]")
+            return (cert_path, key_path)
+        if len(raw_cert) == 3:
+            cert_path = _expand(raw_cert[0], "client_cert[0]")
+            key_path = _expand(raw_cert[1], "client_cert[1]")
+            password = raw_cert[2]
+            if not isinstance(password, str):
+                raise ValueError(
+                    f"MCP server '{server_name}': client_cert[2] (key "
+                    f"passphrase) must be a string"
+                )
+            return (cert_path, key_path, password)
+        raise ValueError(
+            f"MCP server '{server_name}': client_cert list form must have 2 "
+            f"or 3 elements (got {len(raw_cert)})"
+        )
+
+    # String form for client_cert.
+    cert_path = _expand(raw_cert, "client_cert")
+    if raw_key is not None:
+        key_path = _expand(raw_key, "client_key")
+        return (cert_path, key_path)
+    # Single combined PEM file (cert + key in one file).
+    return cert_path
+
 
 def _format_connect_error(exc: BaseException) -> str:
     """Render nested MCP connection errors into an actionable short message."""
@@ -1363,6 +1446,7 @@ class MCPServerTask:
             headers["mcp-protocol-version"] = LATEST_PROTOCOL_VERSION
         connect_timeout = config.get("connect_timeout", _DEFAULT_CONNECT_TIMEOUT)
         ssl_verify = config.get("ssl_verify", True)
+        client_cert = _resolve_client_cert(self.name, config)
 
         # OAuth 2.1 PKCE: route through the central MCPOAuthManager so the
         # same provider instance is reused across reconnects, pre-flow
@@ -1414,6 +1498,37 @@ class MCPServerTask:
                 # behind OAuth 2.1 PKCE work. Previously built but never
                 # forwarded — SSE OAuth would silently fail with 401s.
                 _sse_kwargs["auth"] = _oauth_auth
+            if client_cert is not None or ssl_verify is not True:
+                # SSE transport doesn't expose verify/cert as kwargs, so route
+                # them through an httpx_client_factory that wraps the SDK's
+                # defaults (follow_redirects=True) and adds our TLS settings.
+                # The SDK calls the factory with (headers, auth, timeout); we
+                # forward all of those and layer verify/cert on top.
+                import httpx as _httpx_mod
+
+                _cert_for_factory = client_cert
+                _verify_for_factory = ssl_verify
+
+                def _mcp_http_client_factory(
+                    headers=None, timeout=None, auth=None,
+                ):
+                    kwargs: dict = {
+                        "follow_redirects": True,
+                        "verify": _verify_for_factory,
+                    }
+                    if timeout is not None:
+                        kwargs["timeout"] = timeout
+                    else:
+                        kwargs["timeout"] = _httpx_mod.Timeout(30.0, read=300.0)
+                    if headers is not None:
+                        kwargs["headers"] = headers
+                    if auth is not None:
+                        kwargs["auth"] = auth
+                    if _cert_for_factory is not None:
+                        kwargs["cert"] = _cert_for_factory
+                    return _httpx_mod.AsyncClient(**kwargs)
+
+                _sse_kwargs["httpx_client_factory"] = _mcp_http_client_factory
             async with sse_client(**_sse_kwargs) as (read_stream, write_stream):
                 async with ClientSession(
                     read_stream, write_stream, **sampling_kwargs
@@ -1457,6 +1572,8 @@ class MCPServerTask:
                 client_kwargs["headers"] = headers
             if _oauth_auth is not None:
                 client_kwargs["auth"] = _oauth_auth
+            if client_cert is not None:
+                client_kwargs["cert"] = client_cert
 
             # Caller owns the client lifecycle — the SDK skips cleanup when
             # http_client is provided, so we wrap in async-with.
@@ -1536,6 +1653,21 @@ class MCPServerTask:
                 "this warning.",
                 self.name,
             )
+
+        # Validate remote URL once, up front.  Raising here (rather than
+        # letting it blow up inside the SDK's httpx layer on every retry)
+        # means a typo in config.yaml fails fast with a clear error — and
+        # critically, no reconnect-backoff burn.  (Ported from
+        # anomalyco/opencode#25019.)
+        if self._is_http():
+            try:
+                _validate_remote_mcp_url(self.name, config.get("url"))
+            except InvalidMcpUrlError as exc:
+                logger.warning("%s", exc)
+                self._error = exc
+                self._ready.set()
+                return
+
         retries = 0
         initial_retries = 0
         backoff = 1.0
@@ -3234,7 +3366,7 @@ def register_mcp_servers(servers: Dict[str, dict]) -> List[str]:
             return_exceptions=True,
         )
         for name, result in zip(server_names, results):
-            if isinstance(result, Exception):
+            if isinstance(result, BaseException):
                 command = new_servers.get(name, {}).get("command")
                 logger.warning(
                     "Failed to connect to MCP server '%s'%s: %s",
diff --git a/tools/memory_tool.py b/tools/memory_tool.py
index 5b9af55928e..281c806ea09 100644
--- a/tools/memory_tool.py
+++ b/tools/memory_tool.py
@@ -26,7 +26,6 @@ Design:
 import json
 import logging
 import os
-import re
 import tempfile
 import time
 from contextlib import contextmanager
diff --git a/tools/send_message_tool.py b/tools/send_message_tool.py
index 4494fbd0cf9..9ea0b9af41b 100644
--- a/tools/send_message_tool.py
+++ b/tools/send_message_tool.py
@@ -13,7 +13,6 @@ import re
 import ssl
 import time
 from email.utils import formatdate
-from typing import Dict, Optional
 
 from agent.redact import redact_sensitive_text
 
@@ -139,7 +138,7 @@ SEND_MESSAGE_SCHEMA = {
             },
             "message": {
                 "type": "string",
-                "description": "The message text to send. To send an image or file, include MEDIA:<local_path> for a file under a Hermes media cache or HERMES_MEDIA_ALLOW_DIRS — the platform will deliver it as a native media attachment."
+                "description": "The message text to send. To send an image or file, include MEDIA:<local_path> (e.g. 'MEDIA:/tmp/report.pdf') in the message — the platform will deliver it as a native media attachment."
             }
         },
         "required": []
@@ -1270,7 +1269,6 @@ async def _send_email(extra, chat_id, message):
     """Send via SMTP (one-shot, no persistent connection needed)."""
     import smtplib
     from email.mime.text import MIMEText
-    from email.utils import formatdate
 
     address = extra.get("address") or os.getenv("EMAIL_ADDRESS", "")
     password = os.getenv("EMAIL_PASSWORD", "")
diff --git a/tools/skill_usage.py b/tools/skill_usage.py
index 52a6d74dbac..745b68ead8c 100644
--- a/tools/skill_usage.py
+++ b/tools/skill_usage.py
@@ -31,7 +31,7 @@ import tempfile
 from contextlib import contextmanager
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import Any, Dict, Iterable, List, Optional, Set, Tuple
+from typing import Any, Dict, List, Optional, Set, Tuple
 
 from hermes_constants import get_hermes_home
 from agent.skill_utils import is_excluded_skill_path
diff --git a/tools/skills_guard.py b/tools/skills_guard.py
index 31949d7731d..4545c918d4d 100644
--- a/tools/skills_guard.py
+++ b/tools/skills_guard.py
@@ -36,7 +36,16 @@ from typing import List, Tuple
 # Hardcoded trust configuration
 # ---------------------------------------------------------------------------
 
-TRUSTED_REPOS = {"openai/skills", "anthropics/skills", "huggingface/skills"}
+TRUSTED_REPOS = {
+    "openai/skills",
+    "anthropics/skills",
+    "huggingface/skills",
+    # NVIDIA-verified skills: each entry ships a signed `skill.oms.sig`
+    # and a governance `skill-card.md` (sync pipeline drops anything
+    # missing the signature or card). Catalog details:
+    # https://github.com/NVIDIA/skills
+    "NVIDIA/skills",
+}
 
 INSTALL_POLICY = {
     #                  safe      caution    dangerous
diff --git a/tools/skills_hub.py b/tools/skills_hub.py
index 01b53b68691..c2a22eef3fe 100644
--- a/tools/skills_hub.py
+++ b/tools/skills_hub.py
@@ -401,6 +401,14 @@ class GitHubSource(SkillSource):
         {"repo": "openai/skills", "path": "skills/.system/"},
         {"repo": "anthropics/skills", "path": "skills/"},
         {"repo": "huggingface/skills", "path": "skills/"},
+        # NVIDIA/skills: NVIDIA-verified skills for CUDA-X, AIQ, cuOpt,
+        # cuPyNumeric, DeepStream, NeMo, NemoClaw, etc. Each skill ships
+        # alongside a signed `skill.oms.sig`, an OMS-signed `skill-card.md`
+        # (governance card), and an `evals/` directory — synced daily from
+        # the NVIDIA product repos. Treated as `trusted` (see
+        # `tools/skills_guard.py::TRUSTED_REPOS`). Sample layout:
+        # https://github.com/NVIDIA/skills/tree/main/skills
+        {"repo": "NVIDIA/skills", "path": "skills/"},
         {"repo": "garrytan/gstack", "path": ""},
     ]
 
@@ -412,6 +420,10 @@ class GitHubSource(SkillSource):
         # Per-instance cache: repo -> (default_branch, tree_entries)
         # Survives within a single search/install flow, avoiding redundant API calls.
         self._tree_cache: Dict[str, Tuple[str, List[dict]]] = {}
+        # Per-repo cache of the optional skills.sh.json grouping sidecar,
+        # mapping skill_name -> human-readable grouping title. ``None`` means
+        # "fetched, no sidecar"; a missing key means "not fetched yet".
+        self._skillsh_groupings: Dict[str, Optional[Dict[str, str]]] = {}
         # Set when GitHub returns 403 with rate limit exhausted
         self._rate_limited: bool = False
 
@@ -550,6 +562,7 @@ class GitHubSource(SkillSource):
             return []
 
         skills: List[SkillMeta] = []
+        groupings = self._get_skillsh_groupings(repo)
         for entry in entries:
             if entry.get("type") != "dir":
                 continue
@@ -562,6 +575,10 @@ class GitHubSource(SkillSource):
             skill_identifier = f"{repo}/{prefix}/{dir_name}" if prefix else f"{repo}/{dir_name}"
             meta = self.inspect(skill_identifier)
             if meta:
+                if groupings:
+                    category = groupings.get(meta.name) or groupings.get(dir_name)
+                    if category:
+                        meta.extra["category"] = category
                 skills.append(meta)
 
         # Cache the results
@@ -764,6 +781,61 @@ class GitHubSource(SkillSource):
             logger.debug("GitHub contents API fetch failed: %s", e)
         return None
 
+    def _get_skillsh_groupings(self, repo: str) -> Optional[Dict[str, str]]:
+        """Fetch and parse the repo-root ``skills.sh.json`` grouping sidecar.
+
+        ``skills.sh.json`` is a published cross-ecosystem standard
+        (``$schema: https://skills.sh/schemas/skills.sh.schema.json``) that
+        lets a tap declare human-readable category groupings for its skills:
+
+            {"groupings": [{"title": "Inference AI", "skills": ["dynamo-..."]}]}
+
+        We flatten it into ``{skill_name: grouping_title}`` so the Skills Hub
+        UI can show a real category pill instead of a tag-derived guess. Any
+        tap that ships this file gets categorization for free — this is not
+        NVIDIA-specific.
+
+        Returns the map (possibly empty) on success, or ``None`` when the repo
+        has no sidecar / it couldn't be parsed. Cached per-repo on the instance.
+        """
+        if repo in self._skillsh_groupings:
+            return self._skillsh_groupings[repo]
+
+        content = self._fetch_file_content(repo, "skills.sh.json")
+        groupings = self._parse_skillsh_groupings(content) if content else None
+        self._skillsh_groupings[repo] = groupings
+        return groupings
+
+    @staticmethod
+    def _parse_skillsh_groupings(content: str) -> Optional[Dict[str, str]]:
+        """Flatten a ``skills.sh.json`` document into ``{skill_name: title}``.
+
+        Returns ``None`` when the content isn't a usable grouping document.
+        """
+        try:
+            data = json.loads(content)
+        except (json.JSONDecodeError, TypeError):
+            return None
+        if not isinstance(data, dict):
+            return None
+        groupings = data.get("groupings")
+        if not isinstance(groupings, list):
+            return None
+
+        mapping: Dict[str, str] = {}
+        for group in groupings:
+            if not isinstance(group, dict):
+                continue
+            title = group.get("title")
+            members = group.get("skills")
+            if not isinstance(title, str) or not isinstance(members, list):
+                continue
+            for member in members:
+                if isinstance(member, str) and member:
+                    # First grouping wins if a skill is listed twice.
+                    mapping.setdefault(member, title)
+        return mapping
+
     def _read_cache(self, key: str) -> Optional[list]:
         """Read cached index if not expired."""
         cache_file = INDEX_CACHE_DIR / f"{key}.json"
@@ -797,6 +869,7 @@ class GitHubSource(SkillSource):
             "repo": meta.repo,
             "path": meta.path,
             "tags": meta.tags,
+            "extra": meta.extra,
         }
 
     @staticmethod
@@ -1217,6 +1290,16 @@ class SkillsShSource(SkillSource):
 
     BASE_URL = "https://skills.sh"
     SEARCH_URL = f"{BASE_URL}/api/search"
+    # Sitemap index — the real catalog source. The homepage scrape only
+    # exposes a curated featured strip (~200 entries); the sitemap covers
+    # the full ~20k+ catalog. https://www.skills.sh/sitemap.xml points at
+    # sitemap-skills-1.xml + sitemap-skills-2.xml, each up to 10k URLs.
+    SITEMAP_INDEX_URL = "https://www.skills.sh/sitemap.xml"
+    _SITEMAP_LOC_RE = re.compile(r"<loc>([^<]+)</loc>", re.IGNORECASE)
+    _SITEMAP_SKILL_RE = re.compile(
+        r"^https?://(?:www\.)?skills\.sh/(?P<owner>[^/]+)/(?P<repo>[^/]+)/(?P<skill>[^/]+)/?$",
+        re.IGNORECASE,
+    )
     _SKILL_LINK_RE = re.compile(r'href=["\']/(?P<id>(?!agents/|_next/|api/)[^"\'/]+/[^"\'/]+/[^"\'/]+)["\']')
     _INSTALL_CMD_RE = re.compile(
         r'npx\s+skills\s+add\s+(?P<repo>https?://github\.com/[^\s<]+|[^\s<]+)'
@@ -1246,7 +1329,10 @@ class SkillsShSource(SkillSource):
 
     def search(self, query: str, limit: int = 10) -> List[SkillMeta]:
         if not query.strip():
-            return self._featured_skills(limit)
+            # Empty query = bulk catalog dump (what build_skills_index.py
+            # calls with). The homepage scrape only sees ~200 featured
+            # entries; the sitemap walks the full ~20k+ catalog.
+            return self._sitemap_catalog(limit)
 
         cache_key = f"skills_sh_search_{hashlib.md5(f'{query}|{limit}'.encode()).hexdigest()}"
         cached = _read_index_cache(cache_key)
@@ -1307,6 +1393,97 @@ class SkillsShSource(SkillSource):
             return self._finalize_inspect_meta(meta, canonical, detail)
         return None
 
+    def _sitemap_catalog(self, limit: int) -> List[SkillMeta]:
+        """Walk the skills.sh sitemap to enumerate the full catalog.
+
+        Cached for the standard index TTL so we don't refetch ~2 MB of
+        sitemap XML per build. Falls back to ``_featured_skills`` if the
+        sitemap is unreachable or empty (network failure, hostname
+        change, etc.).
+        """
+        cache_key = "skills_sh_sitemap_v1"
+        cached = _read_index_cache(cache_key)
+        if cached is not None:
+            metas = [SkillMeta(**item) for item in cached]
+            return metas[:limit] if limit > 0 else metas
+
+        # skills.sh serves the per-skill sitemaps brotli-compressed, and
+        # httpx's optional brotlicffi backend has a streaming-decode bug
+        # that fails on these specific payloads. Excluding "br" from
+        # Accept-Encoding makes the server fall back to gzip (or
+        # identity), which works on every httpx install.
+        sitemap_headers = {"Accept-Encoding": "gzip"}
+
+        # Step 1: fetch the sitemap index → list of skill-sitemap URLs.
+        skill_sitemap_urls: List[str] = []
+        try:
+            resp = httpx.get(
+                self.SITEMAP_INDEX_URL,
+                timeout=20,
+                follow_redirects=True,
+                headers=sitemap_headers,
+            )
+            if resp.status_code != 200:
+                return self._featured_skills(limit)
+            for match in self._SITEMAP_LOC_RE.finditer(resp.text):
+                loc = match.group(1).strip()
+                # Sitemap index entries that point at the per-skill maps.
+                if "sitemap-skills" in loc:
+                    skill_sitemap_urls.append(loc)
+        except httpx.HTTPError:
+            return self._featured_skills(limit)
+
+        if not skill_sitemap_urls:
+            return self._featured_skills(limit)
+
+        # Step 2: fetch each skill sitemap and collect canonical "owner/repo/skill" IDs.
+        seen: set[str] = set()
+        results: List[SkillMeta] = []
+        for sitemap_url in skill_sitemap_urls:
+            try:
+                resp = httpx.get(
+                    sitemap_url,
+                    timeout=30,
+                    follow_redirects=True,
+                    headers=sitemap_headers,
+                )
+                if resp.status_code != 200:
+                    continue
+            except httpx.HTTPError:
+                continue
+            for loc_match in self._SITEMAP_LOC_RE.finditer(resp.text):
+                url = loc_match.group(1).strip()
+                m = self._SITEMAP_SKILL_RE.match(url)
+                if not m:
+                    continue
+                owner = m.group("owner")
+                repo_name = m.group("repo")
+                skill_name = m.group("skill")
+                canonical = f"{owner}/{repo_name}/{skill_name}"
+                if canonical in seen:
+                    continue
+                seen.add(canonical)
+                repo = f"{owner}/{repo_name}"
+                results.append(SkillMeta(
+                    name=skill_name,
+                    description=f"Indexed by skills.sh from {repo}",
+                    source="skills.sh",
+                    identifier=self._wrap_identifier(canonical),
+                    trust_level=self.github.trust_level_for(canonical),
+                    repo=repo,
+                    path=skill_name,
+                    extra={
+                        "detail_url": f"{self.BASE_URL}/{canonical}",
+                        "repo_url": f"https://github.com/{repo}",
+                    },
+                ))
+
+        if not results:
+            return self._featured_skills(limit)
+
+        _write_index_cache(cache_key, [_skill_meta_to_dict(item) for item in results])
+        return results[:limit] if limit > 0 else results
+
     def _featured_skills(self, limit: int) -> List[SkillMeta]:
         cache_key = "skills_sh_featured"
         cached = _read_index_cache(cache_key)
@@ -1859,8 +2036,18 @@ class ClawHubSource(SkillSource):
             results = self._search_catalog(query, limit=limit)
             if results:
                 return results
+        else:
+            # Empty query: route through the paginating catalog walker so the
+            # full ClawHub catalog (20k+ skills) lands in the index. The
+            # single-request listing path below caps at one page (200 items)
+            # regardless of `limit`, which silently truncates the public
+            # skills index. The catalog walker follows `nextCursor`.
+            catalog = self._load_catalog_index()
+            if catalog:
+                return self._dedupe_results(catalog)[:limit] if limit > 0 else self._dedupe_results(catalog)
 
-        # Empty query or catalog fallback failure: use the lightweight listing API.
+        # Non-empty query catalog miss, or catalog walker failure: fall back to
+        # the lightweight listing API for a best-effort response.
         cache_key = f"clawhub_search_listing_v1_{hashlib.md5(query.encode()).hexdigest()}_{limit}"
         cached = _read_index_cache(cache_key)
         if cached is not None:
@@ -1989,7 +2176,12 @@ class ClawHubSource(SkillSource):
         cursor: Optional[str] = None
         results: List[SkillMeta] = []
         seen: set[str] = set()
-        max_pages = 50
+        # ClawHub has 50k+ skills as of May 2026 (live E2E walked 49,698 with
+        # an active cursor still pending); 750 pages * 200/page = 150k ceiling
+        # leaves room for catalog growth. Walk-to-exhaustion typically
+        # terminates well before this on `nextCursor` going None — the cap is
+        # a safety rail against an infinite-cursor loop.
+        max_pages = 750
 
         for _ in range(max_pages):
             params: Dict[str, Any] = {"limit": 200}
diff --git a/tools/skills_tool.py b/tools/skills_tool.py
index 054be4cae3d..bc19ff8b5ce 100644
--- a/tools/skills_tool.py
+++ b/tools/skills_tool.py
@@ -629,49 +629,6 @@ def _sort_skills(skills: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
     return sorted(skills, key=lambda s: (s.get("category") or "", s["name"]))
 
 
-def _load_category_description(category_dir: Path) -> Optional[str]:
-    """
-    Load category description from DESCRIPTION.md if it exists.
-
-    Args:
-        category_dir: Path to the category directory
-
-    Returns:
-        Description string or None if not found
-    """
-    desc_file = category_dir / "DESCRIPTION.md"
-    if not desc_file.exists():
-        return None
-
-    try:
-        content = desc_file.read_text(encoding="utf-8")
-        # Parse frontmatter if present
-        frontmatter, body = _parse_frontmatter(content)
-
-        # Prefer frontmatter description, fall back to first non-header line
-        description = frontmatter.get("description", "")
-        if not description:
-            for line in body.strip().split("\n"):
-                line = line.strip()
-                if line and not line.startswith("#"):
-                    description = line
-                    break
-
-        # Truncate to reasonable length
-        if len(description) > MAX_DESCRIPTION_LENGTH:
-            description = description[: MAX_DESCRIPTION_LENGTH - 3] + "..."
-
-        return description if description else None
-    except (UnicodeDecodeError, PermissionError) as e:
-        logger.debug("Failed to read category description %s: %s", desc_file, e)
-        return None
-    except Exception as e:
-        logger.warning(
-            "Error parsing category description %s: %s", desc_file, e, exc_info=True
-        )
-        return None
-
-
 def skills_list(category: str = None, task_id: str = None) -> str:
     """
     List all available skills (progressive disclosure tier 1 - minimal metadata).
diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py
index 80fa67a7b8e..8351d61eb93 100644
--- a/tools/terminal_tool.py
+++ b/tools/terminal_tool.py
@@ -71,6 +71,7 @@ from tools.tool_backend_helpers import (
     coerce_modal_mode,
     has_direct_modal_credentials,
     managed_nous_tools_enabled,
+    nous_tool_gateway_unavailable_message,
     resolve_modal_backend_state,
 )
 
@@ -860,6 +861,78 @@ _creation_locks_lock = threading.Lock()  # Protects _creation_locks dict itself
 _cleanup_thread = None
 _cleanup_running = False
 
+# Once-per-process guard for the docker orphan reaper (issue #20561).
+# Set when _maybe_reap_docker_orphans first runs; concurrent _create_environment
+# calls for parallel subagents won't re-trigger the sweep.
+_docker_orphan_reaper_ran = False
+_docker_orphan_reaper_lock = threading.Lock()
+
+
+def _maybe_reap_docker_orphans(container_config: Dict[str, Any]) -> None:
+    """Run the docker orphan reaper once per process, if enabled.
+
+    Sweeps long-Exited containers labeled ``hermes-agent=1`` for the current
+    profile that match the issue #20561 leak class — containers left behind
+    by Hermes processes that exited without firing ``atexit`` (SIGKILL,
+    OOM, terminal-window-close). The reaper is conservative by default:
+    only Exited containers older than ``2 × lifetime_seconds`` and scoped to
+    the current profile.
+
+    Gates:
+
+    * ``terminal.docker_orphan_reaper: false`` disables it entirely (the
+      operator opted out — usually because they're running multiple
+      Hermes processes in the same profile and don't trust the
+      conservative defaults).
+    * ``_docker_orphan_reaper_ran`` flag — sweep runs once per Python
+      interpreter, not on every subagent / RL-rollout / parallel
+      ``terminal()`` call.
+    """
+    global _docker_orphan_reaper_ran
+    if not container_config.get("docker_orphan_reaper", True):
+        return
+    # Cheap double-checked-locking: read without the lock, take the lock
+    # only on first run, recheck inside.
+    if _docker_orphan_reaper_ran:
+        return
+    with _docker_orphan_reaper_lock:
+        if _docker_orphan_reaper_ran:
+            return
+        _docker_orphan_reaper_ran = True
+
+    # 2 × lifetime_seconds gives sibling Hermes processes a generous grace
+    # window. Floor at 60s so an operator with TERMINAL_LIFETIME_SECONDS=0
+    # doesn't get an instant-reap that races their own setup.
+    # ``container_config`` only carries container_* keys, so read
+    # lifetime_seconds from the env var the rest of the module uses.
+    try:
+        lifetime = int(os.getenv("TERMINAL_LIFETIME_SECONDS", "300"))
+    except (TypeError, ValueError):
+        lifetime = 300
+    lifetime = max(60, lifetime)
+    max_age = lifetime * 2
+
+    try:
+        from tools.environments.docker import (
+            reap_orphan_containers, _get_active_profile_name,
+        )
+    except ImportError:
+        return
+    try:
+        profile = _get_active_profile_name()
+        removed = reap_orphan_containers(
+            max_age_seconds=max_age, profile_filter=profile,
+        )
+        if removed:
+            logger.info(
+                "Docker orphan reaper removed %d stale container(s) for profile %s",
+                removed, profile,
+            )
+    except Exception as e:
+        # Never fail the env-creation path because of a janitor problem.
+        logger.debug("Docker orphan reaper raised: %s", e)
+
+
 # Per-task environment overrides registry.
 # Allows environments (e.g., TerminalBench2Env) to specify a custom Docker/Modal
 # image for a specific task_id BEFORE the agent loop starts. When the terminal or
@@ -1023,6 +1096,22 @@ def _get_env_config() -> Dict[str, Any]:
         "docker_env": _parse_env_var("TERMINAL_DOCKER_ENV", "{}", json.loads, "valid JSON"),
         "docker_run_as_host_user": os.getenv("TERMINAL_DOCKER_RUN_AS_HOST_USER", "false").lower() in {"true", "1", "yes"},
         "docker_extra_args": _parse_env_var("TERMINAL_DOCKER_EXTRA_ARGS", "[]", json.loads, "valid JSON"),
+        # Cross-process container reuse (issue #20561).  The docs claim
+        # "ONE long-lived container shared across sessions" — this toggle
+        # makes that real by probing for a labeled container at startup and
+        # attaching to it instead of always starting a fresh one.  Set to
+        # ``false`` for hard per-process isolation (no reuse, container is
+        # removed on exit).
+        "docker_persist_across_processes": os.getenv(
+            "TERMINAL_DOCKER_PERSIST_ACROSS_PROCESSES", "true"
+        ).lower() in {"true", "1", "yes"},
+        # Startup orphan reaper for hermes-tagged containers left behind by
+        # crashed / SIGKILL'd previous processes that bypassed atexit.
+        # Conservative: only sweeps Exited containers older than 2× the
+        # idle-reap window AND scoped to the current profile. Issue #20561.
+        "docker_orphan_reaper": os.getenv(
+            "TERMINAL_DOCKER_ORPHAN_REAPER", "true"
+        ).lower() in {"true", "1", "yes"},
     }
 
 
@@ -1071,6 +1160,13 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
         return _LocalEnvironment(cwd=cwd, timeout=timeout)
     
     elif env_type == "docker":
+        # One-shot orphan reaper: clean up labeled containers left behind by
+        # prior Hermes processes that hit SIGKILL / OOM / a closed terminal
+        # before the atexit cleanup hook could run.  Gated to once per
+        # process so concurrent _create_environment calls (parallel
+        # subagents, RL benchmarks) don't run the reaper N times.
+        # Disable via ``terminal.docker_orphan_reaper: false`` (issue #20561).
+        _maybe_reap_docker_orphans(cc)
         return _DockerEnvironment(
             image=image, cwd=cwd, timeout=timeout,
             cpu=cpu, memory=memory, disk=disk,
@@ -1082,6 +1178,7 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
             env=docker_env,
             run_as_host_user=cc.get("docker_run_as_host_user", False),
             extra_args=docker_extra_args,
+            persist_across_processes=cc.get("docker_persist_across_processes", True),
         )
     
     elif env_type == "singularity":
@@ -1118,13 +1215,19 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
             if modal_state["managed_mode_blocked"]:
                 raise ValueError(
                     "Modal backend is configured for managed mode, but "
-                    "a paid Nous subscription is required for the Tool Gateway and no direct "
-                    "Modal credentials/config were found. Log in with `hermes model` or "
-                    "choose TERMINAL_MODAL_MODE=direct/auto."
+                    "Nous Tool Gateway access is not currently available and no direct "
+                    "Modal credentials/config were found. "
+                    + nous_tool_gateway_unavailable_message(
+                        "managed Modal execution",
+                    )
+                    + " Choose TERMINAL_MODAL_MODE=direct/auto to use direct Modal credentials."
                 )
             if modal_state["mode"] == "managed":
                 raise ValueError(
-                    "Modal backend is configured for managed mode, but the managed tool gateway is unavailable."
+                    "Modal backend is configured for managed mode, but the managed tool gateway is unavailable. "
+                    + nous_tool_gateway_unavailable_message(
+                        "managed Modal execution",
+                    )
                 )
             if modal_state["mode"] == "direct":
                 raise ValueError(
@@ -1323,8 +1426,27 @@ def cleanup_all_environments():
     return cleaned
 
 
-def cleanup_vm(task_id: str):
-    """Manually clean up a specific environment by task_id."""
+def cleanup_vm(task_id: str, *, force_remove: bool = False):
+    """Manually clean up a specific environment by task_id.
+
+    *force_remove* (default False) is forwarded to backends that accept it
+    — currently only ``DockerEnvironment``. The default of False matches
+    session-lifecycle semantics: this function is called from
+    ``AIAgent.close()`` (TUI session close, gateway session teardown) and the
+    per-turn cleanup branch for non-persistent envs, both of which should
+    honor the user's persist-mode preference. Stopping the container here
+    would defeat the "ONE long-lived container shared across sessions"
+    contract — exactly the bug Ben reported when the container was killed
+    on every TUI session close.
+
+    Pass ``force_remove=True`` for actual user-initiated teardown
+    (e.g. ``/reset``-style flows that haven't been wired yet, or future
+    "destroy my sandbox" commands).
+
+    The idle reaper passes the env through ``env.cleanup()`` directly (not
+    via this function), so persist-mode idle envs are similarly no-op'd —
+    only the orphan reaper at next startup reclaims them.
+    """
     # Remove from tracking dicts while holding the lock, but defer the
     # actual (potentially slow) env.cleanup() call to outside the lock
     # so other tool calls aren't blocked.
@@ -1349,7 +1471,14 @@ def cleanup_vm(task_id: str):
 
     try:
         if hasattr(env, 'cleanup'):
-            env.cleanup()
+            # Pass force_remove only if the env's cleanup() accepts it
+            # (DockerEnvironment after issue #20561; other backends don't).
+            import inspect
+            sig = inspect.signature(env.cleanup)
+            if "force_remove" in sig.parameters:
+                env.cleanup(force_remove=force_remove)
+            else:
+                env.cleanup()
         elif hasattr(env, 'stop'):
             env.stop()
         elif hasattr(env, 'terminate'):
@@ -1371,7 +1500,23 @@ def _atexit_cleanup():
     if _active_environments:
         count = len(_active_environments)
         logger.info("Shutting down %d remaining sandbox(es)...", count)
+        # Snapshot the env objects BEFORE cleanup_all_environments empties
+        # the dict; we need them to wait on docker cleanup threads after the
+        # registry has been cleared.
+        envs_to_wait = list(_active_environments.values())
         cleanup_all_environments()
+        # Block briefly so docker stop/rm actually completes before the
+        # interpreter exits. Issue #20561 — without this join, the daemon
+        # cleanup threads were getting torn down mid-`docker stop`, leaving
+        # Exited containers piled up on the host.
+        for env in envs_to_wait:
+            wait_fn = getattr(env, "wait_for_cleanup", None)
+            if wait_fn is None:
+                continue
+            try:
+                wait_fn(timeout=15.0)
+            except Exception as e:  # never block shutdown on a bad backend
+                logger.debug("wait_for_cleanup raised on exit: %s", e)
 
 atexit.register(_atexit_cleanup)
 
@@ -1739,6 +1884,8 @@ def terminal_tool(
                                 "docker_env": config.get("docker_env", {}),
                                 "docker_run_as_host_user": config.get("docker_run_as_host_user", False),
                                 "docker_extra_args": config.get("docker_extra_args", []),
+                                "docker_persist_across_processes": config.get("docker_persist_across_processes", True),
+                                "docker_orphan_reaper": config.get("docker_orphan_reaper", True),
                             }
 
                         local_config = None
@@ -2214,16 +2361,21 @@ def check_terminal_requirements() -> bool:
                 if modal_state["managed_mode_blocked"]:
                     logger.error(
                         "Modal backend selected with TERMINAL_MODAL_MODE=managed, but "
-                        "a paid Nous subscription is required for the Tool Gateway and no direct "
-                        "Modal credentials/config were found. Log in with `hermes model` "
-                        "or choose TERMINAL_MODAL_MODE=direct/auto."
+                        "Nous Tool Gateway access is not currently available and no direct "
+                        "Modal credentials/config were found. %s Choose "
+                        "TERMINAL_MODAL_MODE=direct/auto to use direct Modal credentials.",
+                        nous_tool_gateway_unavailable_message(
+                            "managed Modal execution",
+                        ),
                     )
                     return False
                 if modal_state["mode"] == "managed":
                     logger.error(
                         "Modal backend selected with TERMINAL_MODAL_MODE=managed, but the managed "
-                        "tool gateway is unavailable. Configure the managed gateway or choose "
-                        "TERMINAL_MODAL_MODE=direct/auto."
+                        "tool gateway is unavailable. %s",
+                        nous_tool_gateway_unavailable_message(
+                            "managed Modal execution",
+                        ),
                     )
                     return False
                 elif modal_state["mode"] == "direct":
diff --git a/tools/thread_context.py b/tools/thread_context.py
new file mode 100644
index 00000000000..8d9a2722902
--- /dev/null
+++ b/tools/thread_context.py
@@ -0,0 +1,120 @@
+#!/usr/bin/env python3
+"""Propagate agent-turn context into worker threads that dispatch Hermes tools.
+
+A bare ``threading.Thread`` / ``ThreadPoolExecutor`` worker starts with an
+empty ``contextvars.Context`` and no thread-local approval/sudo callbacks.
+Tool dispatch inside such a thread therefore silently loses:
+
+  * the approval *session/platform* ContextVars (``tools.approval`` /
+    ``gateway.session_context``) — so gateway sessions fall into
+    ``check_dangerous_command``'s non-interactive auto-approve branch and
+    dangerous commands run without prompting (#33057, #30882);
+  * the thread-local CLI approval/sudo callbacks (``tools.terminal_tool``) —
+    so ``prompt_dangerous_approval`` cannot reach the user
+    (GHSA-qg5c-hvr5-hjgr, #15216).
+
+This helper factors out that capture/install/clear lifecycle so the several
+places that fan tool dispatch onto worker threads (``agent.tool_executor`` and
+the ``execute_code`` RPC threads) share one audited implementation instead of
+divergent copies.
+
+Usage — call :func:`propagate_context_to_thread` **on the parent thread**
+(it snapshots the parent's ContextVars and callbacks at call time) and use the
+returned callable as the worker's target::
+
+    t = threading.Thread(target=propagate_context_to_thread(loop_fn), args=(...))
+    # or
+    executor.submit(propagate_context_to_thread(worker_fn), *args)
+
+Approval/sudo callbacks are installed for the worker's lifetime and **always
+cleared on exit**, so a recycled thread never holds a stale reference to a
+disposed CLI instance.
+"""
+
+from __future__ import annotations
+
+import contextvars
+import logging
+from typing import Callable
+
+logger = logging.getLogger(__name__)
+
+
+def _callback_api():
+    """Resolve the terminal_tool callback getters/setters.
+
+    Imported lazily: ``tools.terminal_tool`` imports ``tools.approval`` at
+    module load, so a top-level import here would risk an import cycle for
+    callers that live in ``tools.approval``.
+    """
+    from tools.terminal_tool import (
+        _get_approval_callback,
+        _get_sudo_password_callback,
+        set_approval_callback,
+        set_sudo_password_callback,
+    )
+    return (
+        _get_approval_callback,
+        _get_sudo_password_callback,
+        set_approval_callback,
+        set_sudo_password_callback,
+    )
+
+
+def propagate_context_to_thread(target: Callable) -> Callable:
+    """Wrap *target* for execution on a worker thread with the *current*
+    thread's ContextVars and approval/sudo callbacks propagated.
+
+    Call this on the parent thread; pass the returned callable as the
+    thread/executor target.  The returned callable forwards its positional
+    and keyword arguments to *target* and returns its result.
+
+    Fail-closed: if callback installation raises, the callbacks are left
+    unset (``None``).  That is the safe outcome — ``prompt_dangerous_approval``
+    denies dangerous commands when no callback is registered in an interactive
+    context, and the gateway approval queue blocks when its notify callback is
+    absent.
+    """
+    ctx = contextvars.copy_context()
+    parent_approval_cb = parent_sudo_cb = None
+    setters = None
+    try:
+        get_approval, get_sudo, set_approval, set_sudo = _callback_api()
+        parent_approval_cb = get_approval()
+        parent_sudo_cb = get_sudo()
+        setters = (set_approval, set_sudo)
+    except Exception:
+        logger.debug("Could not capture parent approval/sudo callbacks", exc_info=True)
+
+    def _runner(*args, **kwargs):
+        def _inner():
+            if setters is not None:
+                set_approval, set_sudo = setters
+                try:
+                    if parent_approval_cb is not None:
+                        set_approval(parent_approval_cb)
+                    if parent_sudo_cb is not None:
+                        set_sudo(parent_sudo_cb)
+                except Exception:
+                    logger.debug(
+                        "Failed to install propagated approval/sudo callbacks; "
+                        "dangerous-command approval will fail closed",
+                        exc_info=True,
+                    )
+            try:
+                return target(*args, **kwargs)
+            finally:
+                if setters is not None:
+                    set_approval, set_sudo = setters
+                    try:
+                        set_approval(None)
+                        set_sudo(None)
+                    except Exception:
+                        logger.debug(
+                            "Failed to clear propagated approval/sudo callbacks",
+                            exc_info=True,
+                        )
+
+        return ctx.run(_inner)
+
+    return _runner
diff --git a/tools/tirith_security.py b/tools/tirith_security.py
index 83b222c8887..f40da60e52d 100644
--- a/tools/tirith_security.py
+++ b/tools/tirith_security.py
@@ -326,6 +326,32 @@ def _verify_checksum(archive_path: str, checksums_path: str, archive_name: str)
     return True
 
 
+def _extract_tirith_binary(tar: tarfile.TarFile, dest_dir: str, log) -> tuple[str | None, str]:
+    """Extract the tirith binary from a release archive into dest_dir."""
+    for member in tar.getmembers():
+        if member.name == "tirith" or member.name.endswith("/tirith"):
+            if ".." in member.name:
+                continue
+            if not member.isfile():
+                log("tirith archive member is not a regular file: %s", member.name)
+                return None, "binary_not_regular_file"
+            src_file = tar.extractfile(member)
+            if src_file is None:
+                log("tirith binary could not be read from archive")
+                return None, "binary_extract_failed"
+
+            dest_path = os.path.join(dest_dir, "tirith")
+            try:
+                with open(dest_path, "wb") as out:
+                    shutil.copyfileobj(src_file, out)
+            finally:
+                src_file.close()
+            return dest_path, ""
+
+    log("tirith binary not found in archive")
+    return None, "binary_not_in_archive"
+
+
 def _install_tirith(*, log_failures: bool = True) -> tuple[str | None, str]:
     """Download and install tirith to $HERMES_HOME/bin/tirith.
 
@@ -394,19 +420,10 @@ def _install_tirith(*, log_failures: bool = True) -> tuple[str | None, str]:
             return None, "checksum_failed"
 
         with tarfile.open(archive_path, "r:gz") as tar:
-            # Extract only the tirith binary (safety: reject paths with ..)
-            for member in tar.getmembers():
-                if member.name == "tirith" or member.name.endswith("/tirith"):
-                    if ".." in member.name:
-                        continue
-                    member.name = "tirith"
-                    tar.extract(member, tmpdir)
-                    break
-            else:
-                log("tirith binary not found in archive")
-                return None, "binary_not_in_archive"
+            src, reason = _extract_tirith_binary(tar, tmpdir, log)
+            if src is None:
+                return None, reason
 
-        src = os.path.join(tmpdir, "tirith")
         dest = os.path.join(_hermes_bin_dir(), "tirith")
         try:
             shutil.move(src, dest)
diff --git a/tools/tool_backend_helpers.py b/tools/tool_backend_helpers.py
index b1c5b7600c7..b1e0f834c7e 100644
--- a/tools/tool_backend_helpers.py
+++ b/tools/tool_backend_helpers.py
@@ -14,29 +14,55 @@ _DEFAULT_MODAL_MODE = "auto"
 _VALID_MODAL_MODES = {"auto", "direct", "managed"}
 
 
-def managed_nous_tools_enabled() -> bool:
-    """Return True when the user has an active paid Nous subscription.
+def managed_nous_tools_enabled(*, force_fresh: bool = False) -> bool:
+    """Return True when the user has paid Nous Portal service access.
 
-    The Tool Gateway is available to any Nous subscriber who is NOT on
-    the free tier.  We intentionally catch all exceptions and return
-    False — never block the agent startup path.
+    Tool Gateway availability fails closed on unknown/error entitlement.  We
+    intentionally catch all exceptions and return False — never block startup.
+    ``force_fresh=True`` is for interactive configuration flows that should
+    reflect a just-purchased subscription or credits immediately.
     """
     try:
-        from hermes_cli.auth import get_nous_auth_status
+        from hermes_cli.nous_account import get_nous_portal_account_info
 
-        status = get_nous_auth_status()
-        if not status.get("logged_in"):
+        if force_fresh:
+            account_info = get_nous_portal_account_info(force_fresh=True)
+        else:
+            account_info = get_nous_portal_account_info()
+        if not account_info.logged_in:
             return False
-
-        from hermes_cli.models import check_nous_free_tier
-
-        if check_nous_free_tier():
-            return False  # free-tier users don't get gateway access
-        return True
+        return account_info.paid_service_access is True
     except Exception:
         return False
 
 
+def nous_tool_gateway_unavailable_message(
+    capability: str = "the Nous Tool Gateway",
+    *,
+    force_fresh: bool = False,
+) -> str:
+    """Return account-aware guidance for an unavailable Nous Tool Gateway path."""
+    try:
+        from hermes_cli.nous_account import (
+            format_nous_portal_entitlement_message,
+            get_nous_portal_account_info,
+        )
+
+        account_info = get_nous_portal_account_info(force_fresh=force_fresh)
+        message = format_nous_portal_entitlement_message(
+            account_info,
+            capability=capability,
+        )
+        if message:
+            return message
+    except Exception:
+        pass
+    return (
+        f"{capability} is unavailable. Run `hermes model` to refresh your "
+        "Nous Portal login and billing status."
+    )
+
+
 def normalize_browser_cloud_provider(value: object | None) -> str:
     """Return a normalized browser provider key."""
     provider = str(value or _DEFAULT_BROWSER_PROVIDER).strip().lower()
@@ -58,9 +84,13 @@ def normalize_modal_mode(value: object | None) -> str:
 
 def has_direct_modal_credentials() -> bool:
     """Return True when direct Modal credentials/config are available."""
+    try:
+        modal_file_exists = (Path.home() / ".modal.toml").exists()
+    except (PermissionError, OSError):
+        modal_file_exists = False
     return bool(
         (os.getenv("MODAL_TOKEN_ID") and os.getenv("MODAL_TOKEN_SECRET"))
-        or (Path.home() / ".modal.toml").exists()
+        or modal_file_exists
     )
 
 
@@ -69,6 +99,7 @@ def resolve_modal_backend_state(
     *,
     has_direct: bool,
     managed_ready: bool,
+    managed_enabled: bool | None = None,
 ) -> Dict[str, Any]:
     """Resolve direct vs managed Modal backend selection.
 
@@ -79,16 +110,18 @@ def resolve_modal_backend_state(
     """
     requested_mode = coerce_modal_mode(modal_mode)
     normalized_mode = normalize_modal_mode(modal_mode)
+    if managed_enabled is None:
+        managed_enabled = managed_nous_tools_enabled()
     managed_mode_blocked = (
-        requested_mode == "managed" and not managed_nous_tools_enabled()
+        requested_mode == "managed" and not managed_enabled
     )
 
     if normalized_mode == "managed":
-        selected_backend = "managed" if managed_nous_tools_enabled() and managed_ready else None
+        selected_backend = "managed" if managed_enabled and managed_ready else None
     elif normalized_mode == "direct":
         selected_backend = "direct" if has_direct else None
     else:
-        selected_backend = "managed" if managed_nous_tools_enabled() and managed_ready else "direct" if has_direct else None
+        selected_backend = "managed" if managed_enabled and managed_ready else "direct" if has_direct else None
 
     return {
         "requested_mode": requested_mode,
diff --git a/tools/tool_search.py b/tools/tool_search.py
new file mode 100644
index 00000000000..e885a5d7b88
--- /dev/null
+++ b/tools/tool_search.py
@@ -0,0 +1,735 @@
+"""Progressive tool disclosure ("tool search") for Hermes Agent.
+
+When enabled, MCP and non-core plugin tools are replaced in the model-visible
+tools array by three bridge tools — ``tool_search``, ``tool_describe``,
+``tool_call`` — and surfaced on demand. Core Hermes tools never defer.
+
+Design constraints this module is built around (see ``openclaw-tool-search-report``
+for the full rationale):
+
+* Core tools defined in ``toolsets._HERMES_CORE_TOOLS`` are *never* deferred.
+  Always-load means always-load. No exceptions.
+* The threshold gate runs every assembly: when deferrable tools would consume
+  less than ``threshold_pct`` of the model's context window (default 10%),
+  tool search is a no-op and the tools array passes through unchanged.
+* The catalog is stateless across turns and tools-array assemblies. It is
+  rebuilt from the current tool-defs list every time. This is the lesson
+  from OpenClaw's cron regression (openclaw/openclaw#84141): a session-keyed
+  catalog that drifts out of sync with the live tool registry produces
+  silent tool dropouts.
+* Bridge tools route through ``model_tools.handle_function_call`` exactly
+  like a direct call, so guardrails, plugin pre/post hooks, approval flows,
+  and tool-result truncation all fire identically.
+* Display and trajectory unwrap is implemented here so the user (CLI activity
+  feed, gateway, saved trajectories) always sees the underlying tool, not
+  the bridge.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import math
+import re
+from dataclasses import dataclass, field
+from typing import Any, Dict, Iterable, List, Optional, Tuple
+
+logger = logging.getLogger("tools.tool_search")
+
+
+# Bridge tool names. These names are reserved and may not collide with a
+# user/plugin/MCP tool — registration of any tool with these names is
+# rejected by the registry's existing override-protection logic.
+TOOL_SEARCH_NAME = "tool_search"
+TOOL_DESCRIBE_NAME = "tool_describe"
+TOOL_CALL_NAME = "tool_call"
+
+BRIDGE_TOOL_NAMES = frozenset({TOOL_SEARCH_NAME, TOOL_DESCRIBE_NAME, TOOL_CALL_NAME})
+
+# When estimating tokens from char count without a real tokenizer, this is
+# the cheap rule of thumb that's stable across providers. Roughly 4 chars
+# per token for English+JSON. Underestimating leads to false negatives
+# (tool search not activated when it should); overestimating leads to false
+# positives (activated when not needed). 4.0 errs slightly toward
+# underestimating, which is the safer default.
+CHARS_PER_TOKEN = 4.0
+
+
+# ---------------------------------------------------------------------------
+# Configuration plumbing
+# ---------------------------------------------------------------------------
+
+
+@dataclass(frozen=True)
+class ToolSearchConfig:
+    """Resolved, validated tool-search configuration for a single assembly."""
+
+    enabled: str  # "auto" | "on" | "off"
+    threshold_pct: float  # 0..100 — only used when enabled == "auto"
+    search_default_limit: int
+    max_search_limit: int
+
+    @classmethod
+    def from_raw(cls, raw: Any) -> "ToolSearchConfig":
+        """Build a config from a raw dict / bool / None.
+
+        Accepts the legacy bool shape (``tools.tool_search: true``) and the
+        dict shape (``tools.tool_search: {enabled: auto, ...}``). Validates
+        and clamps every numeric field; unknown values fall back to safe
+        defaults rather than raising, so a typo in user config does not
+        break the agent.
+        """
+        if raw is True:
+            return cls(enabled="auto", threshold_pct=10.0,
+                       search_default_limit=5, max_search_limit=20)
+        if raw is False:
+            return cls(enabled="off", threshold_pct=10.0,
+                       search_default_limit=5, max_search_limit=20)
+        if not isinstance(raw, dict):
+            return cls(enabled="auto", threshold_pct=10.0,
+                       search_default_limit=5, max_search_limit=20)
+
+        enabled_raw = str(raw.get("enabled", "auto")).strip().lower()
+        if enabled_raw in ("true", "1", "yes"):
+            enabled = "on"
+        elif enabled_raw in ("false", "0", "no"):
+            enabled = "off"
+        elif enabled_raw in ("auto", "on", "off"):
+            enabled = enabled_raw
+        else:
+            enabled = "auto"
+
+        threshold_pct = _safe_float(raw.get("threshold_pct"), 10.0)
+        threshold_pct = max(0.0, min(100.0, threshold_pct))
+
+        max_search_limit = max(1, min(50, _safe_int(raw.get("max_search_limit"), 20)))
+        search_default_limit = max(1, min(max_search_limit,
+                                          _safe_int(raw.get("search_default_limit"), 5)))
+
+        return cls(
+            enabled=enabled,
+            threshold_pct=threshold_pct,
+            search_default_limit=search_default_limit,
+            max_search_limit=max_search_limit,
+        )
+
+
+def _safe_int(value: Any, fallback: int) -> int:
+    try:
+        return int(value)
+    except (TypeError, ValueError):
+        return fallback
+
+
+def _safe_float(value: Any, fallback: float) -> float:
+    try:
+        return float(value)
+    except (TypeError, ValueError):
+        return fallback
+
+
+def load_config() -> ToolSearchConfig:
+    """Load tool-search config from the user config file."""
+    try:
+        from hermes_cli.config import load_config as _load
+        cfg = _load() or {}
+        tools_cfg = cfg.get("tools") if isinstance(cfg.get("tools"), dict) else {}
+        if not isinstance(tools_cfg, dict):
+            tools_cfg = {}
+        return ToolSearchConfig.from_raw(tools_cfg.get("tool_search"))
+    except Exception as e:
+        logger.debug("Failed to load tool-search config: %s", e)
+        return ToolSearchConfig.from_raw(None)
+
+
+# ---------------------------------------------------------------------------
+# Tool classification
+# ---------------------------------------------------------------------------
+
+
+def _core_tool_names() -> frozenset[str]:
+    """Return the set of tool names that must NEVER be deferred.
+
+    Imported lazily because ``toolsets`` imports from ``tools.registry``
+    and we don't want a hard cycle.
+    """
+    try:
+        from toolsets import _HERMES_CORE_TOOLS
+        return frozenset(_HERMES_CORE_TOOLS)
+    except Exception:
+        return frozenset()
+
+
+def is_deferrable_tool_name(name: str) -> bool:
+    """Return True if a tool with this name is *eligible* for deferral.
+
+    A tool is deferrable iff it is registered with an MCP toolset prefix
+    OR it is not in ``_HERMES_CORE_TOOLS``. Core tools are never deferred
+    even when their toolset is technically plugin-provided (this protects
+    against accidental shadowing).
+    """
+    if name in BRIDGE_TOOL_NAMES:
+        return False
+    if name in _core_tool_names():
+        return False
+    # Check registry toolset for MCP prefix.
+    try:
+        from tools.registry import registry
+        entry = registry.get_entry(name)
+        if entry is None:
+            return False
+        if entry.toolset.startswith("mcp-"):
+            return True
+        # Non-MCP, non-core → plugin tool, eligible.
+        return True
+    except Exception:
+        return False
+
+
+def classify_tools(tool_defs: List[Dict[str, Any]]) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
+    """Split a tool-defs list into (visible, deferrable).
+
+    ``visible`` retains every tool that must stay in the model-facing array:
+    every core tool, plus any tool we can't classify. ``deferrable`` is the
+    candidate set for catalog entry.
+    """
+    visible: List[Dict[str, Any]] = []
+    deferrable: List[Dict[str, Any]] = []
+    for td in tool_defs:
+        fn = td.get("function") or {}
+        name = fn.get("name", "")
+        if name in BRIDGE_TOOL_NAMES:
+            # Should never happen — bridge tools are added after classification —
+            # but be defensive.
+            continue
+        if is_deferrable_tool_name(name):
+            deferrable.append(td)
+        else:
+            visible.append(td)
+    return visible, deferrable
+
+
+# ---------------------------------------------------------------------------
+# Token estimation and threshold gate
+# ---------------------------------------------------------------------------
+
+
+def estimate_tokens_from_schemas(tool_defs: Iterable[Dict[str, Any]]) -> int:
+    """Estimate the token cost of a tool-defs list via the chars/4 rule.
+
+    Cheap and stable across providers. The number doesn't need to be exact —
+    it gates the activate/skip decision, and a typical 200K context with a
+    10% threshold means the decision flips around 20K tokens of schema.
+    Order-of-magnitude precision is fine.
+    """
+    total_chars = 0
+    for td in tool_defs:
+        try:
+            total_chars += len(json.dumps(td, ensure_ascii=False, separators=(",", ":")))
+        except (TypeError, ValueError):
+            total_chars += len(str(td))
+    return int(math.ceil(total_chars / CHARS_PER_TOKEN))
+
+
+def should_activate(
+    config: ToolSearchConfig,
+    deferrable_tokens: int,
+    context_length: Optional[int],
+) -> bool:
+    """Decide whether tool search should activate for the current assembly.
+
+    ``"off"`` skips unconditionally. ``"on"`` activates unconditionally
+    (as long as there is at least one deferrable tool — there's no point
+    swapping a no-op). ``"auto"`` activates when the deferrable schemas
+    would consume ``threshold_pct`` of context or more.
+    """
+    if config.enabled == "off":
+        return False
+    if deferrable_tokens <= 0:
+        return False
+    if config.enabled == "on":
+        return True
+    # auto
+    if not context_length or context_length <= 0:
+        # Without a known context size, fall back to a fixed 20K-token cutoff
+        # — the cliff above which Anthropic and OpenAI both saw quality drops.
+        return deferrable_tokens >= 20_000
+    threshold_tokens = int(context_length * (config.threshold_pct / 100.0))
+    return deferrable_tokens >= threshold_tokens
+
+
+# ---------------------------------------------------------------------------
+# Catalog + BM25 retrieval
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class CatalogEntry:
+    """One deferrable tool, in a form the bridge tools can search and serve."""
+
+    name: str
+    description: str
+    schema: Dict[str, Any]  # The full {"type":"function", "function": {...}} entry.
+    source: str  # "mcp" | "plugin" | "other"
+    source_name: str  # Toolset name, e.g. "mcp-github" or "kanban"
+
+    # Pre-tokenized fields for BM25.
+    _tokens: List[str] = field(default_factory=list)
+
+
+_TOKEN_RE = re.compile(r"[A-Za-z0-9]+")
+
+
+def _tokenize(text: str) -> List[str]:
+    if not text:
+        return []
+    return [t.lower() for t in _TOKEN_RE.findall(text)]
+
+
+def _entry_search_text(td: Dict[str, Any]) -> str:
+    """Build the search-text blob for a deferrable tool.
+
+    Includes the tool name (with underscores broken into words so BM25 can
+    match against query terms), the description, and the names of the
+    top-level parameters. Schema bodies are deliberately excluded —
+    indexing them adds noise without improving recall in our measurement.
+    """
+    fn = td.get("function") or {}
+    name = fn.get("name", "")
+    desc = fn.get("description", "") or ""
+    params = ((fn.get("parameters") or {}).get("properties") or {})
+    param_names = " ".join(params.keys())
+    # Break snake_case and dotted names into words for BM25.
+    name_words = name.replace("_", " ").replace(".", " ").replace("-", " ").replace(":", " ")
+    return f"{name_words} {desc} {param_names}"
+
+
+def _classify_source(name: str) -> Tuple[str, str]:
+    """Return (source_kind, source_name) for a registered tool name."""
+    try:
+        from tools.registry import registry
+        entry = registry.get_entry(name)
+        if entry is None:
+            return ("other", "")
+        if entry.toolset.startswith("mcp-"):
+            return ("mcp", entry.toolset)
+        return ("plugin", entry.toolset)
+    except Exception:
+        return ("other", "")
+
+
+def build_catalog(tool_defs: List[Dict[str, Any]]) -> List[CatalogEntry]:
+    """Build the deferred-tool catalog from a tool-defs list.
+
+    Caller is expected to pass only the deferrable subset (``classify_tools``
+    returns it as the second element).
+    """
+    catalog: List[CatalogEntry] = []
+    for td in tool_defs:
+        fn = td.get("function") or {}
+        name = fn.get("name", "")
+        if not name:
+            continue
+        desc = fn.get("description", "") or ""
+        source, source_name = _classify_source(name)
+        entry = CatalogEntry(
+            name=name,
+            description=desc,
+            schema=td,
+            source=source,
+            source_name=source_name,
+            _tokens=_tokenize(_entry_search_text(td)),
+        )
+        catalog.append(entry)
+    return catalog
+
+
+def _bm25_score(query_tokens: List[str], doc_tokens: List[str],
+                doc_lengths: List[int], avg_dl: float,
+                doc_freq: Dict[str, int], n_docs: int,
+                k1: float = 1.5, b: float = 0.75) -> float:
+    """Standard BM25 score for one query against one document.
+
+    Inlined small implementation rather than adding a dependency. Performance
+    is fine — the catalog is bounded by N (tools) typically < 500, and we
+    score against the in-memory tokens list.
+    """
+    if not doc_tokens:
+        return 0.0
+    score = 0.0
+    dl = len(doc_tokens)
+    # Pre-count tokens in the doc.
+    doc_tf: Dict[str, int] = {}
+    for t in doc_tokens:
+        doc_tf[t] = doc_tf.get(t, 0) + 1
+    for q in query_tokens:
+        df = doc_freq.get(q, 0)
+        if df == 0:
+            continue
+        idf = math.log(1 + (n_docs - df + 0.5) / (df + 0.5))
+        tf = doc_tf.get(q, 0)
+        if tf == 0:
+            continue
+        norm = tf * (k1 + 1) / (tf + k1 * (1 - b + b * dl / max(avg_dl, 1.0)))
+        score += idf * norm
+    return score
+
+
+def search_catalog(catalog: List[CatalogEntry], query: str, limit: int = 5) -> List[CatalogEntry]:
+    """Return the top-``limit`` catalog entries for ``query`` by BM25.
+
+    Falls back to a stable name-substring match when BM25 yields no hits
+    above zero. That ensures a query like ``"github"`` against a catalog
+    where every tool is named ``github_*`` still returns results — BM25
+    can underperform when query and document share only one token that
+    appears in every document (zero IDF).
+    """
+    if not catalog or limit <= 0:
+        return []
+    query_tokens = _tokenize(query)
+    if not query_tokens:
+        return []
+
+    # Precompute doc statistics.
+    doc_lengths = [len(e._tokens) for e in catalog]
+    avg_dl = sum(doc_lengths) / max(len(doc_lengths), 1)
+    doc_freq: Dict[str, int] = {}
+    for e in catalog:
+        seen = set(e._tokens)
+        for t in seen:
+            doc_freq[t] = doc_freq.get(t, 0) + 1
+    n_docs = len(catalog)
+
+    scored: List[Tuple[float, CatalogEntry]] = []
+    for entry in catalog:
+        s = _bm25_score(query_tokens, entry._tokens, doc_lengths, avg_dl,
+                        doc_freq, n_docs)
+        if s > 0:
+            scored.append((s, entry))
+
+    if not scored:
+        # Substring fallback against the original tool name.
+        ql = query.lower()
+        for entry in catalog:
+            if ql in entry.name.lower():
+                scored.append((0.1, entry))
+
+    scored.sort(key=lambda x: x[0], reverse=True)
+    return [e for _, e in scored[:limit]]
+
+
+# ---------------------------------------------------------------------------
+# Bridge tool schemas
+# ---------------------------------------------------------------------------
+
+
+def bridge_tool_schemas(deferred_count: int) -> List[Dict[str, Any]]:
+    """Build the bridge tool schemas to inject in place of deferred tools.
+
+    The schemas are intentionally short — every byte added here is a byte
+    the user pays on every turn. Descriptions are tuned to be unambiguous
+    about the call sequence the model should follow.
+    """
+    desc_search = (
+        f"Search {deferred_count} additional tools that are loaded on demand. "
+        "Returns up to ``limit`` matches with name and description. Follow "
+        f"with `{TOOL_DESCRIBE_NAME}` to load a tool's full parameter schema, "
+        f"then `{TOOL_CALL_NAME}` to invoke it. Tools listed at the top of this "
+        "system prompt are already available and do not need to be searched."
+    )
+    desc_describe = (
+        f"Load the full JSON schema for one tool returned by `{TOOL_SEARCH_NAME}`. "
+        f"Required before `{TOOL_CALL_NAME}` if the tool's parameters are unknown."
+    )
+    desc_call = (
+        "Invoke a deferred tool by name with the given arguments. Argument shape "
+        f"matches the tool's schema (see `{TOOL_DESCRIBE_NAME}`). Policy, hooks, "
+        "and approvals run exactly as for any directly-listed tool."
+    )
+
+    return [
+        {
+            "type": "function",
+            "function": {
+                "name": TOOL_SEARCH_NAME,
+                "description": desc_search,
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "query": {
+                            "type": "string",
+                            "description": "Keywords describing the capability you need (e.g. 'create github issue').",
+                        },
+                        "limit": {
+                            "type": "integer",
+                            "description": "Maximum number of results to return. Default 5.",
+                        },
+                    },
+                    "required": ["query"],
+                },
+            },
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": TOOL_DESCRIBE_NAME,
+                "description": desc_describe,
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "name": {
+                            "type": "string",
+                            "description": "Exact tool name (as returned by tool_search).",
+                        },
+                    },
+                    "required": ["name"],
+                },
+            },
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": TOOL_CALL_NAME,
+                "description": desc_call,
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "name": {
+                            "type": "string",
+                            "description": "Exact tool name to invoke.",
+                        },
+                        "arguments": {
+                            "type": "object",
+                            "description": "Arguments for the tool, matching its schema.",
+                        },
+                    },
+                    "required": ["name", "arguments"],
+                },
+            },
+        },
+    ]
+
+
+# ---------------------------------------------------------------------------
+# Public entry point: assemble tool-defs with optional tool search
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class AssemblyResult:
+    """Outcome of one assembly. Useful for tests and observability."""
+
+    tool_defs: List[Dict[str, Any]]
+    activated: bool
+    deferred_count: int = 0
+    deferred_tokens: int = 0
+    threshold_tokens: int = 0
+
+
+def assemble_tool_defs(
+    tool_defs: List[Dict[str, Any]],
+    *,
+    context_length: Optional[int] = None,
+    config: Optional[ToolSearchConfig] = None,
+) -> AssemblyResult:
+    """Return the tool-defs list the model should actually see.
+
+    When tool search is inactive (off, no deferrable tools, or below
+    threshold), this is a passthrough. When active, MCP and plugin tools
+    are stripped from the visible list and replaced with the three bridge
+    tools. Core tools are *never* deferred regardless of config.
+
+    Idempotent: calling with bridge tools already in the input is a no-op
+    (they classify as non-core/non-deferrable but their names are reserved,
+    so they are filtered out of the deferrable set).
+    """
+    if config is None:
+        config = load_config()
+
+    # Defensive: strip any bridge tools that may already be in the list
+    # (e.g. someone called assemble twice).
+    incoming = [td for td in tool_defs
+                if (td.get("function") or {}).get("name") not in BRIDGE_TOOL_NAMES]
+
+    visible, deferrable = classify_tools(incoming)
+    if not deferrable:
+        return AssemblyResult(tool_defs=incoming, activated=False)
+
+    deferrable_tokens = estimate_tokens_from_schemas(deferrable)
+    if not should_activate(config, deferrable_tokens, context_length):
+        return AssemblyResult(
+            tool_defs=incoming,
+            activated=False,
+            deferred_count=len(deferrable),
+            deferred_tokens=deferrable_tokens,
+            threshold_tokens=int((context_length or 0) * (config.threshold_pct / 100.0)),
+        )
+
+    bridge = bridge_tool_schemas(len(deferrable))
+    result = visible + bridge
+    threshold_tokens = int((context_length or 0) * (config.threshold_pct / 100.0))
+
+    logger.info(
+        "tool_search activated: %d core/visible tools kept, %d deferred (~%d tokens, threshold ~%d)",
+        len(visible), len(deferrable), deferrable_tokens, threshold_tokens,
+    )
+
+    return AssemblyResult(
+        tool_defs=result,
+        activated=True,
+        deferred_count=len(deferrable),
+        deferred_tokens=deferrable_tokens,
+        threshold_tokens=threshold_tokens,
+    )
+
+
+# ---------------------------------------------------------------------------
+# Bridge tool dispatch
+# ---------------------------------------------------------------------------
+
+
+def is_bridge_tool(name: str) -> bool:
+    return name in BRIDGE_TOOL_NAMES
+
+
+def _format_search_hit(entry: CatalogEntry) -> Dict[str, Any]:
+    return {
+        "name": entry.name,
+        "source": entry.source,
+        "source_name": entry.source_name,
+        # Cap description so a chatty MCP server doesn't blow up the result.
+        "description": (entry.description or "")[:400],
+    }
+
+
+def dispatch_tool_search(args: Dict[str, Any],
+                         *,
+                         current_tool_defs: List[Dict[str, Any]],
+                         config: Optional[ToolSearchConfig] = None) -> str:
+    """Execute the ``tool_search`` bridge tool. Returns a JSON string."""
+    if config is None:
+        config = load_config()
+    query = str(args.get("query") or "").strip()
+    if not query:
+        return json.dumps({"error": "query is required"}, ensure_ascii=False)
+
+    raw_limit = args.get("limit")
+    if raw_limit is None:
+        limit = config.search_default_limit
+    else:
+        limit = max(1, min(config.max_search_limit, _safe_int(raw_limit, config.search_default_limit)))
+
+    _, deferrable = classify_tools(current_tool_defs)
+    catalog = build_catalog(deferrable)
+    hits = search_catalog(catalog, query, limit=limit)
+    return json.dumps({
+        "query": query,
+        "total_available": len(catalog),
+        "matches": [_format_search_hit(h) for h in hits],
+    }, ensure_ascii=False)
+
+
+def dispatch_tool_describe(args: Dict[str, Any],
+                           *,
+                           current_tool_defs: List[Dict[str, Any]]) -> str:
+    """Execute the ``tool_describe`` bridge tool. Returns a JSON string."""
+    name = str(args.get("name") or "").strip()
+    if not name:
+        return json.dumps({"error": "name is required"}, ensure_ascii=False)
+    if not is_deferrable_tool_name(name):
+        return json.dumps({
+            "error": (
+                f"'{name}' is not a deferrable tool. If you see it in the tools list "
+                "already, call it directly; otherwise check the spelling against tool_search."
+            ),
+        }, ensure_ascii=False)
+    _, deferrable = classify_tools(current_tool_defs)
+    for td in deferrable:
+        fn = td.get("function") or {}
+        if fn.get("name") == name:
+            return json.dumps({
+                "name": name,
+                "description": fn.get("description", ""),
+                "parameters": fn.get("parameters", {}),
+            }, ensure_ascii=False)
+    return json.dumps({
+        "error": f"'{name}' is not currently available. Re-run tool_search to refresh.",
+    }, ensure_ascii=False)
+
+
+def scoped_deferrable_names(tool_defs: List[Dict[str, Any]]) -> frozenset[str]:
+    """Return the set of deferrable tool names present in ``tool_defs``.
+
+    ``tool_defs`` is expected to be the *pre-assembly* tool list for the
+    current session's toolset scope (i.e. what
+    ``get_tool_definitions(skip_tool_search_assembly=True)`` returns for the
+    session's enabled/disabled toolsets). The resulting set is the universe of
+    tools the session may legitimately reach through ``tool_call``. Used as a
+    scoping gate by both the ``model_tools`` bridge dispatch and the
+    ``tool_executor`` unwrap so a restricted-toolset session can never invoke
+    an out-of-scope tool via the bridge.
+    """
+    names: set[str] = set()
+    for td in tool_defs:
+        name = (td.get("function") or {}).get("name", "")
+        if name and is_deferrable_tool_name(name):
+            names.add(name)
+    return frozenset(names)
+
+
+def resolve_underlying_call(args: Dict[str, Any]) -> Tuple[Optional[str], Dict[str, Any], Optional[str]]:
+    """Parse a ``tool_call`` invocation into (underlying_name, args, error_msg).
+
+    Used by:
+    * the dispatcher in ``model_tools.handle_function_call``,
+    * the display layer (so the activity feed shows the underlying tool),
+    * the trajectory recorder.
+
+    On parse error, returns ``(None, {}, error_message)``.
+    """
+    name = str(args.get("name") or "").strip()
+    if not name:
+        return None, {}, "tool_call requires a 'name' argument"
+    if name in BRIDGE_TOOL_NAMES:
+        return None, {}, f"tool_call cannot invoke '{name}' (it is itself a bridge tool)"
+    raw_args = args.get("arguments")
+    if raw_args is None:
+        raw_args = {}
+    if isinstance(raw_args, str):
+        try:
+            raw_args = json.loads(raw_args)
+        except json.JSONDecodeError as e:
+            return None, {}, f"tool_call 'arguments' is not valid JSON: {e}"
+    if not isinstance(raw_args, dict):
+        return None, {}, "tool_call 'arguments' must be an object"
+    if not is_deferrable_tool_name(name):
+        return None, {}, (
+            f"'{name}' is not a deferrable tool. If it appears in the model-facing tools "
+            "list already, call it directly instead of via tool_call."
+        )
+    return name, raw_args, None
+
+
+__all__ = [
+    "TOOL_SEARCH_NAME",
+    "TOOL_DESCRIBE_NAME",
+    "TOOL_CALL_NAME",
+    "BRIDGE_TOOL_NAMES",
+    "ToolSearchConfig",
+    "CatalogEntry",
+    "AssemblyResult",
+    "load_config",
+    "is_deferrable_tool_name",
+    "classify_tools",
+    "estimate_tokens_from_schemas",
+    "should_activate",
+    "build_catalog",
+    "search_catalog",
+    "bridge_tool_schemas",
+    "assemble_tool_defs",
+    "is_bridge_tool",
+    "dispatch_tool_search",
+    "dispatch_tool_describe",
+    "resolve_underlying_call",
+    "scoped_deferrable_names",
+]
diff --git a/tools/transcription_tools.py b/tools/transcription_tools.py
index ba9f73aa9c9..492f04cb83d 100644
--- a/tools/transcription_tools.py
+++ b/tools/transcription_tools.py
@@ -39,7 +39,11 @@ from urllib.parse import urljoin
 
 from utils import is_truthy_value
 from tools.managed_tool_gateway import resolve_managed_tool_gateway
-from tools.tool_backend_helpers import managed_nous_tools_enabled, resolve_openai_audio_api_key
+from tools.tool_backend_helpers import (
+    managed_nous_tools_enabled,
+    nous_tool_gateway_unavailable_message,
+    resolve_openai_audio_api_key,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -791,16 +795,11 @@ def _get_provider(stt_config: dict) -> str:
             return "none"
 
         if provider == "mistral":
-            # `mistralai` PyPI package was quarantined on 2026-05-12 after a
-            # malicious 2.4.6 release. Refuse to use this provider until it's
-            # available again so we surface a clear message instead of an
-            # opaque ImportError mid-call.
+            if _HAS_MISTRAL and get_env_value("MISTRAL_API_KEY"):
+                return "mistral"
             logger.warning(
-                "STT provider 'mistral' (Voxtral Transcribe) is temporarily "
-                "disabled — `mistralai` PyPI package is quarantined "
-                "(malicious 2.4.6 release on 2026-05-12). Falling back to "
-                "another provider. Set stt.provider in config.yaml to 'local' "
-                "or 'openai' to silence this warning."
+                "STT provider 'mistral' configured but mistralai package "
+                "not installed or MISTRAL_API_KEY not set"
             )
             return "none"
 
@@ -841,6 +840,12 @@ def _get_provider(stt_config: dict) -> str:
     if _HAS_OPENAI and _has_openai_audio_backend():
         logger.info("No local STT available, using OpenAI Whisper API")
         return "openai"
+    # Only auto-select Mistral if the SDK is already present — don't trigger a
+    # lazy-install during passive auto-detection. Explicit `provider: mistral`
+    # (above) does lazy-install on first transcription call.
+    if _HAS_MISTRAL and get_env_value("MISTRAL_API_KEY"):
+        logger.info("No local STT available, using Mistral Voxtral Transcribe API")
+        return "mistral"
     try:
         from tools.xai_http import resolve_xai_http_credentials
 
@@ -1381,6 +1386,11 @@ def _transcribe_mistral(file_path: str, model_name: str) -> Dict[str, Any]:
         return {"success": False, "transcript": "", "error": "MISTRAL_API_KEY not set"}
 
     try:
+        try:
+            from tools.lazy_deps import ensure as _lazy_ensure
+            _lazy_ensure("stt.mistral", prompt=False)
+        except ImportError:
+            pass
         from mistralai.client import Mistral
 
         with Mistral(api_key=api_key) as client:
@@ -1749,7 +1759,12 @@ def _resolve_openai_audio_client_config() -> tuple[str, str]:
     if managed_gateway is None:
         message = "Neither stt.openai.api_key in config nor VOICE_TOOLS_OPENAI_KEY/OPENAI_API_KEY is set"
         if managed_nous_tools_enabled():
-            message += ", and the managed OpenAI audio gateway is unavailable"
+            message += (
+                ". "
+                + nous_tool_gateway_unavailable_message(
+                    "managed OpenAI audio for transcription",
+                )
+            )
         raise ValueError(message)
 
     return managed_gateway.nous_user_token, urljoin(
diff --git a/tools/tts_tool.py b/tools/tts_tool.py
index 69dea790dee..cab2cc584ab 100644
--- a/tools/tts_tool.py
+++ b/tools/tts_tool.py
@@ -69,7 +69,12 @@ def get_env_value(name, default=None):
     value = _get_env_value(name)
     return default if value is None else value
 from tools.managed_tool_gateway import resolve_managed_tool_gateway
-from tools.tool_backend_helpers import managed_nous_tools_enabled, prefers_gateway, resolve_openai_audio_api_key
+from tools.tool_backend_helpers import (
+    managed_nous_tools_enabled,
+    nous_tool_gateway_unavailable_message,
+    prefers_gateway,
+    resolve_openai_audio_api_key,
+)
 from tools.xai_http import hermes_xai_user_agent
 
 # ---------------------------------------------------------------------------
@@ -116,7 +121,20 @@ def _import_openai_client():
     return OpenAIClient
 
 def _import_mistral_client():
-    """Lazy import Mistral client. Returns the class or raises ImportError."""
+    """Lazy import Mistral client. Returns the class or raises ImportError.
+
+    Calls :func:`tools.lazy_deps.ensure` first so the ``mistralai`` SDK gets
+    installed on demand if the user picked Mistral as their STT/TTS provider
+    but never ran the post-setup hook (e.g. enabled it by editing config.yaml
+    directly). Mirrors the ElevenLabs lazy-import path.
+    """
+    try:
+        from tools.lazy_deps import ensure
+        ensure("tts.mistral", prompt=False)
+    except ImportError:
+        pass
+    except Exception as e:  # FeatureUnavailable or any unexpected error
+        raise ImportError(str(e))
     from mistralai.client import Mistral
     return Mistral
 
@@ -1969,21 +1987,16 @@ def text_to_speech_tool(
             _generate_xai_tts(text, file_str, tts_config)
 
         elif provider == "mistral":
-            # `mistralai` PyPI package was quarantined on 2026-05-12 after a
-            # malicious 2.4.6 release. Surface a clear status message instead
-            # of attempting an import that would either fail or pull a stale
-            # cached package.
-            return json.dumps({
-                "success": False,
-                "error": (
-                    "Mistral Voxtral TTS is temporarily disabled. The "
-                    "`mistralai` PyPI package was quarantined on 2026-05-12 "
-                    "after a malicious 2.4.6 release. Switch tts.provider in "
-                    "config.yaml to 'edge', 'elevenlabs', 'openai', 'minimax', "
-                    "'gemini', 'xai', 'neutts', or 'kittentts'. Mistral "
-                    "support will return once PyPI un-quarantines the package."
-                ),
-            }, ensure_ascii=False)
+            try:
+                _import_mistral_client()
+            except ImportError:
+                return json.dumps({
+                    "success": False,
+                    "error": "Mistral provider selected but 'mistralai' package not installed. "
+                             "Run: pip install 'hermes-agent[mistral]'"
+                }, ensure_ascii=False)
+            logger.info("Generating speech with Mistral Voxtral TTS...")
+            _generate_mistral_tts(text, file_str, tts_config)
 
         elif provider == "gemini":
             logger.info("Generating speech with Google Gemini TTS...")
@@ -2206,8 +2219,13 @@ def _resolve_openai_audio_client_config() -> tuple[str, str]:
     managed_gateway = resolve_managed_tool_gateway("openai-audio")
     if managed_gateway is None:
         message = "Neither VOICE_TOOLS_OPENAI_KEY nor OPENAI_API_KEY is set"
-        if managed_nous_tools_enabled():
-            message += ", and the managed OpenAI audio gateway is unavailable"
+        if managed_nous_tools_enabled() or prefers_gateway("tts"):
+            message += (
+                ". "
+                + nous_tool_gateway_unavailable_message(
+                    "managed OpenAI audio for TTS",
+                )
+            )
         raise ValueError(message)
 
     return managed_gateway.nous_user_token, urljoin(
diff --git a/tools/vision_tools.py b/tools/vision_tools.py
index 38d19919488..986f9dab984 100644
--- a/tools/vision_tools.py
+++ b/tools/vision_tools.py
@@ -476,6 +476,36 @@ def _supports_media_in_tool_results(provider: str, model: str) -> bool:
     return False
 
 
+def _should_use_native_vision_fast_path() -> bool:
+    """Whether vision tools should attach the image to the main model directly
+    instead of routing through the auxiliary vision LLM.
+
+    True when image routing resolves to ``native`` AND either the provider is
+    known to accept images inside tool results, or the user explicitly declared
+    the model vision-capable via the ``model.supports_vision`` config override.
+    The override is the escape hatch for custom/local providers that aren't in
+    the static allowlist. Best-effort: any resolution failure returns False so
+    the caller falls back to the legacy aux-LLM path.
+    """
+    try:
+        from agent.auxiliary_client import _read_main_provider, _read_main_model
+        from agent.image_routing import decide_image_input_mode, _lookup_supports_vision
+        from hermes_cli.config import load_config
+
+        provider = _read_main_provider()
+        model = _read_main_model()
+        cfg = load_config()
+        if decide_image_input_mode(provider, model, cfg) != "native":
+            return False
+        return (
+            _supports_media_in_tool_results(provider, model)
+            or _lookup_supports_vision(provider, model, cfg) is True
+        )
+    except Exception as exc:
+        logger.debug("Native vision fast-path check failed: %s", exc)
+        return False
+
+
 def _build_native_vision_tool_result(
     image_url: str,
     question: str,
@@ -1030,28 +1060,15 @@ def _handle_vision_analyze(args: Dict[str, Any], **kw: Any) -> Awaitable[str]:
     image_url = args.get("image_url", "")
     question = args.get("question", "")
 
-    # Fast path: when the active main model supports native vision AND the
-    # provider supports image content inside tool results, short-circuit
-    # the auxiliary LLM and return the image bytes as a multimodal
-    # tool-result envelope. The main model sees the pixels directly on its
-    # next turn — no aux call, no information loss, no extra latency.
-    try:
-        from agent.auxiliary_client import _read_main_provider, _read_main_model
-        from agent.image_routing import decide_image_input_mode
-        from hermes_cli.config import load_config
-
-        _provider = _read_main_provider()
-        _model = _read_main_model()
-        _cfg = load_config()
-        _mode = decide_image_input_mode(_provider, _model, _cfg)
-        if _mode == "native" and _supports_media_in_tool_results(_provider, _model):
-            logger.info(
-                "vision_analyze: native fast path (provider=%s, model=%s)",
-                _provider, _model,
-            )
-            return _vision_analyze_native(image_url, question)
-    except Exception as exc:
-        logger.debug("Native vision fast-path check failed; using aux LLM: %s", exc)
+    # Fast path: when native image routing is in effect for the active main
+    # model (provider accepts images in tool results, or the user set the
+    # model.supports_vision override), short-circuit the auxiliary LLM and
+    # return the image bytes as a multimodal tool-result envelope. The main
+    # model sees the pixels directly on its next turn — no aux call, no
+    # information loss, no extra latency.
+    if _should_use_native_vision_fast_path():
+        logger.info("vision_analyze: native fast path")
+        return _vision_analyze_native(image_url, question)
 
     # Legacy path: aux LLM describes the image and we return its text.
     full_prompt = (
diff --git a/tools/voice_mode.py b/tools/voice_mode.py
index df21890db9e..e98fcef8857 100644
--- a/tools/voice_mode.py
+++ b/tools/voice_mode.py
@@ -97,6 +97,9 @@ def detect_audio_environment() -> dict:
     termux_mic_cmd = _termux_microphone_command()
     termux_app_installed = _termux_api_app_installed()
     termux_capture = bool(termux_mic_cmd and termux_app_installed)
+    has_forwarded_audio = bool(
+        os.environ.get('PULSE_SERVER') or os.environ.get('PIPEWIRE_REMOTE')
+    )
 
     # SSH detection
     if any(os.environ.get(v) for v in ('SSH_CLIENT', 'SSH_TTY', 'SSH_CONNECTION')):
@@ -108,7 +111,7 @@ def detect_audio_environment() -> dict:
     # (issue #21203).  Only block when no forwarding is configured.
     from hermes_constants import is_container
     if is_container():
-        if os.environ.get('PULSE_SERVER') or os.environ.get('PIPEWIRE_REMOTE'):
+        if has_forwarded_audio:
             notices.append("Running inside container (Docker/Podman/LXC) with host audio forwarding")
         else:
             warnings.append(
@@ -143,17 +146,22 @@ def detect_audio_environment() -> dict:
         try:
             devices = sd.query_devices()
             if not devices:
-                if os.environ.get('PULSE_SERVER'):
-                    notices.append("No PortAudio devices detected but PULSE_SERVER is set -- continuing")
+                if has_forwarded_audio:
+                    notices.append(
+                        "No PortAudio devices detected but host audio forwarding is configured -- continuing"
+                    )
                 elif termux_capture:
                     notices.append("No PortAudio devices detected, but Termux:API microphone capture is available")
                 else:
                     warnings.append("No audio input/output devices detected")
         except Exception:
             # In WSL with PulseAudio, device queries can fail even though
-            # recording/playback works fine. Don't block if PULSE_SERVER is set.
-            if os.environ.get('PULSE_SERVER'):
-                notices.append("Audio device query failed but PULSE_SERVER is set -- continuing")
+            # recording/playback works fine. Don't block if host audio
+            # forwarding is configured.
+            if has_forwarded_audio:
+                notices.append(
+                    "Audio device query failed but host audio forwarding is configured -- continuing"
+                )
             elif termux_capture:
                 notices.append("PortAudio device query failed, but Termux:API microphone capture is available")
             else:
@@ -1090,7 +1098,8 @@ def check_voice_requirements() -> Dict[str, Any]:
         details_parts.append("STT provider: OK (OpenAI)")
     else:
         details_parts.append(
-            "STT provider: MISSING (pip install faster-whisper, "
+            "STT provider: MISSING (uv pip install faster-whisper — "
+            "`pip install faster-whisper` also works if pip is on PATH, "
             "or set GROQ_API_KEY / VOICE_TOOLS_OPENAI_KEY)"
         )
 
diff --git a/tools/web_tools.py b/tools/web_tools.py
index a55fe78c41e..509546fd573 100644
--- a/tools/web_tools.py
+++ b/tools/web_tools.py
@@ -10,13 +10,12 @@ for Nous Subscribers only.
 Available tools:
 - web_search_tool: Search the web for information
 - web_extract_tool: Extract content from specific web pages
-- web_crawl_tool: Crawl websites with specific instructions
 
 Backend compatibility:
 - Exa: https://exa.ai (search, extract)
-- Firecrawl: https://docs.firecrawl.dev/introduction (search, extract, crawl; direct or derived firecrawl-gateway.<domain> for Nous Subscribers)
+- Firecrawl: https://docs.firecrawl.dev/introduction (search, extract; direct or derived firecrawl-gateway.<domain> for Nous Subscribers)
 - Parallel: https://docs.parallel.ai (search, extract)
-- Tavily: https://tavily.com (search, extract, crawl)
+- Tavily: https://tavily.com (search, extract)
 
 LLM Processing:
 - Uses OpenRouter API with Gemini 3 Flash Preview for intelligent content extraction
@@ -28,16 +27,13 @@ Debug Mode:
 - Captures all tool calls, results, and compression metrics
 
 Usage:
-    from web_tools import web_search_tool, web_extract_tool, web_crawl_tool
+    from web_tools import web_search_tool, web_extract_tool
     
     # Search the web
     results = web_search_tool("Python machine learning libraries", limit=3)
     
     # Extract content from URLs  
     content = web_extract_tool(["https://example.com"], format="markdown")
-    
-    # Crawl a website
-    crawl_data = web_crawl_tool("example.com", "Find contact information")
 """
 
 import json
@@ -55,21 +51,11 @@ import httpx  # noqa: F401 — kept at module top so tests can patch tools.web_t
 if TYPE_CHECKING:
     from firecrawl import Firecrawl  # noqa: F401 — type hints only
 from plugins.web.firecrawl.provider import (
-    Firecrawl,
-    _FirecrawlProxy,
-    _FIRECRAWL_CLS_CACHE,
-    _extract_scrape_payload,
-    _extract_web_search_results,
+    Firecrawl,  # noqa: F401  # re-exported for tests that mock.patch("tools.web_tools.Firecrawl")
     _firecrawl_backend_help_suffix,
-    _get_direct_firecrawl_config,
-    _get_firecrawl_client,
+    _get_firecrawl_client,  # noqa: F401  # re-exported for tests that `from tools.web_tools import _get_firecrawl_client`
     _get_firecrawl_gateway_url,
-    _has_direct_firecrawl_config,
     _is_tool_gateway_ready,
-    _load_firecrawl_cls,
-    _normalize_result_list,
-    _raise_web_backend_configuration_error,
-    _to_plain_object,
     check_firecrawl_api_key,
 )
 # Tavily helpers re-exported for backward-compat with existing unit tests
@@ -110,9 +96,12 @@ from tools.managed_tool_gateway import (  # noqa: F401 — backward-compat names
     read_nous_access_token as _read_nous_access_token,
     resolve_managed_tool_gateway,
 )
-from tools.tool_backend_helpers import managed_nous_tools_enabled, prefers_gateway  # noqa: F401
+from tools.tool_backend_helpers import (  # noqa: F401
+    managed_nous_tools_enabled,
+    nous_tool_gateway_unavailable_message,
+    prefers_gateway,
+)
 from tools.url_safety import is_safe_url
-from tools.website_policy import check_website_access
 import sys
 
 logger = logging.getLogger(__name__)
@@ -367,7 +356,7 @@ async def process_content_with_llm(
         if content_len > MAX_CONTENT_SIZE:
             size_mb = content_len / 1_000_000
             logger.warning("Content too large (%.1fMB > 2MB limit). Refusing to process.", size_mb)
-            return f"[Content too large to process: {size_mb:.1f}MB. Try using web_crawl with specific extraction instructions, or search for a more focused source.]"
+            return f"[Content too large to process: {size_mb:.1f}MB. Try a more focused source URL.]"
         
         # Skip processing if content is too short
         if content_len < min_length:
@@ -743,6 +732,35 @@ def clean_base64_images(text: str) -> str:
 # dispatchers in this file resolve them via get_active_*_provider().
 
 
+def _ensure_web_plugins_loaded() -> None:
+    """Idempotently trigger plugin discovery so the web registry is populated.
+
+    Every bundled web provider (brave-free, ddgs, searxng, exa, parallel,
+    tavily, firecrawl) registers itself via ``plugins/web/<vendor>/__init__.py``
+    during plugin discovery. Tool dispatch can be reached from contexts that
+    haven't already triggered discovery — subprocess agent runs, delegate
+    children, standalone scripts, certain test paths — and without it the
+    registry is empty and ``get_provider('firecrawl')`` returns ``None`` even
+    when the user has ``web.extract_backend: firecrawl`` configured and
+    ``FIRECRAWL_API_KEY`` set. The symptom is a misleading "No web extract
+    provider configured" error (issue #27580).
+
+    Mirrors :func:`tools.browser_tool._ensure_browser_plugins_loaded` exactly:
+    the underlying discovery call is idempotent and cheap on subsequent
+    invocations.
+    """
+    try:
+        from hermes_cli.plugins import _ensure_plugins_discovered
+
+        _ensure_plugins_discovered()
+    except Exception as exc:  # noqa: BLE001
+        # Warning, not debug: if a plugin import is genuinely broken the
+        # user otherwise hits the misleading "No web extract provider
+        # configured" error this helper is meant to eliminate, with no
+        # clue in normal logs about the real cause.
+        logger.warning("Web plugin discovery failed (non-fatal): %s", exc)
+
+
 def web_search_tool(query: str, limit: int = 5) -> str:
     """
     Search the web for information using available search API backend.
@@ -803,6 +821,7 @@ def web_search_tool(query: str, limit: int = 5) -> str:
         # (brave-free, ddgs, searxng, exa, parallel, tavily, firecrawl)
         # now live as plugins; the dispatcher is just a registry lookup +
         # delegation. Sync only — every provider's search() is sync.
+        _ensure_web_plugins_loaded()
         from agent.web_search_registry import (
             get_active_search_provider,
             get_provider as _wsp_get_provider,
@@ -935,6 +954,7 @@ async def web_extract_tool(
             # detect coroutine functions and await; sync functions run
             # inline (the policy gate, SSRF re-check, etc. live inside the
             # provider itself for the firecrawl per-URL loop).
+            _ensure_web_plugins_loaded()
             from agent.web_search_registry import (
                 get_active_extract_provider,
                 get_provider as _wsp_get_provider,
@@ -1130,239 +1150,6 @@ async def web_extract_tool(
         return tool_error(error_msg)
 
 
-async def web_crawl_tool(
-    url: str, 
-    instructions: str = None, 
-    depth: str = "basic", 
-    use_llm_processing: bool = True,
-    model: Optional[str] = None,
-    min_length: int = DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION
-) -> str:
-    """
-    Crawl a website with specific instructions using available crawling API backend.
-    
-    This function provides a generic interface for web crawling that can work
-    with multiple backends. Currently uses Firecrawl.
-    
-    Args:
-        url (str): The base URL to crawl (can include or exclude https://)
-        instructions (str): Instructions for what to crawl/extract using LLM intelligence (optional)
-        depth (str): Depth of extraction ("basic" or "advanced", default: "basic")
-        use_llm_processing (bool): Whether to process content with LLM for summarization (default: True)
-        model (Optional[str]): The model to use for LLM processing (defaults to current auxiliary backend model)
-        min_length (int): Minimum content length to trigger LLM processing (default: 5000)
-    
-    Returns:
-        str: JSON string containing crawled content. If LLM processing is enabled and successful,
-             the 'content' field will contain the processed markdown summary instead of raw content.
-             Each page is processed individually.
-    
-    Raises:
-        Exception: If crawling fails or API key is not set
-    """
-    debug_call_data = {
-        "parameters": {
-            "url": url,
-            "instructions": instructions,
-            "depth": depth,
-            "use_llm_processing": use_llm_processing,
-            "model": model,
-            "min_length": min_length
-        },
-        "error": None,
-        "pages_crawled": 0,
-        "pages_processed_with_llm": 0,
-        "original_response_size": 0,
-        "final_response_size": 0,
-        "compression_metrics": [],
-        "processing_applied": []
-    }
-    
-    try:
-        effective_model = model or _get_default_summarizer_model()
-        auxiliary_available = check_auxiliary_model()
-        backend = _get_backend()
-
-        # Tavily (and any future plugin advertising supports_crawl=True)
-        # dispatches through agent.web_search_registry. The crawl response
-        # shape — {"results": [{"url", "title", "content", ...}]} — is then
-        # post-processed by the shared LLM-summarization path below.
-        from agent.web_search_registry import (
-            get_active_crawl_provider,
-            get_provider as _wsp_get_provider,
-        )
-
-        crawl_provider = _wsp_get_provider(backend) if backend else None
-        if crawl_provider is not None and not crawl_provider.supports_crawl():
-            # When the configured provider is search-only AND cannot
-            # extract URLs either (brave-free / ddgs / searxng), surface a
-            # typed "search-only" error rather than silently switching to
-            # a different crawl backend. When the provider supports extract
-            # but not crawl (e.g. firecrawl), fall through to the legacy
-            # firecrawl-via-extract path below.
-            if not crawl_provider.supports_extract():
-                return json.dumps(
-                    {
-                        "success": False,
-                        "error": (
-                            f"{crawl_provider.display_name} is a search-only "
-                            "backend and cannot crawl URLs. "
-                            "Set FIRECRAWL_API_KEY for crawling, or use "
-                            "web_search instead."
-                        ),
-                    },
-                    ensure_ascii=False,
-                )
-            crawl_provider = None  # let legacy firecrawl path handle it
-        if crawl_provider is None:
-            crawl_provider = get_active_crawl_provider()
-
-        # Mirror main's upstream availability gate: when the resolved
-        # provider is configured-but-unavailable (e.g. firecrawl without
-        # FIRECRAWL_API_KEY), short-circuit BEFORE we dispatch so the
-        # error envelope matches the legacy top-level shape
-        # ``{"success": False, "error": "..."}`` rather than burying the
-        # configuration message inside a per-page ``results[]`` entry.
-        if crawl_provider is not None and not crawl_provider.is_available():
-            return json.dumps(
-                {
-                    "success": False,
-                    "error": (
-                        "web_crawl requires Firecrawl. Set FIRECRAWL_API_KEY, "
-                        f"FIRECRAWL_API_URL{_firecrawl_backend_help_suffix()}, "
-                        "or use web_search + web_extract instead."
-                    ),
-                },
-                ensure_ascii=False,
-            )
-
-        if crawl_provider is not None:
-            # Ensure URL has protocol
-            if not url.startswith(('http://', 'https://')):
-                url = f'https://{url}'
-
-            # SSRF protection — block private/internal addresses
-            if not is_safe_url(url):
-                return json.dumps({"results": [{"url": url, "title": "", "content": "",
-                    "error": "Blocked: URL targets a private or internal network address"}]}, ensure_ascii=False)
-
-            # Website policy check
-            blocked = check_website_access(url)
-            if blocked:
-                logger.info("Blocked web_crawl for %s by rule %s", blocked["host"], blocked["rule"])
-                return json.dumps({"results": [{"url": url, "title": "", "content": "", "error": blocked["message"],
-                    "blocked_by_policy": {"host": blocked["host"], "rule": blocked["rule"], "source": blocked["source"]}}]}, ensure_ascii=False)
-
-            from tools.interrupt import is_interrupted as _is_int
-            if _is_int():
-                return tool_error("Interrupted", success=False)
-
-            logger.info("Web crawl via %s: %s", crawl_provider.name, url)
-
-            # Async-or-sync dispatch — Tavily's crawl is sync, but a future
-            # async-crawl provider works transparently.
-            import inspect
-            crawl_kwargs = {"depth": depth, "limit": 20}
-            if instructions:
-                crawl_kwargs["instructions"] = instructions
-
-            if inspect.iscoroutinefunction(crawl_provider.crawl):
-                response = await crawl_provider.crawl(url, **crawl_kwargs)
-            else:
-                response = await asyncio.to_thread(
-                    crawl_provider.crawl, url, **crawl_kwargs
-                )
-
-            # Provider returns {"results": [...]} matching what the shared
-            # LLM post-processing below expects.
-            if not isinstance(response, dict):
-                response = {"results": []}
-            response.setdefault("results", [])
-
-            # Fall through to the shared LLM processing and trimming below
-            # (skip the Firecrawl-specific crawl logic)
-            pages_crawled = len(response.get('results', []))
-            logger.info("Crawled %d pages", pages_crawled)
-            debug_call_data["pages_crawled"] = pages_crawled
-            debug_call_data["original_response_size"] = len(json.dumps(response))
-
-            # Process each result with LLM if enabled
-            if use_llm_processing and auxiliary_available:
-                logger.info("Processing crawled content with LLM (parallel)...")
-                debug_call_data["processing_applied"].append("llm_processing")
-
-                async def _process_tavily_crawl(result):
-                    page_url = result.get('url', 'Unknown URL')
-                    title = result.get('title', '')
-                    content = result.get('content', '')
-                    if not content:
-                        return result, None, "no_content"
-                    original_size = len(content)
-                    processed = await process_content_with_llm(content, page_url, title, effective_model, min_length)
-                    if processed:
-                        result['raw_content'] = content
-                        result['content'] = processed
-                        metrics = {"url": page_url, "original_size": original_size, "processed_size": len(processed),
-                                   "compression_ratio": len(processed) / original_size if original_size else 1.0, "model_used": effective_model}
-                        return result, metrics, "processed"
-                    metrics = {"url": page_url, "original_size": original_size, "processed_size": original_size,
-                               "compression_ratio": 1.0, "model_used": None, "reason": "content_too_short"}
-                    return result, metrics, "too_short"
-
-                tasks = [_process_tavily_crawl(r) for r in response.get('results', [])]
-                # Use return_exceptions=True so a single task failure does not
-                # discard all other successfully processed crawl results.
-                processed_results = await asyncio.gather(*tasks, return_exceptions=True)
-                for result_item in processed_results:
-                    if isinstance(result_item, BaseException):
-                        logger.warning("Tavily crawl processing task failed: %s", result_item)
-                        continue
-                    result, metrics, status = result_item
-                    if status == "processed":
-                        debug_call_data["compression_metrics"].append(metrics)
-                        debug_call_data["pages_processed_with_llm"] += 1
-
-            if use_llm_processing and not auxiliary_available:
-                logger.warning("LLM processing requested but no auxiliary model available, returning raw content")
-                debug_call_data["processing_applied"].append("llm_processing_unavailable")
-
-            trimmed_results = [{"url": r.get("url", ""), "title": r.get("title", ""), "content": r.get("content", ""), "error": r.get("error"),
-                **({  "blocked_by_policy": r["blocked_by_policy"]} if "blocked_by_policy" in r else {})} for r in response.get("results", [])]
-            result_json = json.dumps({"results": trimmed_results}, indent=2, ensure_ascii=False)
-            cleaned_result = clean_base64_images(result_json)
-            debug_call_data["final_response_size"] = len(cleaned_result)
-            _debug.log_call("web_crawl_tool", debug_call_data)
-            _debug.save()
-            return cleaned_result
-
-        # No registered provider supports crawl AND no crawl-capable plugin
-        # is available. Surface a typed error pointing the user at the two
-        # crawl-capable providers (Firecrawl + Tavily).
-        return json.dumps(
-            {
-                "success": False,
-                "error": (
-                    "web_crawl has no available backend. "
-                    "Set FIRECRAWL_API_KEY (or FIRECRAWL_API_URL for "
-                    f"self-hosted){_firecrawl_backend_help_suffix()}, "
-                    "or set TAVILY_API_KEY for Tavily. "
-                    "Alternatively use web_search + web_extract instead."
-                ),
-            },
-            ensure_ascii=False,
-        )
-
-    except Exception as e:
-        error_msg = f"Error crawling website: {str(e)}"
-        logger.debug("%s", error_msg)
-        
-        debug_call_data["error"] = error_msg
-        _debug.log_call("web_crawl_tool", debug_call_data)
-        _debug.save()
-        
-        return tool_error(error_msg)
-
-
 # Convenience function to check Firecrawl credentials
 def check_web_api_key() -> bool:
     """Check whether the configured web backend is available."""
@@ -1452,16 +1239,15 @@ if __name__ == "__main__":
         print("🐛 Debug mode disabled (set WEB_TOOLS_DEBUG=true to enable)")
     
     print("\nBasic usage:")
-    print("  from web_tools import web_search_tool, web_extract_tool, web_crawl_tool")
+    print("  from web_tools import web_search_tool, web_extract_tool")
     print("  import asyncio")
     print("")
     print("  # Search (synchronous)")
     print("  results = web_search_tool('Python tutorials')")
     print("")
-    print("  # Extract and crawl (asynchronous)")
+    print("  # Extract (asynchronous)")
     print("  async def main():")
     print("      content = await web_extract_tool(['https://example.com'])")
-    print("      crawl_data = await web_crawl_tool('example.com', 'Find docs')")
     print("  asyncio.run(main())")
     
     if nous_available:
@@ -1470,9 +1256,8 @@ if __name__ == "__main__":
         print("  content = await web_extract_tool(['https://python.org/about/'])")
         print("")
         print("  # Customize processing parameters")
-        print("  crawl_data = await web_crawl_tool(")
-        print("      'docs.python.org',")
-        print("      'Find key concepts',")
+        print("  content = await web_extract_tool(")
+        print("      ['https://docs.python.org'],")
         print("      model='google/gemini-3-flash-preview',")
         print("      min_length=3000")
         print("  )")
diff --git a/tools/website_policy.py b/tools/website_policy.py
index 63fb7571007..c621dcbf3c0 100644
--- a/tools/website_policy.py
+++ b/tools/website_policy.py
@@ -29,7 +29,7 @@ _DEFAULT_WEBSITE_BLOCKLIST = {
 }
 
 # Cache: parsed policy + timestamp.  Avoids re-reading config.yaml on every
-# URL check (a web_crawl with 50 pages would otherwise mean 51 YAML parses).
+# URL check (a multi-URL extract with 50 pages would otherwise mean 51 YAML parses).
 _CACHE_TTL_SECONDS = 30.0
 _cache_lock = threading.Lock()
 _cached_policy: Optional[Dict[str, Any]] = None
diff --git a/tools/x_search_tool.py b/tools/x_search_tool.py
index 70251860736..39ecf2daf33 100644
--- a/tools/x_search_tool.py
+++ b/tools/x_search_tool.py
@@ -44,7 +44,6 @@ from __future__ import annotations
 
 import json
 import logging
-import os
 import time
 from datetime import date, datetime, timezone
 from typing import Any, Dict, List, Optional, Tuple
diff --git a/toolsets.py b/toolsets.py
index bab7677887a..10c5dbb0ca0 100644
--- a/toolsets.py
+++ b/toolsets.py
@@ -215,6 +215,12 @@ TOOLSETS = {
         "tools": ["memory"],
         "includes": []
     },
+
+    "context_engine": {
+        "description": "Runtime tools exposed by the active context engine",
+        "tools": [],
+        "includes": []
+    },
     
     "session_search": {
         "description": "Search and recall past conversations with summarization",
diff --git a/tui_gateway/server.py b/tui_gateway/server.py
index 2d75820fa71..43b50986596 100644
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@@ -1191,7 +1191,7 @@ def _apply_model_switch(sid: str, session: dict, raw_input: str) -> dict:
     from hermes_cli.model_switch import parse_model_flags, switch_model
     from hermes_cli.runtime_provider import resolve_runtime_provider
 
-    model_input, explicit_provider, persist_global = parse_model_flags(raw_input)
+    model_input, explicit_provider, persist_global, _force_refresh = parse_model_flags(raw_input)
     if not model_input:
         raise ValueError("model value required")
 
@@ -3576,7 +3576,6 @@ def _(rid, params: dict) -> dict:
 
 
 def _spawn_trees_root():
-    from pathlib import Path as _P
     from hermes_constants import get_hermes_home
 
     root = get_hermes_home() / "spawn-trees"
diff --git a/ui-tui/src/__tests__/textInputPassThrough.test.ts b/ui-tui/src/__tests__/textInputPassThrough.test.ts
index 5988580f9b9..1fb47779b0f 100644
--- a/ui-tui/src/__tests__/textInputPassThrough.test.ts
+++ b/ui-tui/src/__tests__/textInputPassThrough.test.ts
@@ -1,11 +1,27 @@
 import { describe, expect, it } from 'vitest'
 
-import { shouldPassThroughToGlobalHandler } from '../components/textInput.js'
+import { shouldPassThroughToGlobalHandler, shouldPreserveCtrlJNewline } from '../components/textInput.js'
 import { DEFAULT_VOICE_RECORD_KEY, parseVoiceRecordKey } from '../lib/platform.js'
 
 const key = (overrides: Record<string, unknown> = {}) =>
   ({ ctrl: false, meta: false, ...overrides }) as any
 
+describe('shouldPreserveCtrlJNewline', () => {
+  it('preserves Ctrl+J as newline in Ghostty even when tmux masks TERM/TERM_PROGRAM', () => {
+    expect(
+      shouldPreserveCtrlJNewline({
+        GHOSTTY_RESOURCES_DIR: '/usr/share/ghostty',
+        TERM: 'tmux-256color',
+        TERM_PROGRAM: 'tmux'
+      })
+    ).toBe(true)
+  })
+
+  it('keeps bare local POSIX LF-compatible prompts submitting on Ctrl+J', () => {
+    expect(shouldPreserveCtrlJNewline({ TERM: 'xterm-256color' })).toBe(false)
+  })
+})
+
 describe('shouldPassThroughToGlobalHandler', () => {
   it('passes through the configured voice shortcut while composer is focused', () => {
     expect(
diff --git a/ui-tui/src/components/textInput.tsx b/ui-tui/src/components/textInput.tsx
index 2e117a0a007..564484999f6 100644
--- a/ui-tui/src/components/textInput.tsx
+++ b/ui-tui/src/components/textInput.tsx
@@ -36,6 +36,7 @@ const PRINTABLE = /^[ -~\u00a0-\uffff]+$/
 const BRACKET_PASTE = new RegExp(`${ESC}?\\[20[01]~`, 'g')
 const FRAME_BATCH_MS = 16
 const MULTI_CLICK_MS = 500
+type MinimalEnv = Record<string, string | undefined>
 
 const invert = (s: string) => INV + s + INV_OFF
 const dim = (s: string) => DIM + s + DIM_OFF
@@ -122,6 +123,30 @@ export function applyPrintableInsert(
 
 export const shouldRouteMultiCharInputAsPaste = (text: string): boolean => text.includes('\n')
 
+export function shouldPreserveCtrlJNewline(env: MinimalEnv = process.env): boolean {
+  if (env.WT_SESSION) {
+    return true
+  }
+
+  if (env.SSH_CONNECTION || env.SSH_CLIENT || env.SSH_TTY) {
+    return true
+  }
+
+  if (env.GHOSTTY_RESOURCES_DIR || env.GHOSTTY_BIN_DIR) {
+    return true
+  }
+
+  if ((env.TERM ?? '').toLowerCase() === 'xterm-ghostty') {
+    return true
+  }
+
+  if ((env.TERM_PROGRAM ?? '').toLowerCase() === 'ghostty') {
+    return true
+  }
+
+  return (env.WSL_DISTRO_NAME ?? '').toLowerCase().includes('microsoft')
+}
+
 function prevPos(s: string, p: number) {
   const pos = snapPos(s, p)
   let prev = 0
@@ -943,7 +968,10 @@ export function TextInput({
       if (k.return) {
         flushKeyBurst()
 
-        if (k.shift || k.ctrl || (isMac ? isActionMod(k) : k.meta)) {
+        const sequence = (event.keypress as { sequence?: string }).sequence
+        const preserveBareLineFeed = shouldPreserveCtrlJNewline() && sequence === '\n'
+
+        if (k.shift || k.ctrl || preserveBareLineFeed || (isMac ? isActionMod(k) : k.meta)) {
           commit(ins(vRef.current, curRef.current, '\n'), curRef.current + 1)
         } else {
           cbSubmit.current?.(vRef.current)
diff --git a/uv.lock b/uv.lock
index 2087116a5a0..24205de8627 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1243,6 +1243,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/97/a8/c070e1340636acb38d4e6a7e45c46d168a462b48b9b3257e14ca0e5af79b/environs-14.6.0-py3-none-any.whl", hash = "sha256:f8fb3d6c6a55872b0c6db077a28f5a8c7b8984b7c32029613d44cef95cfc0812", size = 17205, upload-time = "2026-02-20T04:02:07.299Z" },
 ]
 
+[[package]]
+name = "eval-type-backport"
+version = "0.3.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/fb/a3/cafafb4558fd638aadfe4121dc6cefb8d743368c085acb2f521df0f3d9d7/eval_type_backport-0.3.1.tar.gz", hash = "sha256:57e993f7b5b69d271e37482e62f74e76a0276c82490cf8e4f0dffeb6b332d5ed", size = 9445, upload-time = "2025-12-02T11:51:42.987Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/cf/22/fdc2e30d43ff853720042fa15baa3e6122722be1a7950a98233ebb55cd71/eval_type_backport-0.3.1-py3-none-any.whl", hash = "sha256:279ab641905e9f11129f56a8a78f493518515b83402b860f6f06dd7c011fdfa8", size = 6063, upload-time = "2025-12-02T11:51:41.665Z" },
+]
+
 [[package]]
 name = "exa-py"
 version = "2.10.2"
@@ -1589,7 +1598,7 @@ wheels = [
 
 [[package]]
 name = "hermes-agent"
-version = "0.14.0"
+version = "0.15.1"
 source = { editable = "." }
 dependencies = [
     { name = "croniter" },
@@ -1629,6 +1638,7 @@ all = [
     { name = "pytest-timeout" },
     { name = "pywinpty", marker = "sys_platform == 'win32'" },
     { name = "ruff" },
+    { name = "setuptools" },
     { name = "simple-term-menu" },
     { name = "ty" },
     { name = "uvicorn", extra = ["standard"] },
@@ -1659,6 +1669,7 @@ dev = [
     { name = "pytest-asyncio" },
     { name = "pytest-timeout" },
     { name = "ruff" },
+    { name = "setuptools" },
     { name = "ty" },
 ]
 dingtalk = [
@@ -1715,6 +1726,9 @@ messaging = [
     { name = "slack-bolt" },
     { name = "slack-sdk" },
 ]
+mistral = [
+    { name = "mistralai" },
+]
 modal = [
     { name = "modal" },
 ]
@@ -1840,6 +1854,7 @@ requires-dist = [
     { name = "mcp", marker = "extra == 'computer-use'", specifier = "==1.26.0" },
     { name = "mcp", marker = "extra == 'dev'", specifier = "==1.26.0" },
     { name = "mcp", marker = "extra == 'mcp'", specifier = "==1.26.0" },
+    { name = "mistralai", marker = "extra == 'mistral'", specifier = "==2.4.8" },
     { name = "modal", marker = "extra == 'modal'", specifier = "==1.3.4" },
     { name = "numpy", marker = "extra == 'voice'", specifier = "==2.4.3" },
     { name = "openai", specifier = "==2.24.0" },
@@ -1864,6 +1879,7 @@ requires-dist = [
     { name = "rich", specifier = "==14.3.3" },
     { name = "ruamel-yaml", specifier = "==0.18.17" },
     { name = "ruff", marker = "extra == 'dev'", specifier = "==0.15.10" },
+    { name = "setuptools", marker = "extra == 'dev'", specifier = "==82.0.1" },
     { name = "simple-term-menu", marker = "extra == 'cli'", specifier = "==1.6.6" },
     { name = "slack-bolt", marker = "extra == 'messaging'", specifier = "==1.27.0" },
     { name = "slack-bolt", marker = "extra == 'slack'", specifier = "==1.27.0" },
@@ -1876,7 +1892,7 @@ requires-dist = [
     { name = "uvicorn", extras = ["standard"], marker = "extra == 'web'", specifier = "==0.41.0" },
     { name = "youtube-transcript-api", marker = "extra == 'youtube'", specifier = "==1.2.4" },
 ]
-provides-extras = ["anthropic", "exa", "firecrawl", "parallel-web", "fal", "edge-tts", "modal", "daytona", "hindsight", "dev", "messaging", "cron", "slack", "matrix", "wecom", "cli", "tts-premium", "voice", "pty", "honcho", "mcp", "homeassistant", "sms", "computer-use", "acp", "bedrock", "azure-identity", "termux", "termux-all", "dingtalk", "feishu", "google", "youtube", "web", "all"]
+provides-extras = ["anthropic", "exa", "firecrawl", "parallel-web", "fal", "edge-tts", "modal", "daytona", "hindsight", "dev", "messaging", "cron", "slack", "matrix", "wecom", "cli", "tts-premium", "voice", "pty", "honcho", "mcp", "homeassistant", "sms", "computer-use", "acp", "mistral", "bedrock", "azure-identity", "termux", "termux-all", "dingtalk", "feishu", "google", "youtube", "web", "all"]
 
 [[package]]
 name = "hf-xet"
@@ -2206,6 +2222,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/14/2f/967ba146e6d58cf6a652da73885f52fc68001525b4197effc174321d70b4/jmespath-1.1.0-py3-none-any.whl", hash = "sha256:a5663118de4908c91729bea0acadca56526eb2698e83de10cd116ae0f4e97c64", size = 20419, upload-time = "2026-01-22T16:35:24.919Z" },
 ]
 
+[[package]]
+name = "jsonpath-python"
+version = "1.1.6"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/98/18/4ca8742534a5993ff383f7602e325ce2d5d7cc93d72ac5e1cdedbea8a458/jsonpath_python-1.1.6.tar.gz", hash = "sha256:dded9932b4ec41fb8726e09c83afa4e6be618f938c2db287cc2a81723c639671", size = 88178, upload-time = "2026-05-07T01:26:34.482Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/55/8a/1270a6803bd821cbfcdda387eaa13cb41a7b1f7b9bd145979b3bfb9d6cb7/jsonpath_python-1.1.6-py3-none-any.whl", hash = "sha256:a1c50afd8d3fbbaf47a4873bc890dcb3c15da96f5c020327977d844d8731a2d4", size = 14453, upload-time = "2026-05-07T01:26:33.306Z" },
+]
+
 [[package]]
 name = "jsonschema"
 version = "4.26.0"
@@ -2408,6 +2433,25 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" },
 ]
 
+[[package]]
+name = "mistralai"
+version = "2.4.8"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "eval-type-backport" },
+    { name = "httpx" },
+    { name = "jsonpath-python" },
+    { name = "opentelemetry-api" },
+    { name = "opentelemetry-semantic-conventions" },
+    { name = "pydantic" },
+    { name = "python-dateutil" },
+    { name = "typing-inspection" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/55/1c/04119828a3da3be8c79efbe59035a621ae22af873c1ee5a4200355025aa6/mistralai-2.4.8.tar.gz", hash = "sha256:4f27b9b7dfd564ae111d3d9992d2a8ad1454aaf3e7675554c686aa3bb89617e2", size = 464443, upload-time = "2026-05-28T10:00:45.72Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/74/2a/d9952a97596ff9570ff7f486084ebfc5637b1bcf62084b97c0f8415713fc/mistralai-2.4.8-py3-none-any.whl", hash = "sha256:edc445c8b5edf332d45db6c708cd1e4d3f62e6eba5d2e8bf3969bdc5117f6472", size = 1110598, upload-time = "2026-05-28T10:00:43.939Z" },
+]
+
 [[package]]
 name = "modal"
 version = "1.3.4"
diff --git a/apps/dashboard/README.md b/web/README.md
similarity index 86%
rename from apps/dashboard/README.md
rename to web/README.md
index bf06ea677d0..c9581635b2f 100644
--- a/apps/dashboard/README.md
+++ b/web/README.md
@@ -10,34 +10,22 @@ Browser-based dashboard for managing Hermes Agent configuration, API keys, and m
 
 ## Development
 
-Install workspace dependencies from the repo root first:
-
 ```bash
+# Start the backend API server
+cd ../
+python -m hermes_cli.main web --no-open
+
+# In another terminal, start the Vite dev server (with HMR + API proxy)
+cd web/
 npm install
-```
-
-Start the backend API server from the repo root:
-
-```bash
-hermes dashboard --tui --no-open
-```
-
-`--tui` exposes the in-browser Chat tab through `/api/pty`. Omit it if you only need the config/session dashboard.
-
-In another terminal, start the Vite dev server:
-
-```bash
-cd apps/dashboard
 npm run dev
 ```
 
-The Vite dev server proxies `/api`, `/api/pty`, and `/dashboard-plugins` to `http://127.0.0.1:9119` (the FastAPI backend). It also fetches the backend's `index.html` on each dev page load so the ephemeral session token stays in sync.
+Open the **Vite URL** printed in the terminal (usually `http://localhost:5173`). That is the live-reload UI.
 
-If the `hermes` entry point is not installed, use:
+`hermes dashboard` on port 9119 serves the **built** bundle from `hermes_cli/web_dist/`, not the Vite dev server — changes in `web/src/` will not appear there until you run `npm run build` and restart the dashboard (or use `web --no-open` + Vite as above).
 
-```bash
-python -m hermes_cli.main dashboard --tui --no-open
-```
+The Vite dev server proxies `/api` requests to `http://127.0.0.1:9119` (the FastAPI backend).
 
 ## Build
 
@@ -45,7 +33,7 @@ python -m hermes_cli.main dashboard --tui --no-open
 npm run build
 ```
 
-This outputs to `../../hermes_cli/web_dist/`, which the FastAPI server serves as a static SPA. The built assets are included in the Python package via `pyproject.toml` package-data.
+This outputs to `../hermes_cli/web_dist/`, which the FastAPI server serves as a static SPA. The built assets are included in the Python package via `pyproject.toml` package-data.
 
 ## Structure
 
@@ -113,3 +101,4 @@ Typography is **opt-in per surface**, not global on layout shells — the app sh
 - Prefer **semantic tokens** (`text-text-*`, `bg-card`, `border-border`, `text-foreground`, `text-destructive`, `text-success`, `text-warning`) over raw layer references (`text-midground`, `text-foreground`).
 - `text-muted-foreground` is now wired to `--color-text-secondary`, so existing call sites stay correct, but new code should prefer the semantic name.
 - When you genuinely need a non-token color (icon de-emphasis on a chart, terminal foreground via inline style), keep alpha at `≥ 0.7` for any text.
+
diff --git a/apps/dashboard/eslint.config.js b/web/eslint.config.js
similarity index 100%
rename from apps/dashboard/eslint.config.js
rename to web/eslint.config.js
diff --git a/apps/dashboard/index.html b/web/index.html
similarity index 100%
rename from apps/dashboard/index.html
rename to web/index.html
diff --git a/apps/dashboard/package-lock.json b/web/package-lock.json
similarity index 59%
rename from apps/dashboard/package-lock.json
rename to web/package-lock.json
index 471df8bdbdc..7e9739d928e 100644
--- a/apps/dashboard/package-lock.json
+++ b/web/package-lock.json
@@ -1,15 +1,14 @@
 {
-  "name": "dashboard",
+  "name": "web",
   "version": "0.0.0",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
-      "name": "dashboard",
+      "name": "web",
       "version": "0.0.0",
       "dependencies": {
-        "@hermes/shared": "file:../shared",
-        "@nous-research/ui": "0.16.0",
+        "@nous-research/ui": "0.18.2",
         "@observablehq/plot": "^0.6.17",
         "@react-three/fiber": "^9.6.0",
         "@tailwindcss/vite": "^4.2.1",
@@ -47,13 +46,6 @@
         "vite": "^7.3.1"
       }
     },
-    "../shared": {
-      "name": "@hermes/shared",
-      "version": "0.0.0",
-      "devDependencies": {
-        "typescript": "^6.0.3"
-      }
-    },
     "node_modules/@babel/code-frame": {
       "version": "7.29.0",
       "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.29.0.tgz",
@@ -70,9 +62,9 @@
       }
     },
     "node_modules/@babel/compat-data": {
-      "version": "7.29.3",
-      "resolved": "https://registry.npmjs.org/@babel/compat-data/-/compat-data-7.29.3.tgz",
-      "integrity": "sha512-LIVqM46zQWZhj17qA8wb4nW/ixr2y1Nw+r1etiAWgRM6U1IqP+LNhL1yg440jYZR72jCWcWbLWzIosH+uP1fqg==",
+      "version": "7.29.0",
+      "resolved": "https://registry.npmjs.org/@babel/compat-data/-/compat-data-7.29.0.tgz",
+      "integrity": "sha512-T1NCJqT/j9+cn8fvkt7jtwbLBfLC/1y1c7NtCeXFRgzGTsafi68MRv8yzkYSapBnFA6L3U2VSc02ciDzoAJhJg==",
       "dev": true,
       "license": "MIT",
       "engines": {
@@ -85,6 +77,7 @@
       "integrity": "sha512-CGOfOJqWjg2qW/Mb6zNsDm+u5vFQ8DxXfbM09z69p5Z6+mE1ikP2jUXw+j42Pf1XTYED2Rni5f95npYeuwMDQA==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@babel/code-frame": "^7.29.0",
         "@babel/generator": "^7.29.0",
@@ -241,9 +234,9 @@
       }
     },
     "node_modules/@babel/parser": {
-      "version": "7.29.3",
-      "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.29.3.tgz",
-      "integrity": "sha512-b3ctpQwp+PROvU/cttc4OYl4MzfJUWy6FZg+PMXfzmt/+39iHVF0sDfqay8TQM3JA2EUOyKcFZt75jWriQijsA==",
+      "version": "7.29.2",
+      "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.29.2.tgz",
+      "integrity": "sha512-4GgRzy/+fsBa72/RZVJmGKPmZu9Byn8o4MoLpmNe1m8ZfYnz5emHLQz3U4gLud6Zwl0RZIcgiLD7Uq7ySFuDLA==",
       "dev": true,
       "license": "MIT",
       "dependencies": {
@@ -956,10 +949,6 @@
       "integrity": "sha512-RiB/yIh78pcIxl6lLMG0CgBXAZ2Y0eVHqMPYugu+9U0AeT6YBeiJpf7lbdJNIugFP5SIjwNRgo4DhR1Qxi26Gg==",
       "license": "MIT"
     },
-    "node_modules/@hermes/shared": {
-      "resolved": "../shared",
-      "link": true
-    },
     "node_modules/@humanfs/core": {
       "version": "0.19.2",
       "resolved": "https://registry.npmjs.org/@humanfs/core/-/core-0.19.2.tgz",
@@ -1091,16 +1080,16 @@
       }
     },
     "node_modules/@nous-research/ui": {
-      "version": "0.16.0",
-      "resolved": "https://registry.npmjs.org/@nous-research/ui/-/ui-0.16.0.tgz",
-      "integrity": "sha512-JvSwf9vBOCEEGDSOYIRn/F/JJSBDh9DvGU3s3OFbX6K1otnSK7s47cZdgvfBoEPmeKFom2fWQDDqfzLV+eR7Qg==",
+      "version": "0.18.2",
+      "resolved": "https://registry.npmjs.org/@nous-research/ui/-/ui-0.18.2.tgz",
+      "integrity": "sha512-xe//1PjCapafXu5onqnJW50MhAK41LXuHw2KvfHDLiI3TBeqtX4QEm4mTrWZaZ2PRqM6koY/o19fdgisB67LGA==",
       "license": "MIT",
       "dependencies": {
         "@nanostores/react": "^1.1.0",
-        "@radix-ui/react-checkbox": "^1.3.3",
         "class-variance-authority": "^0.7.1",
         "clsx": "^2.1.1",
         "nanostores": "^1.3.0",
+        "radix-ui": "^1.4.0",
         "sanitize-html": "^2.17.4",
         "tailwind-merge": "^3.6.0",
         "tw-animate-css": "^1.4.0",
@@ -1139,6 +1128,7 @@
       "resolved": "https://registry.npmjs.org/@observablehq/plot/-/plot-0.6.17.tgz",
       "integrity": "sha512-/qaXP/7mc4MUS0s4cPPFASDRjtsWp85/TbfsciqDgU1HwYixbSbbytNuInD8AcTYC3xaxACgVX06agdfQy9W+g==",
       "license": "ISC",
+      "peer": true,
       "dependencies": {
         "d3": "^7.9.0",
         "interval-tree-1d": "^1.0.0",
@@ -1148,12 +1138,182 @@
         "node": ">=12"
       }
     },
+    "node_modules/@radix-ui/number": {
+      "version": "1.1.1",
+      "resolved": "https://registry.npmjs.org/@radix-ui/number/-/number-1.1.1.tgz",
+      "integrity": "sha512-MkKCwxlXTgz6CFoJx3pCwn07GKp36+aZyu/u2Ln2VrA5DcdyCZkASEDBTd8x5whTQQL5CiYf4prXKLcgQdv29g==",
+      "license": "MIT"
+    },
     "node_modules/@radix-ui/primitive": {
       "version": "1.1.3",
       "resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.1.3.tgz",
       "integrity": "sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg==",
       "license": "MIT"
     },
+    "node_modules/@radix-ui/react-accessible-icon": {
+      "version": "1.1.7",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-accessible-icon/-/react-accessible-icon-1.1.7.tgz",
+      "integrity": "sha512-XM+E4WXl0OqUJFovy6GjmxxFyx9opfCAIUku4dlKRd5YEPqt4kALOkQOp0Of6reHuUkJuiPBEc5k0o4z4lTC8A==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-visually-hidden": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-accordion": {
+      "version": "1.2.12",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-accordion/-/react-accordion-1.2.12.tgz",
+      "integrity": "sha512-T4nygeh9YE9dLRPhAHSeOZi7HBXo+0kYIPJXayZfvWOWA0+n3dESrZbjfDPUABkUNym6Hd+f2IR113To8D2GPA==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/primitive": "1.1.3",
+        "@radix-ui/react-collapsible": "1.1.12",
+        "@radix-ui/react-collection": "1.1.7",
+        "@radix-ui/react-compose-refs": "1.1.2",
+        "@radix-ui/react-context": "1.1.2",
+        "@radix-ui/react-direction": "1.1.1",
+        "@radix-ui/react-id": "1.1.1",
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-use-controllable-state": "1.2.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-accordion/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-accordion/node_modules/@radix-ui/react-slot": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
+      "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-alert-dialog": {
+      "version": "1.1.15",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-alert-dialog/-/react-alert-dialog-1.1.15.tgz",
+      "integrity": "sha512-oTVLkEw5GpdRe29BqJ0LSDFWI3qu0vR1M0mUkOQWDIUnY/QIkLpgDMWuKxP94c2NAC2LGcgVhG1ImF3jkZ5wXw==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/primitive": "1.1.3",
+        "@radix-ui/react-compose-refs": "1.1.2",
+        "@radix-ui/react-context": "1.1.2",
+        "@radix-ui/react-dialog": "1.1.15",
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-alert-dialog/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-alert-dialog/node_modules/@radix-ui/react-slot": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
+      "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
     "node_modules/@radix-ui/react-arrow": {
       "version": "1.1.7",
       "resolved": "https://registry.npmjs.org/@radix-ui/react-arrow/-/react-arrow-1.1.7.tgz",
@@ -1177,6 +1337,179 @@
         }
       }
     },
+    "node_modules/@radix-ui/react-arrow/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-arrow/node_modules/@radix-ui/react-slot": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
+      "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-aspect-ratio": {
+      "version": "1.1.7",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-aspect-ratio/-/react-aspect-ratio-1.1.7.tgz",
+      "integrity": "sha512-Yq6lvO9HQyPwev1onK1daHCHqXVLzPhSVjmsNjCa2Zcxy2f7uJD2itDtxknv6FzAKCwD1qQkeVDmX/cev13n/g==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-primitive": "2.1.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-aspect-ratio/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-aspect-ratio/node_modules/@radix-ui/react-slot": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
+      "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-avatar": {
+      "version": "1.1.10",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-avatar/-/react-avatar-1.1.10.tgz",
+      "integrity": "sha512-V8piFfWapM5OmNCXTzVQY+E1rDa53zY+MQ4Y7356v4fFz6vqCyUtIz2rUD44ZEdwg78/jKmMJHj07+C/Z/rcog==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-context": "1.1.2",
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-use-callback-ref": "1.1.1",
+        "@radix-ui/react-use-is-hydrated": "0.1.0",
+        "@radix-ui/react-use-layout-effect": "1.1.1"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-avatar/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-avatar/node_modules/@radix-ui/react-slot": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
+      "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
     "node_modules/@radix-ui/react-checkbox": {
       "version": "1.3.3",
       "resolved": "https://registry.npmjs.org/@radix-ui/react-checkbox/-/react-checkbox-1.3.3.tgz",
@@ -1207,6 +1540,185 @@
         }
       }
     },
+    "node_modules/@radix-ui/react-checkbox/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-checkbox/node_modules/@radix-ui/react-slot": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
+      "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-collapsible": {
+      "version": "1.1.12",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-collapsible/-/react-collapsible-1.1.12.tgz",
+      "integrity": "sha512-Uu+mSh4agx2ib1uIGPP4/CKNULyajb3p92LsVXmH2EHVMTfZWpll88XJ0j4W0z3f8NK1eYl1+Mf/szHPmcHzyA==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/primitive": "1.1.3",
+        "@radix-ui/react-compose-refs": "1.1.2",
+        "@radix-ui/react-context": "1.1.2",
+        "@radix-ui/react-id": "1.1.1",
+        "@radix-ui/react-presence": "1.1.5",
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-use-controllable-state": "1.2.2",
+        "@radix-ui/react-use-layout-effect": "1.1.1"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-collapsible/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-collapsible/node_modules/@radix-ui/react-slot": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
+      "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-collection": {
+      "version": "1.1.7",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-collection/-/react-collection-1.1.7.tgz",
+      "integrity": "sha512-Fh9rGN0MoI4ZFUNyfFVNU4y9LUz93u9/0K+yLgA2bwRojxM8JU1DyvvMBabnZPBgMWREAJvU2jjVzq+LrFUglw==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2",
+        "@radix-ui/react-context": "1.1.2",
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-collection/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-collection/node_modules/@radix-ui/react-slot": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
+      "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
     "node_modules/@radix-ui/react-compose-refs": {
       "version": "1.1.2",
       "resolved": "https://registry.npmjs.org/@radix-ui/react-compose-refs/-/react-compose-refs-1.1.2.tgz",
@@ -1237,6 +1749,191 @@
         }
       }
     },
+    "node_modules/@radix-ui/react-context-menu": {
+      "version": "2.2.16",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-context-menu/-/react-context-menu-2.2.16.tgz",
+      "integrity": "sha512-O8morBEW+HsVG28gYDZPTrT9UUovQUlJue5YO836tiTJhuIWBm/zQHc7j388sHWtdH/xUZurK9olD2+pcqx5ww==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/primitive": "1.1.3",
+        "@radix-ui/react-context": "1.1.2",
+        "@radix-ui/react-menu": "2.1.16",
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-use-callback-ref": "1.1.1",
+        "@radix-ui/react-use-controllable-state": "1.2.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-context-menu/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-context-menu/node_modules/@radix-ui/react-slot": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
+      "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-dialog": {
+      "version": "1.1.15",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-dialog/-/react-dialog-1.1.15.tgz",
+      "integrity": "sha512-TCglVRtzlffRNxRMEyR36DGBLJpeusFcgMVD9PZEzAKnUs1lKCgX5u9BmC2Yg+LL9MgZDugFFs1Vl+Jp4t/PGw==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/primitive": "1.1.3",
+        "@radix-ui/react-compose-refs": "1.1.2",
+        "@radix-ui/react-context": "1.1.2",
+        "@radix-ui/react-dismissable-layer": "1.1.11",
+        "@radix-ui/react-focus-guards": "1.1.3",
+        "@radix-ui/react-focus-scope": "1.1.7",
+        "@radix-ui/react-id": "1.1.1",
+        "@radix-ui/react-portal": "1.1.9",
+        "@radix-ui/react-presence": "1.1.5",
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-slot": "1.2.3",
+        "@radix-ui/react-use-controllable-state": "1.2.2",
+        "aria-hidden": "^1.2.4",
+        "react-remove-scroll": "^2.6.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-dialog/node_modules/@radix-ui/react-portal": {
+      "version": "1.1.9",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.9.tgz",
+      "integrity": "sha512-bpIxvq03if6UNwXZ+HTK71JLh4APvnXntDc6XOX8UVq4XQOVl7lwok0AvIl+b8zgCw3fSaVTZMpAPPagXbKmHQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-use-layout-effect": "1.1.1"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-dialog/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-dialog/node_modules/@radix-ui/react-slot": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
+      "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-direction": {
+      "version": "1.1.1",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-direction/-/react-direction-1.1.1.tgz",
+      "integrity": "sha512-1UEWRX6jnOA2y4H5WczZ44gOOjTEmlqv1uNW4GAJEO5+bauCBhv8snY65Iw5/VOS/ghKN9gr2KjnLKxrsvoMVw==",
+      "license": "MIT",
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
     "node_modules/@radix-ui/react-dismissable-layer": {
       "version": "1.1.11",
       "resolved": "https://registry.npmjs.org/@radix-ui/react-dismissable-layer/-/react-dismissable-layer-1.1.11.tgz",
@@ -1264,6 +1961,363 @@
         }
       }
     },
+    "node_modules/@radix-ui/react-dismissable-layer/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-dismissable-layer/node_modules/@radix-ui/react-slot": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
+      "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-dropdown-menu": {
+      "version": "2.1.16",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-dropdown-menu/-/react-dropdown-menu-2.1.16.tgz",
+      "integrity": "sha512-1PLGQEynI/3OX/ftV54COn+3Sud/Mn8vALg2rWnBLnRaGtJDduNW/22XjlGgPdpcIbiQxjKtb7BkcjP00nqfJw==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/primitive": "1.1.3",
+        "@radix-ui/react-compose-refs": "1.1.2",
+        "@radix-ui/react-context": "1.1.2",
+        "@radix-ui/react-id": "1.1.1",
+        "@radix-ui/react-menu": "2.1.16",
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-use-controllable-state": "1.2.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-dropdown-menu/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-dropdown-menu/node_modules/@radix-ui/react-slot": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
+      "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-focus-guards": {
+      "version": "1.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-guards/-/react-focus-guards-1.1.3.tgz",
+      "integrity": "sha512-0rFg/Rj2Q62NCm62jZw0QX7a3sz6QCQU0LpZdNrJX8byRGaGVTqbrW9jAoIAHyMQqsNpeZ81YgSizOt5WXq0Pw==",
+      "license": "MIT",
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-focus-scope": {
+      "version": "1.1.7",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-scope/-/react-focus-scope-1.1.7.tgz",
+      "integrity": "sha512-t2ODlkXBQyn7jkl6TNaw/MtVEVvIGelJDCG41Okq/KwUsJBwQ4XVZsHAVUkK4mBv3ewiAS3PGuUWuY2BoK4ZUw==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2",
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-use-callback-ref": "1.1.1"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-focus-scope/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-focus-scope/node_modules/@radix-ui/react-slot": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
+      "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-form": {
+      "version": "0.1.8",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-form/-/react-form-0.1.8.tgz",
+      "integrity": "sha512-QM70k4Zwjttifr5a4sZFts9fn8FzHYvQ5PiB19O2HsYibaHSVt9fH9rzB0XZo/YcM+b7t/p7lYCT/F5eOeF5yQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/primitive": "1.1.3",
+        "@radix-ui/react-compose-refs": "1.1.2",
+        "@radix-ui/react-context": "1.1.2",
+        "@radix-ui/react-id": "1.1.1",
+        "@radix-ui/react-label": "2.1.7",
+        "@radix-ui/react-primitive": "2.1.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-form/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-form/node_modules/@radix-ui/react-slot": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
+      "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-hover-card": {
+      "version": "1.1.15",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-hover-card/-/react-hover-card-1.1.15.tgz",
+      "integrity": "sha512-qgTkjNT1CfKMoP0rcasmlH2r1DAiYicWsDsufxl940sT2wHNEWWv6FMWIQXWhVdmC1d/HYfbhQx60KYyAtKxjg==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/primitive": "1.1.3",
+        "@radix-ui/react-compose-refs": "1.1.2",
+        "@radix-ui/react-context": "1.1.2",
+        "@radix-ui/react-dismissable-layer": "1.1.11",
+        "@radix-ui/react-popper": "1.2.8",
+        "@radix-ui/react-portal": "1.1.9",
+        "@radix-ui/react-presence": "1.1.5",
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-use-controllable-state": "1.2.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-hover-card/node_modules/@radix-ui/react-portal": {
+      "version": "1.1.9",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.9.tgz",
+      "integrity": "sha512-bpIxvq03if6UNwXZ+HTK71JLh4APvnXntDc6XOX8UVq4XQOVl7lwok0AvIl+b8zgCw3fSaVTZMpAPPagXbKmHQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-use-layout-effect": "1.1.1"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-hover-card/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-hover-card/node_modules/@radix-ui/react-slot": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
+      "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
     "node_modules/@radix-ui/react-id": {
       "version": "1.1.1",
       "resolved": "https://registry.npmjs.org/@radix-ui/react-id/-/react-id-1.1.1.tgz",
@@ -1282,6 +2336,573 @@
         }
       }
     },
+    "node_modules/@radix-ui/react-label": {
+      "version": "2.1.7",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-label/-/react-label-2.1.7.tgz",
+      "integrity": "sha512-YT1GqPSL8kJn20djelMX7/cTRp/Y9w5IZHvfxQTVHrOqa2yMl7i/UfMqKRU5V7mEyKTrUVgJXhNQPVCG8PBLoQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-primitive": "2.1.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-label/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-label/node_modules/@radix-ui/react-slot": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
+      "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-menu": {
+      "version": "2.1.16",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-menu/-/react-menu-2.1.16.tgz",
+      "integrity": "sha512-72F2T+PLlphrqLcAotYPp0uJMr5SjP5SL01wfEspJbru5Zs5vQaSHb4VB3ZMJPimgHHCHG7gMOeOB9H3Hdmtxg==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/primitive": "1.1.3",
+        "@radix-ui/react-collection": "1.1.7",
+        "@radix-ui/react-compose-refs": "1.1.2",
+        "@radix-ui/react-context": "1.1.2",
+        "@radix-ui/react-direction": "1.1.1",
+        "@radix-ui/react-dismissable-layer": "1.1.11",
+        "@radix-ui/react-focus-guards": "1.1.3",
+        "@radix-ui/react-focus-scope": "1.1.7",
+        "@radix-ui/react-id": "1.1.1",
+        "@radix-ui/react-popper": "1.2.8",
+        "@radix-ui/react-portal": "1.1.9",
+        "@radix-ui/react-presence": "1.1.5",
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-roving-focus": "1.1.11",
+        "@radix-ui/react-slot": "1.2.3",
+        "@radix-ui/react-use-callback-ref": "1.1.1",
+        "aria-hidden": "^1.2.4",
+        "react-remove-scroll": "^2.6.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-menu/node_modules/@radix-ui/react-portal": {
+      "version": "1.1.9",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.9.tgz",
+      "integrity": "sha512-bpIxvq03if6UNwXZ+HTK71JLh4APvnXntDc6XOX8UVq4XQOVl7lwok0AvIl+b8zgCw3fSaVTZMpAPPagXbKmHQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-use-layout-effect": "1.1.1"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-menu/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-menu/node_modules/@radix-ui/react-slot": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
+      "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-menubar": {
+      "version": "1.1.16",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-menubar/-/react-menubar-1.1.16.tgz",
+      "integrity": "sha512-EB1FktTz5xRRi2Er974AUQZWg2yVBb1yjip38/lgwtCVRd3a+maUoGHN/xs9Yv8SY8QwbSEb+YrxGadVWbEutA==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/primitive": "1.1.3",
+        "@radix-ui/react-collection": "1.1.7",
+        "@radix-ui/react-compose-refs": "1.1.2",
+        "@radix-ui/react-context": "1.1.2",
+        "@radix-ui/react-direction": "1.1.1",
+        "@radix-ui/react-id": "1.1.1",
+        "@radix-ui/react-menu": "2.1.16",
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-roving-focus": "1.1.11",
+        "@radix-ui/react-use-controllable-state": "1.2.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-menubar/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-menubar/node_modules/@radix-ui/react-slot": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
+      "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-navigation-menu": {
+      "version": "1.2.14",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-navigation-menu/-/react-navigation-menu-1.2.14.tgz",
+      "integrity": "sha512-YB9mTFQvCOAQMHU+C/jVl96WmuWeltyUEpRJJky51huhds5W2FQr1J8D/16sQlf0ozxkPK8uF3niQMdUwZPv5w==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/primitive": "1.1.3",
+        "@radix-ui/react-collection": "1.1.7",
+        "@radix-ui/react-compose-refs": "1.1.2",
+        "@radix-ui/react-context": "1.1.2",
+        "@radix-ui/react-direction": "1.1.1",
+        "@radix-ui/react-dismissable-layer": "1.1.11",
+        "@radix-ui/react-id": "1.1.1",
+        "@radix-ui/react-presence": "1.1.5",
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-use-callback-ref": "1.1.1",
+        "@radix-ui/react-use-controllable-state": "1.2.2",
+        "@radix-ui/react-use-layout-effect": "1.1.1",
+        "@radix-ui/react-use-previous": "1.1.1",
+        "@radix-ui/react-visually-hidden": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-navigation-menu/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-navigation-menu/node_modules/@radix-ui/react-slot": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
+      "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-one-time-password-field": {
+      "version": "0.1.8",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-one-time-password-field/-/react-one-time-password-field-0.1.8.tgz",
+      "integrity": "sha512-ycS4rbwURavDPVjCb5iS3aG4lURFDILi6sKI/WITUMZ13gMmn/xGjpLoqBAalhJaDk8I3UbCM5GzKHrnzwHbvg==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/number": "1.1.1",
+        "@radix-ui/primitive": "1.1.3",
+        "@radix-ui/react-collection": "1.1.7",
+        "@radix-ui/react-compose-refs": "1.1.2",
+        "@radix-ui/react-context": "1.1.2",
+        "@radix-ui/react-direction": "1.1.1",
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-roving-focus": "1.1.11",
+        "@radix-ui/react-use-controllable-state": "1.2.2",
+        "@radix-ui/react-use-effect-event": "0.0.2",
+        "@radix-ui/react-use-is-hydrated": "0.1.0",
+        "@radix-ui/react-use-layout-effect": "1.1.1"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-one-time-password-field/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-one-time-password-field/node_modules/@radix-ui/react-slot": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
+      "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-password-toggle-field": {
+      "version": "0.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-password-toggle-field/-/react-password-toggle-field-0.1.3.tgz",
+      "integrity": "sha512-/UuCrDBWravcaMix4TdT+qlNdVwOM1Nck9kWx/vafXsdfj1ChfhOdfi3cy9SGBpWgTXwYCuboT/oYpJy3clqfw==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/primitive": "1.1.3",
+        "@radix-ui/react-compose-refs": "1.1.2",
+        "@radix-ui/react-context": "1.1.2",
+        "@radix-ui/react-id": "1.1.1",
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-use-controllable-state": "1.2.2",
+        "@radix-ui/react-use-effect-event": "0.0.2",
+        "@radix-ui/react-use-is-hydrated": "0.1.0"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-password-toggle-field/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-password-toggle-field/node_modules/@radix-ui/react-slot": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
+      "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-popover": {
+      "version": "1.1.15",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-popover/-/react-popover-1.1.15.tgz",
+      "integrity": "sha512-kr0X2+6Yy/vJzLYJUPCZEc8SfQcf+1COFoAqauJm74umQhta9M7lNJHP7QQS3vkvcGLQUbWpMzwrXYwrYztHKA==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/primitive": "1.1.3",
+        "@radix-ui/react-compose-refs": "1.1.2",
+        "@radix-ui/react-context": "1.1.2",
+        "@radix-ui/react-dismissable-layer": "1.1.11",
+        "@radix-ui/react-focus-guards": "1.1.3",
+        "@radix-ui/react-focus-scope": "1.1.7",
+        "@radix-ui/react-id": "1.1.1",
+        "@radix-ui/react-popper": "1.2.8",
+        "@radix-ui/react-portal": "1.1.9",
+        "@radix-ui/react-presence": "1.1.5",
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-slot": "1.2.3",
+        "@radix-ui/react-use-controllable-state": "1.2.2",
+        "aria-hidden": "^1.2.4",
+        "react-remove-scroll": "^2.6.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-popover/node_modules/@radix-ui/react-portal": {
+      "version": "1.1.9",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.9.tgz",
+      "integrity": "sha512-bpIxvq03if6UNwXZ+HTK71JLh4APvnXntDc6XOX8UVq4XQOVl7lwok0AvIl+b8zgCw3fSaVTZMpAPPagXbKmHQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-use-layout-effect": "1.1.1"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-popover/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-popover/node_modules/@radix-ui/react-slot": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
+      "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
     "node_modules/@radix-ui/react-popper": {
       "version": "1.2.8",
       "resolved": "https://registry.npmjs.org/@radix-ui/react-popper/-/react-popper-1.2.8.tgz",
@@ -1314,6 +2935,47 @@
         }
       }
     },
+    "node_modules/@radix-ui/react-popper/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-popper/node_modules/@radix-ui/react-slot": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
+      "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
     "node_modules/@radix-ui/react-portal": {
       "version": "1.1.10",
       "resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.10.tgz",
@@ -1338,47 +3000,6 @@
         }
       }
     },
-    "node_modules/@radix-ui/react-portal/node_modules/@radix-ui/react-primitive": {
-      "version": "2.1.4",
-      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.4.tgz",
-      "integrity": "sha512-9hQc4+GNVtJAIEPEqlYqW5RiYdrr8ea5XQ0ZOnD6fgru+83kqT15mq2OCcbe8KnjRZl5vF3ks69AKz3kh1jrhg==",
-      "license": "MIT",
-      "dependencies": {
-        "@radix-ui/react-slot": "1.2.4"
-      },
-      "peerDependencies": {
-        "@types/react": "*",
-        "@types/react-dom": "*",
-        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
-        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
-      },
-      "peerDependenciesMeta": {
-        "@types/react": {
-          "optional": true
-        },
-        "@types/react-dom": {
-          "optional": true
-        }
-      }
-    },
-    "node_modules/@radix-ui/react-portal/node_modules/@radix-ui/react-slot": {
-      "version": "1.2.4",
-      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.4.tgz",
-      "integrity": "sha512-Jl+bCv8HxKnlTLVrcDE8zTMJ09R9/ukw4qBs/oZClOfoQk/cOTbDn+NceXfV7j09YPVQUryJPHurafcSg6EVKA==",
-      "license": "MIT",
-      "dependencies": {
-        "@radix-ui/react-compose-refs": "1.1.2"
-      },
-      "peerDependencies": {
-        "@types/react": "*",
-        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
-      },
-      "peerDependenciesMeta": {
-        "@types/react": {
-          "optional": true
-        }
-      }
-    },
     "node_modules/@radix-ui/react-presence": {
       "version": "1.1.5",
       "resolved": "https://registry.npmjs.org/@radix-ui/react-presence/-/react-presence-1.1.5.tgz",
@@ -1404,6 +3025,53 @@
       }
     },
     "node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.4",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.4.tgz",
+      "integrity": "sha512-9hQc4+GNVtJAIEPEqlYqW5RiYdrr8ea5XQ0ZOnD6fgru+83kqT15mq2OCcbe8KnjRZl5vF3ks69AKz3kh1jrhg==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.4"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-progress": {
+      "version": "1.1.7",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-progress/-/react-progress-1.1.7.tgz",
+      "integrity": "sha512-vPdg/tF6YC/ynuBIJlk1mm7Le0VgW6ub6J2UWnTQ7/D23KXcPI1qy+0vBkgKgd38RCMJavBXpB83HPNFMTb0Fg==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-context": "1.1.2",
+        "@radix-ui/react-primitive": "2.1.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-progress/node_modules/@radix-ui/react-primitive": {
       "version": "2.1.3",
       "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
       "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
@@ -1426,7 +3094,934 @@
         }
       }
     },
+    "node_modules/@radix-ui/react-progress/node_modules/@radix-ui/react-slot": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
+      "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-radio-group": {
+      "version": "1.3.8",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-radio-group/-/react-radio-group-1.3.8.tgz",
+      "integrity": "sha512-VBKYIYImA5zsxACdisNQ3BjCBfmbGH3kQlnFVqlWU4tXwjy7cGX8ta80BcrO+WJXIn5iBylEH3K6ZTlee//lgQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/primitive": "1.1.3",
+        "@radix-ui/react-compose-refs": "1.1.2",
+        "@radix-ui/react-context": "1.1.2",
+        "@radix-ui/react-direction": "1.1.1",
+        "@radix-ui/react-presence": "1.1.5",
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-roving-focus": "1.1.11",
+        "@radix-ui/react-use-controllable-state": "1.2.2",
+        "@radix-ui/react-use-previous": "1.1.1",
+        "@radix-ui/react-use-size": "1.1.1"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-radio-group/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-radio-group/node_modules/@radix-ui/react-slot": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
+      "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-roving-focus": {
+      "version": "1.1.11",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-roving-focus/-/react-roving-focus-1.1.11.tgz",
+      "integrity": "sha512-7A6S9jSgm/S+7MdtNDSb+IU859vQqJ/QAtcYQcfFC6W8RS4IxIZDldLR0xqCFZ6DCyrQLjLPsxtTNch5jVA4lA==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/primitive": "1.1.3",
+        "@radix-ui/react-collection": "1.1.7",
+        "@radix-ui/react-compose-refs": "1.1.2",
+        "@radix-ui/react-context": "1.1.2",
+        "@radix-ui/react-direction": "1.1.1",
+        "@radix-ui/react-id": "1.1.1",
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-use-callback-ref": "1.1.1",
+        "@radix-ui/react-use-controllable-state": "1.2.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-roving-focus/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-roving-focus/node_modules/@radix-ui/react-slot": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
+      "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-scroll-area": {
+      "version": "1.2.10",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-scroll-area/-/react-scroll-area-1.2.10.tgz",
+      "integrity": "sha512-tAXIa1g3sM5CGpVT0uIbUx/U3Gs5N8T52IICuCtObaos1S8fzsrPXG5WObkQN3S6NVl6wKgPhAIiBGbWnvc97A==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/number": "1.1.1",
+        "@radix-ui/primitive": "1.1.3",
+        "@radix-ui/react-compose-refs": "1.1.2",
+        "@radix-ui/react-context": "1.1.2",
+        "@radix-ui/react-direction": "1.1.1",
+        "@radix-ui/react-presence": "1.1.5",
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-use-callback-ref": "1.1.1",
+        "@radix-ui/react-use-layout-effect": "1.1.1"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-scroll-area/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-scroll-area/node_modules/@radix-ui/react-slot": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
+      "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-select": {
+      "version": "2.2.6",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-select/-/react-select-2.2.6.tgz",
+      "integrity": "sha512-I30RydO+bnn2PQztvo25tswPH+wFBjehVGtmagkU78yMdwTwVf12wnAOF+AeP8S2N8xD+5UPbGhkUfPyvT+mwQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/number": "1.1.1",
+        "@radix-ui/primitive": "1.1.3",
+        "@radix-ui/react-collection": "1.1.7",
+        "@radix-ui/react-compose-refs": "1.1.2",
+        "@radix-ui/react-context": "1.1.2",
+        "@radix-ui/react-direction": "1.1.1",
+        "@radix-ui/react-dismissable-layer": "1.1.11",
+        "@radix-ui/react-focus-guards": "1.1.3",
+        "@radix-ui/react-focus-scope": "1.1.7",
+        "@radix-ui/react-id": "1.1.1",
+        "@radix-ui/react-popper": "1.2.8",
+        "@radix-ui/react-portal": "1.1.9",
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-slot": "1.2.3",
+        "@radix-ui/react-use-callback-ref": "1.1.1",
+        "@radix-ui/react-use-controllable-state": "1.2.2",
+        "@radix-ui/react-use-layout-effect": "1.1.1",
+        "@radix-ui/react-use-previous": "1.1.1",
+        "@radix-ui/react-visually-hidden": "1.2.3",
+        "aria-hidden": "^1.2.4",
+        "react-remove-scroll": "^2.6.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-select/node_modules/@radix-ui/react-portal": {
+      "version": "1.1.9",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.9.tgz",
+      "integrity": "sha512-bpIxvq03if6UNwXZ+HTK71JLh4APvnXntDc6XOX8UVq4XQOVl7lwok0AvIl+b8zgCw3fSaVTZMpAPPagXbKmHQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-use-layout-effect": "1.1.1"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-select/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-select/node_modules/@radix-ui/react-slot": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
+      "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-separator": {
+      "version": "1.1.7",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-separator/-/react-separator-1.1.7.tgz",
+      "integrity": "sha512-0HEb8R9E8A+jZjvmFCy/J4xhbXy3TV+9XSnGJ3KvTtjlIUy/YQ/p6UYZvi7YbeoeXdyU9+Y3scizK6hkY37baA==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-primitive": "2.1.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-separator/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-separator/node_modules/@radix-ui/react-slot": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
+      "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-slider": {
+      "version": "1.3.6",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-slider/-/react-slider-1.3.6.tgz",
+      "integrity": "sha512-JPYb1GuM1bxfjMRlNLE+BcmBC8onfCi60Blk7OBqi2MLTFdS+8401U4uFjnwkOr49BLmXxLC6JHkvAsx5OJvHw==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/number": "1.1.1",
+        "@radix-ui/primitive": "1.1.3",
+        "@radix-ui/react-collection": "1.1.7",
+        "@radix-ui/react-compose-refs": "1.1.2",
+        "@radix-ui/react-context": "1.1.2",
+        "@radix-ui/react-direction": "1.1.1",
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-use-controllable-state": "1.2.2",
+        "@radix-ui/react-use-layout-effect": "1.1.1",
+        "@radix-ui/react-use-previous": "1.1.1",
+        "@radix-ui/react-use-size": "1.1.1"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-slider/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-slider/node_modules/@radix-ui/react-slot": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
+      "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
     "node_modules/@radix-ui/react-slot": {
+      "version": "1.2.4",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.4.tgz",
+      "integrity": "sha512-Jl+bCv8HxKnlTLVrcDE8zTMJ09R9/ukw4qBs/oZClOfoQk/cOTbDn+NceXfV7j09YPVQUryJPHurafcSg6EVKA==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-switch": {
+      "version": "1.2.6",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-switch/-/react-switch-1.2.6.tgz",
+      "integrity": "sha512-bByzr1+ep1zk4VubeEVViV592vu2lHE2BZY5OnzehZqOOgogN80+mNtCqPkhn2gklJqOpxWgPoYTSnhBCqpOXQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/primitive": "1.1.3",
+        "@radix-ui/react-compose-refs": "1.1.2",
+        "@radix-ui/react-context": "1.1.2",
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-use-controllable-state": "1.2.2",
+        "@radix-ui/react-use-previous": "1.1.1",
+        "@radix-ui/react-use-size": "1.1.1"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-switch/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-switch/node_modules/@radix-ui/react-slot": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
+      "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-tabs": {
+      "version": "1.1.13",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-tabs/-/react-tabs-1.1.13.tgz",
+      "integrity": "sha512-7xdcatg7/U+7+Udyoj2zodtI9H/IIopqo+YOIcZOq1nJwXWBZ9p8xiu5llXlekDbZkca79a/fozEYQXIA4sW6A==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/primitive": "1.1.3",
+        "@radix-ui/react-context": "1.1.2",
+        "@radix-ui/react-direction": "1.1.1",
+        "@radix-ui/react-id": "1.1.1",
+        "@radix-ui/react-presence": "1.1.5",
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-roving-focus": "1.1.11",
+        "@radix-ui/react-use-controllable-state": "1.2.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-tabs/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-tabs/node_modules/@radix-ui/react-slot": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
+      "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-toast": {
+      "version": "1.2.15",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-toast/-/react-toast-1.2.15.tgz",
+      "integrity": "sha512-3OSz3TacUWy4WtOXV38DggwxoqJK4+eDkNMl5Z/MJZaoUPaP4/9lf81xXMe1I2ReTAptverZUpbPY4wWwWyL5g==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/primitive": "1.1.3",
+        "@radix-ui/react-collection": "1.1.7",
+        "@radix-ui/react-compose-refs": "1.1.2",
+        "@radix-ui/react-context": "1.1.2",
+        "@radix-ui/react-dismissable-layer": "1.1.11",
+        "@radix-ui/react-portal": "1.1.9",
+        "@radix-ui/react-presence": "1.1.5",
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-use-callback-ref": "1.1.1",
+        "@radix-ui/react-use-controllable-state": "1.2.2",
+        "@radix-ui/react-use-layout-effect": "1.1.1",
+        "@radix-ui/react-visually-hidden": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-toast/node_modules/@radix-ui/react-portal": {
+      "version": "1.1.9",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.9.tgz",
+      "integrity": "sha512-bpIxvq03if6UNwXZ+HTK71JLh4APvnXntDc6XOX8UVq4XQOVl7lwok0AvIl+b8zgCw3fSaVTZMpAPPagXbKmHQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-use-layout-effect": "1.1.1"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-toast/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-toast/node_modules/@radix-ui/react-slot": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
+      "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-toggle": {
+      "version": "1.1.10",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-toggle/-/react-toggle-1.1.10.tgz",
+      "integrity": "sha512-lS1odchhFTeZv3xwHH31YPObmJn8gOg7Lq12inrr0+BH/l3Tsq32VfjqH1oh80ARM3mlkfMic15n0kg4sD1poQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/primitive": "1.1.3",
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-use-controllable-state": "1.2.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-toggle-group": {
+      "version": "1.1.11",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-toggle-group/-/react-toggle-group-1.1.11.tgz",
+      "integrity": "sha512-5umnS0T8JQzQT6HbPyO7Hh9dgd82NmS36DQr+X/YJ9ctFNCiiQd6IJAYYZ33LUwm8M+taCz5t2ui29fHZc4Y6Q==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/primitive": "1.1.3",
+        "@radix-ui/react-context": "1.1.2",
+        "@radix-ui/react-direction": "1.1.1",
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-roving-focus": "1.1.11",
+        "@radix-ui/react-toggle": "1.1.10",
+        "@radix-ui/react-use-controllable-state": "1.2.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-toggle-group/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-toggle-group/node_modules/@radix-ui/react-slot": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
+      "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-toggle/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-toggle/node_modules/@radix-ui/react-slot": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
+      "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-toolbar": {
+      "version": "1.1.11",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-toolbar/-/react-toolbar-1.1.11.tgz",
+      "integrity": "sha512-4ol06/1bLoFu1nwUqzdD4Y5RZ9oDdKeiHIsntug54Hcr1pgaHiPqHFEaXI1IFP/EsOfROQZ8Mig9VTIRza6Tjg==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/primitive": "1.1.3",
+        "@radix-ui/react-context": "1.1.2",
+        "@radix-ui/react-direction": "1.1.1",
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-roving-focus": "1.1.11",
+        "@radix-ui/react-separator": "1.1.7",
+        "@radix-ui/react-toggle-group": "1.1.11"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-toolbar/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-toolbar/node_modules/@radix-ui/react-slot": {
       "version": "1.2.3",
       "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
       "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
@@ -1502,6 +4097,47 @@
         }
       }
     },
+    "node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/react-slot": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
+      "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
     "node_modules/@radix-ui/react-use-callback-ref": {
       "version": "1.1.1",
       "resolved": "https://registry.npmjs.org/@radix-ui/react-use-callback-ref/-/react-use-callback-ref-1.1.1.tgz",
@@ -1572,6 +4208,24 @@
         }
       }
     },
+    "node_modules/@radix-ui/react-use-is-hydrated": {
+      "version": "0.1.0",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-use-is-hydrated/-/react-use-is-hydrated-0.1.0.tgz",
+      "integrity": "sha512-U+UORVEq+cTnRIaostJv9AGdV3G6Y+zbVd+12e18jQ5A3c0xL03IhnHuiU4UV69wolOQp5GfR58NW/EgdQhwOA==",
+      "license": "MIT",
+      "dependencies": {
+        "use-sync-external-store": "^1.5.0"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
     "node_modules/@radix-ui/react-use-layout-effect": {
       "version": "1.1.1",
       "resolved": "https://registry.npmjs.org/@radix-ui/react-use-layout-effect/-/react-use-layout-effect-1.1.1.tgz",
@@ -1661,6 +4315,47 @@
         }
       }
     },
+    "node_modules/@radix-ui/react-visually-hidden/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-visually-hidden/node_modules/@radix-ui/react-slot": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
+      "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
     "node_modules/@radix-ui/rect": {
       "version": "1.1.1",
       "resolved": "https://registry.npmjs.org/@radix-ui/rect/-/rect-1.1.1.tgz",
@@ -1668,10 +4363,11 @@
       "license": "MIT"
     },
     "node_modules/@react-three/fiber": {
-      "version": "9.6.1",
-      "resolved": "https://registry.npmjs.org/@react-three/fiber/-/fiber-9.6.1.tgz",
-      "integrity": "sha512-zF0rsKcVYpcJwbFEnv2HkHX9cvOEgsfQo/X8lwmR2dn13S4qEQJXir9fxf5js2LQFoXqxOY7MDkOkYx2uZ4gSg==",
+      "version": "9.6.0",
+      "resolved": "https://registry.npmjs.org/@react-three/fiber/-/fiber-9.6.0.tgz",
+      "integrity": "sha512-90abYK2q5/qDM+GACs9zRvc5KhEEpEWqWlHSd64zTPNxg+9wCJvTfyD9x2so7hlQhjRYO1Fa6flR3BC/kpTFkA==",
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@babel/runtime": "^7.17.8",
         "@types/webxr": "*",
@@ -1723,9 +4419,9 @@
       "license": "MIT"
     },
     "node_modules/@rollup/rollup-android-arm-eabi": {
-      "version": "4.60.4",
-      "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.60.4.tgz",
-      "integrity": "sha512-F5QXMSiFebS9hKZj02XhWLLnRpJ3B3AROP0tWbFBSj+6kCbg5m9j5JoHKd4mmSVy5mS/IMQloYgYxCuJC0fxEQ==",
+      "version": "4.60.2",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.60.2.tgz",
+      "integrity": "sha512-dnlp69efPPg6Uaw2dVqzWRfAWRnYVb1XJ8CyyhIbZeaq4CA5/mLeZ1IEt9QqQxmbdvagjLIm2ZL8BxXv5lH4Yw==",
       "cpu": [
         "arm"
       ],
@@ -1736,9 +4432,9 @@
       ]
     },
     "node_modules/@rollup/rollup-android-arm64": {
-      "version": "4.60.4",
-      "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.60.4.tgz",
-      "integrity": "sha512-GxxTKApUpzRhof7poWvCJHRF51C67u1R7D6DiluBE8wKU1u5GWE8t+v81JvJYtbawoBFX1hLv5Ei4eVjkWokaw==",
+      "version": "4.60.2",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.60.2.tgz",
+      "integrity": "sha512-OqZTwDRDchGRHHm/hwLOL7uVPB9aUvI0am/eQuWMNyFHf5PSEQmyEeYYheA0EPPKUO/l0uigCp+iaTjoLjVoHg==",
       "cpu": [
         "arm64"
       ],
@@ -1749,9 +4445,9 @@
       ]
     },
     "node_modules/@rollup/rollup-darwin-arm64": {
-      "version": "4.60.4",
-      "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.60.4.tgz",
-      "integrity": "sha512-tua0TaJxMOB1R0V0RS1jFZ/RpURFDJIOR2A6jWwQeawuFyS4gBW+rntLRaQd0EQ4bd6Vp44Z2rXW+YYDBsj6IA==",
+      "version": "4.60.2",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.60.2.tgz",
+      "integrity": "sha512-UwRE7CGpvSVEQS8gUMBe1uADWjNnVgP3Iusyda1nSRwNDCsRjnGc7w6El6WLQsXmZTbLZx9cecegumcitNfpmA==",
       "cpu": [
         "arm64"
       ],
@@ -1762,9 +4458,9 @@
       ]
     },
     "node_modules/@rollup/rollup-darwin-x64": {
-      "version": "4.60.4",
-      "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.60.4.tgz",
-      "integrity": "sha512-CSKq7MsP+5PFIcydhAiR1K0UhEI1A2jWXVKHPCBZ151yOutENwvnPocgVHkivu2kviURtCEB6zUQw0vs8RrhMg==",
+      "version": "4.60.2",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.60.2.tgz",
+      "integrity": "sha512-gjEtURKLCC5VXm1I+2i1u9OhxFsKAQJKTVB8WvDAHF+oZlq0GTVFOlTlO1q3AlCTE/DF32c16ESvfgqR7343/g==",
       "cpu": [
         "x64"
       ],
@@ -1775,9 +4471,9 @@
       ]
     },
     "node_modules/@rollup/rollup-freebsd-arm64": {
-      "version": "4.60.4",
-      "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-arm64/-/rollup-freebsd-arm64-4.60.4.tgz",
-      "integrity": "sha512-+O8OkVdyvXMtJEciu2wS/pzm1IxntEEQx3z5TAVy4l32G0etZn+RsA48ARRrFm6Ri8fvqPQfgrvNxSjKAbnd3g==",
+      "version": "4.60.2",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-arm64/-/rollup-freebsd-arm64-4.60.2.tgz",
+      "integrity": "sha512-Bcl6CYDeAgE70cqZaMojOi/eK63h5Me97ZqAQoh77VPjMysA/4ORQBRGo3rRy45x4MzVlU9uZxs8Uwy7ZaKnBw==",
       "cpu": [
         "arm64"
       ],
@@ -1788,9 +4484,9 @@
       ]
     },
     "node_modules/@rollup/rollup-freebsd-x64": {
-      "version": "4.60.4",
-      "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-x64/-/rollup-freebsd-x64-4.60.4.tgz",
-      "integrity": "sha512-Iw3oMskH3AfNuhU0MSN7vNbdi4me/NiYo2azqPz/Le16zHSa+3RRmliCMWWQmh4lcndccU40xcJuTYJZxNo/lw==",
+      "version": "4.60.2",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-x64/-/rollup-freebsd-x64-4.60.2.tgz",
+      "integrity": "sha512-LU+TPda3mAE2QB0/Hp5VyeKJivpC6+tlOXd1VMoXV/YFMvk/MNk5iXeBfB4MQGRWyOYVJ01625vjkr0Az98OJQ==",
       "cpu": [
         "x64"
       ],
@@ -1801,15 +4497,12 @@
       ]
     },
     "node_modules/@rollup/rollup-linux-arm-gnueabihf": {
-      "version": "4.60.4",
-      "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.60.4.tgz",
-      "integrity": "sha512-EIPRXTVQpHyF8WOo219AD2yEltPehLTcTMz2fn6JsatLYSzQf00hj3rulF+yauOlF9/FtM2WpkT/hJh/KJFGhA==",
+      "version": "4.60.2",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.60.2.tgz",
+      "integrity": "sha512-2QxQrM+KQ7DAW4o22j+XZ6RKdxjLD7BOWTP0Bv0tmjdyhXSsr2Ul1oJDQqh9Zf5qOwTuTc7Ek83mOFaKnodPjg==",
       "cpu": [
         "arm"
       ],
-      "libc": [
-        "glibc"
-      ],
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1817,15 +4510,12 @@
       ]
     },
     "node_modules/@rollup/rollup-linux-arm-musleabihf": {
-      "version": "4.60.4",
-      "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.60.4.tgz",
-      "integrity": "sha512-J3Yh9PzzF1Ovah2At+lHiGQdsYgArxBbXv/zHfSyaiFQEqvNv7DcW98pCrmdjCZBrqBiKrKKe2V+aaSGWuBe/w==",
+      "version": "4.60.2",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.60.2.tgz",
+      "integrity": "sha512-TbziEu2DVsTEOPif2mKWkMeDMLoYjx95oESa9fkQQK7r/Orta0gnkcDpzwufEcAO2BLBsD7mZkXGFqEdMRRwfw==",
       "cpu": [
         "arm"
       ],
-      "libc": [
-        "musl"
-      ],
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1833,15 +4523,12 @@
       ]
     },
     "node_modules/@rollup/rollup-linux-arm64-gnu": {
-      "version": "4.60.4",
-      "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.60.4.tgz",
-      "integrity": "sha512-BFDEZMYfUvLn37ONE1yMBojPxnMlTFsdyNoqncT0qFq1mAfllL+ATMMJd8TeuVMiX84s1KbcxcZbXInmcO2mRg==",
+      "version": "4.60.2",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.60.2.tgz",
+      "integrity": "sha512-bO/rVDiDUuM2YfuCUwZ1t1cP+/yqjqz+Xf2VtkdppefuOFS2OSeAfgafaHNkFn0t02hEyXngZkxtGqXcXwO8Rg==",
       "cpu": [
         "arm64"
       ],
-      "libc": [
-        "glibc"
-      ],
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1849,15 +4536,12 @@
       ]
     },
     "node_modules/@rollup/rollup-linux-arm64-musl": {
-      "version": "4.60.4",
-      "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.60.4.tgz",
-      "integrity": "sha512-pc9EYOSlOgdQ2uPl1o9PF6/kLSgaUosia7gOuS8mB69IxJvlclko1MECXysjs5ryez1/5zjYqx3+xYU0TU6R1A==",
+      "version": "4.60.2",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.60.2.tgz",
+      "integrity": "sha512-hr26p7e93Rl0Za+JwW7EAnwAvKkehh12BU1Llm9Ykiibg4uIr2rbpxG9WCf56GuvidlTG9KiiQT/TXT1yAWxTA==",
       "cpu": [
         "arm64"
       ],
-      "libc": [
-        "musl"
-      ],
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1865,15 +4549,12 @@
       ]
     },
     "node_modules/@rollup/rollup-linux-loong64-gnu": {
-      "version": "4.60.4",
-      "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loong64-gnu/-/rollup-linux-loong64-gnu-4.60.4.tgz",
-      "integrity": "sha512-NxnomyxYerDh5n4iLrNa+sH+Z+U4BMEE46V2PgQ/hoB909i8gV1M5wPojWg9fk1jWpO3IQnOs20K4wyZuFLEFQ==",
+      "version": "4.60.2",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loong64-gnu/-/rollup-linux-loong64-gnu-4.60.2.tgz",
+      "integrity": "sha512-pOjB/uSIyDt+ow3k/RcLvUAOGpysT2phDn7TTUB3n75SlIgZzM6NKAqlErPhoFU+npgY3/n+2HYIQVbF70P9/A==",
       "cpu": [
         "loong64"
       ],
-      "libc": [
-        "glibc"
-      ],
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1881,15 +4562,12 @@
       ]
     },
     "node_modules/@rollup/rollup-linux-loong64-musl": {
-      "version": "4.60.4",
-      "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loong64-musl/-/rollup-linux-loong64-musl-4.60.4.tgz",
-      "integrity": "sha512-nbJnQ8a3z1mtmrwImCYhc6BGpThAyYVRQxw9uKSKG4wR6aAYno9sVjJ0zaZcW9BPJX1GbrDPf+SvdWjgTuDmnw==",
+      "version": "4.60.2",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loong64-musl/-/rollup-linux-loong64-musl-4.60.2.tgz",
+      "integrity": "sha512-2/w+q8jszv9Ww1c+6uJT3OwqhdmGP2/4T17cu8WuwyUuuaCDDJ2ojdyYwZzCxx0GcsZBhzi3HmH+J5pZNXnd+Q==",
       "cpu": [
         "loong64"
       ],
-      "libc": [
-        "musl"
-      ],
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1897,15 +4575,12 @@
       ]
     },
     "node_modules/@rollup/rollup-linux-ppc64-gnu": {
-      "version": "4.60.4",
-      "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-gnu/-/rollup-linux-ppc64-gnu-4.60.4.tgz",
-      "integrity": "sha512-2EU6acNrQLd8tYvo/LXW535wupT3m6fo7HKo6lr7ktQoItxTyOL1ZCR/GfGCuXl2vR+zmfI6eRXkSemafv+iVg==",
+      "version": "4.60.2",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-gnu/-/rollup-linux-ppc64-gnu-4.60.2.tgz",
+      "integrity": "sha512-11+aL5vKheYgczxtPVVRhdptAM2H7fcDR5Gw4/bTcteuZBlH4oP9f5s9zYO9aGZvoGeBpqXI/9TZZihZ609wKw==",
       "cpu": [
         "ppc64"
       ],
-      "libc": [
-        "glibc"
-      ],
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1913,15 +4588,12 @@
       ]
     },
     "node_modules/@rollup/rollup-linux-ppc64-musl": {
-      "version": "4.60.4",
-      "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-musl/-/rollup-linux-ppc64-musl-4.60.4.tgz",
-      "integrity": "sha512-WeBtoMuaMxiiIrO2IYP3xs6GMWkJP2C0EoT8beTLkUPmzV1i/UcOSVw1d5r9KBODtHKilG5yFxsGRnBbK3wJ4A==",
+      "version": "4.60.2",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-musl/-/rollup-linux-ppc64-musl-4.60.2.tgz",
+      "integrity": "sha512-i16fokAGK46IVZuV8LIIwMdtqhin9hfYkCh8pf8iC3QU3LpwL+1FSFGej+O7l3E/AoknL6Dclh2oTdnRMpTzFQ==",
       "cpu": [
         "ppc64"
       ],
-      "libc": [
-        "musl"
-      ],
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1929,15 +4601,12 @@
       ]
     },
     "node_modules/@rollup/rollup-linux-riscv64-gnu": {
-      "version": "4.60.4",
-      "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.60.4.tgz",
-      "integrity": "sha512-FJHFfqpKUI3A10WrWKiFbBZ7yVbGT4q4B5o1qKFFojqpaYoh9LrQgqWCmmcxQzVSXYtyB5bzkXrYzlHTs21MYA==",
+      "version": "4.60.2",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.60.2.tgz",
+      "integrity": "sha512-49FkKS6RGQoriDSK/6E2GkAsAuU5kETFCh7pG4yD/ylj9rKhTmO3elsnmBvRD4PgJPds5W2PkhC82aVwmUcJ7A==",
       "cpu": [
         "riscv64"
       ],
-      "libc": [
-        "glibc"
-      ],
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1945,15 +4614,12 @@
       ]
     },
     "node_modules/@rollup/rollup-linux-riscv64-musl": {
-      "version": "4.60.4",
-      "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-musl/-/rollup-linux-riscv64-musl-4.60.4.tgz",
-      "integrity": "sha512-mcEl6CUT5IAUmQf1m9FYSmVqCJlpQ8r8eyftFUHG8i9OhY7BkBXSUdnLH5DOf0wCOjcP9v/QO93zpmF1SptCCw==",
+      "version": "4.60.2",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-musl/-/rollup-linux-riscv64-musl-4.60.2.tgz",
+      "integrity": "sha512-mjYNkHPfGpUR00DuM1ZZIgs64Hpf4bWcz9Z41+4Q+pgDx73UwWdAYyf6EG/lRFldmdHHzgrYyge5akFUW0D3mQ==",
       "cpu": [
         "riscv64"
       ],
-      "libc": [
-        "musl"
-      ],
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1961,15 +4627,12 @@
       ]
     },
     "node_modules/@rollup/rollup-linux-s390x-gnu": {
-      "version": "4.60.4",
-      "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.60.4.tgz",
-      "integrity": "sha512-ynt3JxVd2w2buzoKDWIyiV1pJW93xlQic1THVLXilz429oijRpSHivZAgp65KBu+cMcgf1eVVjdnTLvPxgCuoQ==",
+      "version": "4.60.2",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.60.2.tgz",
+      "integrity": "sha512-ALyvJz965BQk8E9Al/JDKKDLH2kfKFLTGMlgkAbbYtZuJt9LU8DW3ZoDMCtQpXAltZxwBHevXz5u+gf0yA0YoA==",
       "cpu": [
         "s390x"
       ],
-      "libc": [
-        "glibc"
-      ],
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1977,15 +4640,12 @@
       ]
     },
     "node_modules/@rollup/rollup-linux-x64-gnu": {
-      "version": "4.60.4",
-      "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.60.4.tgz",
-      "integrity": "sha512-Boiz5+MsaROEWDf+GGEwF8VMHGhlUoQMtIPjOgA5fv4osupqTVnJteQNKJwUcnUog2G55jYXH7KZFFiJe0TEzQ==",
+      "version": "4.60.2",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.60.2.tgz",
+      "integrity": "sha512-UQjrkIdWrKI626Du8lCQ6MJp/6V1LAo2bOK9OTu4mSn8GGXIkPXk/Vsp4bLHCd9Z9Iz2OTEaokUE90VweJgIYQ==",
       "cpu": [
         "x64"
       ],
-      "libc": [
-        "glibc"
-      ],
       "license": "MIT",
       "optional": true,
       "os": [
@@ -1993,15 +4653,12 @@
       ]
     },
     "node_modules/@rollup/rollup-linux-x64-musl": {
-      "version": "4.60.4",
-      "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.60.4.tgz",
-      "integrity": "sha512-+qfSY27qIrFfI/Hom04KYFw3GKZSGU4lXus51wsb5EuySfFlWRwjkKWoE9emgRw/ukoT4Udsj4W/+xxG8VbPKg==",
+      "version": "4.60.2",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.60.2.tgz",
+      "integrity": "sha512-bTsRGj6VlSdn/XD4CGyzMnzaBs9bsRxy79eTqTCBsA8TMIEky7qg48aPkvJvFe1HyzQ5oMZdg7AnVlWQSKLTnw==",
       "cpu": [
         "x64"
       ],
-      "libc": [
-        "musl"
-      ],
       "license": "MIT",
       "optional": true,
       "os": [
@@ -2009,9 +4666,9 @@
       ]
     },
     "node_modules/@rollup/rollup-openbsd-x64": {
-      "version": "4.60.4",
-      "resolved": "https://registry.npmjs.org/@rollup/rollup-openbsd-x64/-/rollup-openbsd-x64-4.60.4.tgz",
-      "integrity": "sha512-VpTfOPHgVXEBeeR8hZ2O0F3aSso+JDWqTWmTmzcQKted54IAdUVbxE+j/MVxUsKa8L20HJhv3vUezVPoquqWjA==",
+      "version": "4.60.2",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-openbsd-x64/-/rollup-openbsd-x64-4.60.2.tgz",
+      "integrity": "sha512-6d4Z3534xitaA1FcMWP7mQPq5zGwBmGbhphh2DwaA1aNIXUu3KTOfwrWpbwI4/Gr0uANo7NTtaykFyO2hPuFLg==",
       "cpu": [
         "x64"
       ],
@@ -2022,9 +4679,9 @@
       ]
     },
     "node_modules/@rollup/rollup-openharmony-arm64": {
-      "version": "4.60.4",
-      "resolved": "https://registry.npmjs.org/@rollup/rollup-openharmony-arm64/-/rollup-openharmony-arm64-4.60.4.tgz",
-      "integrity": "sha512-IPOsh5aRYuLv/nkU51X10Bf75Bsf6+gZdx1X+QP5QM6lIJFHHqbHLG0uJn/hWthzo13UAc2umiUorqZy3axoZg==",
+      "version": "4.60.2",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-openharmony-arm64/-/rollup-openharmony-arm64-4.60.2.tgz",
+      "integrity": "sha512-NetAg5iO2uN7eB8zE5qrZ3CSil+7IJt4WDFLcC75Ymywq1VZVD6qJ6EvNLjZ3rEm6gB7XW5JdT60c6MN35Z85Q==",
       "cpu": [
         "arm64"
       ],
@@ -2035,9 +4692,9 @@
       ]
     },
     "node_modules/@rollup/rollup-win32-arm64-msvc": {
-      "version": "4.60.4",
-      "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.60.4.tgz",
-      "integrity": "sha512-4QzE9E81OohJ/HKzHhsqU+zcYYojVOXlFMs1DdyMT6qXl/niOH7AVElmmEdUNHHS/oRkc++d5k6Vy85zFs0DEw==",
+      "version": "4.60.2",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.60.2.tgz",
+      "integrity": "sha512-NCYhOotpgWZ5kdxCZsv6Iudx0wX8980Q/oW4pNFNihpBKsDbEA1zpkfxJGC0yugsUuyDZ7gL37dbzwhR0VI7pQ==",
       "cpu": [
         "arm64"
       ],
@@ -2048,9 +4705,9 @@
       ]
     },
     "node_modules/@rollup/rollup-win32-ia32-msvc": {
-      "version": "4.60.4",
-      "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.60.4.tgz",
-      "integrity": "sha512-zTPgT1YuHHcd+Tmx7h8aml0FWFVelV5N54oHow9SLj+GfoDy/huQ+UV396N/C7KpMDMiPspRktzM1/0r1usYEA==",
+      "version": "4.60.2",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.60.2.tgz",
+      "integrity": "sha512-RXsaOqXxfoUBQoOgvmmijVxJnW2IGB0eoMO7F8FAjaj0UTywUO/luSqimWBJn04WNgUkeNhh7fs7pESXajWmkg==",
       "cpu": [
         "ia32"
       ],
@@ -2061,9 +4718,9 @@
       ]
     },
     "node_modules/@rollup/rollup-win32-x64-gnu": {
-      "version": "4.60.4",
-      "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-gnu/-/rollup-win32-x64-gnu-4.60.4.tgz",
-      "integrity": "sha512-DRS4G7mi9lJxqEDezIkKCaUIKCrLUUDCUaCsTPCi/rtqaC6D/jjwslMQyiDU50Ka0JKpeXeRBFBAXwArY52vBw==",
+      "version": "4.60.2",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-gnu/-/rollup-win32-x64-gnu-4.60.2.tgz",
+      "integrity": "sha512-qdAzEULD+/hzObedtmV6iBpdL5TIbKVztGiK7O3/KYSf+HIzU257+MX1EXJcyIiDbMAqmbwaufcYPvyRryeZtA==",
       "cpu": [
         "x64"
       ],
@@ -2074,9 +4731,9 @@
       ]
     },
     "node_modules/@rollup/rollup-win32-x64-msvc": {
-      "version": "4.60.4",
-      "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.60.4.tgz",
-      "integrity": "sha512-QVTUovf40zgTqlFVrKA1uXMVvU2QWEFWfAH8Wdc48IxLvrJMQVMBRjuQyUpzZCDkakImib9eVazbWlC6ksWtJw==",
+      "version": "4.60.2",
+      "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.60.2.tgz",
+      "integrity": "sha512-Nd/SgG27WoA9e+/TdK74KnHz852TLa94ovOYySo/yMPuTmpckK/jIF2jSwS3g7ELSKXK13/cVdmg1Z/DaCWKxA==",
       "cpu": [
         "x64"
       ],
@@ -2096,47 +4753,47 @@
       }
     },
     "node_modules/@tailwindcss/node": {
-      "version": "4.3.0",
-      "resolved": "https://registry.npmjs.org/@tailwindcss/node/-/node-4.3.0.tgz",
-      "integrity": "sha512-aFb4gUhFOgdh9AXo4IzBEOzBkkAxm9VigwDJnMIYv3lcfXCJVesNfbEaBl4BNgVRyid92AmdviqwBUBRKSeY3g==",
+      "version": "4.2.4",
+      "resolved": "https://registry.npmjs.org/@tailwindcss/node/-/node-4.2.4.tgz",
+      "integrity": "sha512-Ai7+yQPxz3ddrDQzFfBKdHEVBg0w3Zl83jnjuwxnZOsnH9pGn93QHQtpU0p/8rYWxvbFZHneni6p1BSLK4DkGA==",
       "license": "MIT",
       "dependencies": {
         "@jridgewell/remapping": "^2.3.5",
-        "enhanced-resolve": "^5.21.0",
+        "enhanced-resolve": "^5.19.0",
         "jiti": "^2.6.1",
         "lightningcss": "1.32.0",
         "magic-string": "^0.30.21",
         "source-map-js": "^1.2.1",
-        "tailwindcss": "4.3.0"
+        "tailwindcss": "4.2.4"
       }
     },
     "node_modules/@tailwindcss/oxide": {
-      "version": "4.3.0",
-      "resolved": "https://registry.npmjs.org/@tailwindcss/oxide/-/oxide-4.3.0.tgz",
-      "integrity": "sha512-F7HZGBeN9I0/AuuJS5PwcD8xayx5ri5GhjYUDBEVYUkexyA/giwbDNjRVrxSezE3T250OU2K/wp/ltWx3UOefg==",
+      "version": "4.2.4",
+      "resolved": "https://registry.npmjs.org/@tailwindcss/oxide/-/oxide-4.2.4.tgz",
+      "integrity": "sha512-9El/iI069DKDSXwTvB9J4BwdO5JhRrOweGaK25taBAvBXyXqJAX+Jqdvs8r8gKpsI/1m0LeJLyQYTf/WLrBT1Q==",
       "license": "MIT",
       "engines": {
         "node": ">= 20"
       },
       "optionalDependencies": {
-        "@tailwindcss/oxide-android-arm64": "4.3.0",
-        "@tailwindcss/oxide-darwin-arm64": "4.3.0",
-        "@tailwindcss/oxide-darwin-x64": "4.3.0",
-        "@tailwindcss/oxide-freebsd-x64": "4.3.0",
-        "@tailwindcss/oxide-linux-arm-gnueabihf": "4.3.0",
-        "@tailwindcss/oxide-linux-arm64-gnu": "4.3.0",
-        "@tailwindcss/oxide-linux-arm64-musl": "4.3.0",
-        "@tailwindcss/oxide-linux-x64-gnu": "4.3.0",
-        "@tailwindcss/oxide-linux-x64-musl": "4.3.0",
-        "@tailwindcss/oxide-wasm32-wasi": "4.3.0",
-        "@tailwindcss/oxide-win32-arm64-msvc": "4.3.0",
-        "@tailwindcss/oxide-win32-x64-msvc": "4.3.0"
+        "@tailwindcss/oxide-android-arm64": "4.2.4",
+        "@tailwindcss/oxide-darwin-arm64": "4.2.4",
+        "@tailwindcss/oxide-darwin-x64": "4.2.4",
+        "@tailwindcss/oxide-freebsd-x64": "4.2.4",
+        "@tailwindcss/oxide-linux-arm-gnueabihf": "4.2.4",
+        "@tailwindcss/oxide-linux-arm64-gnu": "4.2.4",
+        "@tailwindcss/oxide-linux-arm64-musl": "4.2.4",
+        "@tailwindcss/oxide-linux-x64-gnu": "4.2.4",
+        "@tailwindcss/oxide-linux-x64-musl": "4.2.4",
+        "@tailwindcss/oxide-wasm32-wasi": "4.2.4",
+        "@tailwindcss/oxide-win32-arm64-msvc": "4.2.4",
+        "@tailwindcss/oxide-win32-x64-msvc": "4.2.4"
       }
     },
     "node_modules/@tailwindcss/oxide-android-arm64": {
-      "version": "4.3.0",
-      "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-android-arm64/-/oxide-android-arm64-4.3.0.tgz",
-      "integrity": "sha512-TJPiq67tKlLuObP6RkwvVGDoxCMBVtDgKkLfa/uyj7/FyxvQwHS+UOnVrXXgbEsfUaMgiVvC4KbJnRr26ho4Ng==",
+      "version": "4.2.4",
+      "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-android-arm64/-/oxide-android-arm64-4.2.4.tgz",
+      "integrity": "sha512-e7MOr1SAn9U8KlZzPi1ZXGZHeC5anY36qjNwmZv9pOJ8E4Q6jmD1vyEHkQFmNOIN7twGPEMXRHmitN4zCMN03g==",
       "cpu": [
         "arm64"
       ],
@@ -2150,9 +4807,9 @@
       }
     },
     "node_modules/@tailwindcss/oxide-darwin-arm64": {
-      "version": "4.3.0",
-      "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-darwin-arm64/-/oxide-darwin-arm64-4.3.0.tgz",
-      "integrity": "sha512-oMN/WZRb+SO37BmUElEgeEWuU8E/HXRkiODxJxLe1UTHVXLrdVSgfaJV7pSlhRGMSOiXLuxTIjfsF3wYvz8cgQ==",
+      "version": "4.2.4",
+      "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-darwin-arm64/-/oxide-darwin-arm64-4.2.4.tgz",
+      "integrity": "sha512-tSC/Kbqpz/5/o/C2sG7QvOxAKqyd10bq+ypZNf+9Fi2TvbVbv1zNpcEptcsU7DPROaSbVgUXmrzKhurFvo5eDg==",
       "cpu": [
         "arm64"
       ],
@@ -2166,9 +4823,9 @@
       }
     },
     "node_modules/@tailwindcss/oxide-darwin-x64": {
-      "version": "4.3.0",
-      "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-darwin-x64/-/oxide-darwin-x64-4.3.0.tgz",
-      "integrity": "sha512-N6CUmu4a6bKVADfw77p+iw6Yd9Q3OBhe0veaDX+QazfuVYlQsHfDgxBrsjQ/IW+zywL8mTrNd0SdJT/zgtvMdA==",
+      "version": "4.2.4",
+      "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-darwin-x64/-/oxide-darwin-x64-4.2.4.tgz",
+      "integrity": "sha512-yPyUXn3yO/ufR6+Kzv0t4fCg2qNr90jxXc5QqBpjlPNd0NqyDXcmQb/6weunH/MEDXW5dhyEi+agTDiqa3WsGg==",
       "cpu": [
         "x64"
       ],
@@ -2182,9 +4839,9 @@
       }
     },
     "node_modules/@tailwindcss/oxide-freebsd-x64": {
-      "version": "4.3.0",
-      "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-freebsd-x64/-/oxide-freebsd-x64-4.3.0.tgz",
-      "integrity": "sha512-zDL5hBkQdH5C6MpqbK3gQAgP80tsMwSI26vjOzjJtNCMUo0lFgOItzHKBIupOZNQxt3ouPH7RPhvNhiTfCe5CQ==",
+      "version": "4.2.4",
+      "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-freebsd-x64/-/oxide-freebsd-x64-4.2.4.tgz",
+      "integrity": "sha512-BoMIB4vMQtZsXdGLVc2z+P9DbETkiopogfWZKbWwM8b/1Vinbs4YcUwo+kM/KeLkX3Ygrf4/PsRndKaYhS8Eiw==",
       "cpu": [
         "x64"
       ],
@@ -2198,9 +4855,9 @@
       }
     },
     "node_modules/@tailwindcss/oxide-linux-arm-gnueabihf": {
-      "version": "4.3.0",
-      "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm-gnueabihf/-/oxide-linux-arm-gnueabihf-4.3.0.tgz",
-      "integrity": "sha512-R06HdNi7A7OEoMsf6d4tjZ71RCWnZQPHj2mnotSFURjNLdBC+cIgXQ7l81CqeoiQftjf6OOblxXMInMgN2VzMA==",
+      "version": "4.2.4",
+      "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm-gnueabihf/-/oxide-linux-arm-gnueabihf-4.2.4.tgz",
+      "integrity": "sha512-7pIHBLTHYRAlS7V22JNuTh33yLH4VElwKtB3bwchK/UaKUPpQ0lPQiOWcbm4V3WP2I6fNIJ23vABIvoy2izdwA==",
       "cpu": [
         "arm"
       ],
@@ -2214,15 +4871,12 @@
       }
     },
     "node_modules/@tailwindcss/oxide-linux-arm64-gnu": {
-      "version": "4.3.0",
-      "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm64-gnu/-/oxide-linux-arm64-gnu-4.3.0.tgz",
-      "integrity": "sha512-qTJHELX8jetjhRQHCLilkVLmybpzNQAtaI/gaoVoidn/ufbNDbAo8KlK2J+yPoc8wQxvDxCmh/5lr8nC1+lTbg==",
+      "version": "4.2.4",
+      "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm64-gnu/-/oxide-linux-arm64-gnu-4.2.4.tgz",
+      "integrity": "sha512-+E4wxJ0ZGOzSH325reXTWB48l42i93kQqMvDyz5gqfRzRZ7faNhnmvlV4EPGJU3QJM/3Ab5jhJ5pCRUsKn6OQw==",
       "cpu": [
         "arm64"
       ],
-      "libc": [
-        "glibc"
-      ],
       "license": "MIT",
       "optional": true,
       "os": [
@@ -2233,15 +4887,12 @@
       }
     },
     "node_modules/@tailwindcss/oxide-linux-arm64-musl": {
-      "version": "4.3.0",
-      "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm64-musl/-/oxide-linux-arm64-musl-4.3.0.tgz",
-      "integrity": "sha512-Z6sukiQsngnWO+l39X4pPbiWT81IC+PLKF+PHxIlyZbGNb9MODfYlXEVlFvej5BOZInWX01kVyzeLvHsXhfczQ==",
+      "version": "4.2.4",
+      "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm64-musl/-/oxide-linux-arm64-musl-4.2.4.tgz",
+      "integrity": "sha512-bBADEGAbo4ASnppIziaQJelekCxdMaxisrk+fB7Thit72IBnALp9K6ffA2G4ruj90G9XRS2VQ6q2bCKbfFV82g==",
       "cpu": [
         "arm64"
       ],
-      "libc": [
-        "musl"
-      ],
       "license": "MIT",
       "optional": true,
       "os": [
@@ -2252,15 +4903,12 @@
       }
     },
     "node_modules/@tailwindcss/oxide-linux-x64-gnu": {
-      "version": "4.3.0",
-      "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-x64-gnu/-/oxide-linux-x64-gnu-4.3.0.tgz",
-      "integrity": "sha512-DRNdQRpSGzRGfARVuVkxvM8Q12nh19l4BF/G7zGA1oe+9wcC6saFBHTISrpIcKzhiXtSrlSrluCfvMuledoCTQ==",
+      "version": "4.2.4",
+      "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-x64-gnu/-/oxide-linux-x64-gnu-4.2.4.tgz",
+      "integrity": "sha512-7Mx25E4WTfnht0TVRTyC00j3i0M+EeFe7wguMDTlX4mRxafznw0CA8WJkFjWYH5BlgELd1kSjuU2JiPnNZbJDA==",
       "cpu": [
         "x64"
       ],
-      "libc": [
-        "glibc"
-      ],
       "license": "MIT",
       "optional": true,
       "os": [
@@ -2271,15 +4919,12 @@
       }
     },
     "node_modules/@tailwindcss/oxide-linux-x64-musl": {
-      "version": "4.3.0",
-      "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-x64-musl/-/oxide-linux-x64-musl-4.3.0.tgz",
-      "integrity": "sha512-Z0IADbDo8bh6I7h2IQMx601AdXBLfFpEdUotft86evd/8ZPflZe9COPO8Q1vw+pfLWIUo9zN/JGZvwuAJqduqg==",
+      "version": "4.2.4",
+      "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-x64-musl/-/oxide-linux-x64-musl-4.2.4.tgz",
+      "integrity": "sha512-2wwJRF7nyhOR0hhHoChc04xngV3iS+akccHTGtz965FwF0up4b2lOdo6kI1EbDaEXKgvcrFBYcYQQ/rrnWFVfA==",
       "cpu": [
         "x64"
       ],
-      "libc": [
-        "musl"
-      ],
       "license": "MIT",
       "optional": true,
       "os": [
@@ -2290,9 +4935,9 @@
       }
     },
     "node_modules/@tailwindcss/oxide-wasm32-wasi": {
-      "version": "4.3.0",
-      "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-wasm32-wasi/-/oxide-wasm32-wasi-4.3.0.tgz",
-      "integrity": "sha512-HNZGOUxEmElksYR7S6sC5jTeNGpobAsy9u7Gu0AskJ8/20FR9GqebUyB+HBcU/ax6BHuiuJi+Oda4B+YX6H1yA==",
+      "version": "4.2.4",
+      "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-wasm32-wasi/-/oxide-wasm32-wasi-4.2.4.tgz",
+      "integrity": "sha512-FQsqApeor8Fo6gUEklzmaa9994orJZZDBAlQpK2Mq+DslRKFJeD6AjHpBQ0kZFQohVr8o85PPh8eOy86VlSCmw==",
       "bundleDependencies": [
         "@napi-rs/wasm-runtime",
         "@emnapi/core",
@@ -2307,10 +4952,10 @@
       "license": "MIT",
       "optional": true,
       "dependencies": {
-        "@emnapi/core": "^1.10.0",
-        "@emnapi/runtime": "^1.10.0",
-        "@emnapi/wasi-threads": "^1.2.1",
-        "@napi-rs/wasm-runtime": "^1.1.4",
+        "@emnapi/core": "^1.8.1",
+        "@emnapi/runtime": "^1.8.1",
+        "@emnapi/wasi-threads": "^1.1.0",
+        "@napi-rs/wasm-runtime": "^1.1.1",
         "@tybys/wasm-util": "^0.10.1",
         "tslib": "^2.8.1"
       },
@@ -2318,70 +4963,10 @@
         "node": ">=14.0.0"
       }
     },
-    "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/core": {
-      "version": "1.10.0",
-      "inBundle": true,
-      "license": "MIT",
-      "optional": true,
-      "dependencies": {
-        "@emnapi/wasi-threads": "1.2.1",
-        "tslib": "^2.4.0"
-      }
-    },
-    "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/runtime": {
-      "version": "1.10.0",
-      "inBundle": true,
-      "license": "MIT",
-      "optional": true,
-      "dependencies": {
-        "tslib": "^2.4.0"
-      }
-    },
-    "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/wasi-threads": {
-      "version": "1.2.1",
-      "inBundle": true,
-      "license": "MIT",
-      "optional": true,
-      "dependencies": {
-        "tslib": "^2.4.0"
-      }
-    },
-    "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@napi-rs/wasm-runtime": {
-      "version": "1.1.4",
-      "inBundle": true,
-      "license": "MIT",
-      "optional": true,
-      "dependencies": {
-        "@tybys/wasm-util": "^0.10.1"
-      },
-      "funding": {
-        "type": "github",
-        "url": "https://github.com/sponsors/Brooooooklyn"
-      },
-      "peerDependencies": {
-        "@emnapi/core": "^1.7.1",
-        "@emnapi/runtime": "^1.7.1"
-      }
-    },
-    "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@tybys/wasm-util": {
-      "version": "0.10.1",
-      "inBundle": true,
-      "license": "MIT",
-      "optional": true,
-      "dependencies": {
-        "tslib": "^2.4.0"
-      }
-    },
-    "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/tslib": {
-      "version": "2.8.1",
-      "inBundle": true,
-      "license": "0BSD",
-      "optional": true
-    },
     "node_modules/@tailwindcss/oxide-win32-arm64-msvc": {
-      "version": "4.3.0",
-      "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-arm64-msvc/-/oxide-win32-arm64-msvc-4.3.0.tgz",
-      "integrity": "sha512-Pe+RPVTi1T+qymuuRpcdvwSVZjnll/f7n8gBxMMh3xLTctMDKqpdfGimbMyioqtLhUYZxdJ9wGNhV7MKHvgZsQ==",
+      "version": "4.2.4",
+      "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-arm64-msvc/-/oxide-win32-arm64-msvc-4.2.4.tgz",
+      "integrity": "sha512-L9BXqxC4ToVgwMFqj3pmZRqyHEztulpUJzCxUtLjobMCzTPsGt1Fa9enKbOpY2iIyVtaHNeNvAK8ERP/64sqGQ==",
       "cpu": [
         "arm64"
       ],
@@ -2395,9 +4980,9 @@
       }
     },
     "node_modules/@tailwindcss/oxide-win32-x64-msvc": {
-      "version": "4.3.0",
-      "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-x64-msvc/-/oxide-win32-x64-msvc-4.3.0.tgz",
-      "integrity": "sha512-Mvrf2kXW/yeW/OTezZlCGOirXRcUuLIBx/5Y12BaPM7wJoryG6dfS/NJL8aBPqtTEx/Vm4T4vKzFUcKDT+TKUA==",
+      "version": "4.2.4",
+      "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-x64-msvc/-/oxide-win32-x64-msvc-4.2.4.tgz",
+      "integrity": "sha512-ESlKG0EpVJQwRjXDDa9rLvhEAh0mhP1sF7sap9dNZT0yyl9SAG6T7gdP09EH0vIv0UNTlo6jPWyujD6559fZvw==",
       "cpu": [
         "x64"
       ],
@@ -2411,14 +4996,14 @@
       }
     },
     "node_modules/@tailwindcss/vite": {
-      "version": "4.3.0",
-      "resolved": "https://registry.npmjs.org/@tailwindcss/vite/-/vite-4.3.0.tgz",
-      "integrity": "sha512-t6J3OrB5Fc0ExuhohouH0fWUGMYL6PTLhW+E7zIk/pdbnJARZDCwjBznFnkh5ynRnIRSI4YjtTH0t6USjJISrw==",
+      "version": "4.2.4",
+      "resolved": "https://registry.npmjs.org/@tailwindcss/vite/-/vite-4.2.4.tgz",
+      "integrity": "sha512-pCvohwOCspk3ZFn6eJzrrX3g4n2JY73H6MmYC87XfGPyTty4YsCjYTMArRZm/zOI8dIt3+EcrLHAFPe5A4bgtw==",
       "license": "MIT",
       "dependencies": {
-        "@tailwindcss/node": "4.3.0",
-        "@tailwindcss/oxide": "4.3.0",
-        "tailwindcss": "4.3.0"
+        "@tailwindcss/node": "4.2.4",
+        "@tailwindcss/oxide": "4.2.4",
+        "tailwindcss": "4.2.4"
       },
       "peerDependencies": {
         "vite": "^5.2.0 || ^6 || ^7 || ^8"
@@ -2470,10 +5055,9 @@
       }
     },
     "node_modules/@types/estree": {
-      "version": "1.0.9",
-      "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.9.tgz",
-      "integrity": "sha512-GhdPgy1el4/ImP05X05Uw4cw2/M93BCUmnEvWZNStlCzEKME4Fkk+YpoA5OiHNQmoS7Cafb8Xa3Pya8m1Qrzeg==",
-      "dev": true,
+      "version": "1.0.8",
+      "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.8.tgz",
+      "integrity": "sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w==",
       "license": "MIT"
     },
     "node_modules/@types/json-schema": {
@@ -2484,20 +5068,22 @@
       "license": "MIT"
     },
     "node_modules/@types/node": {
-      "version": "24.12.4",
-      "resolved": "https://registry.npmjs.org/@types/node/-/node-24.12.4.tgz",
-      "integrity": "sha512-GUUEShf+PBCGW2KaXwcIt3Yk+e3pkKwWKb9GSyM9WQVE+ep2jzmHdGsHzu4wgcZy5fN9FBdVzjpBQsYlpfpgLA==",
+      "version": "24.12.2",
+      "resolved": "https://registry.npmjs.org/@types/node/-/node-24.12.2.tgz",
+      "integrity": "sha512-A1sre26ke7HDIuY/M23nd9gfB+nrmhtYyMINbjI1zHJxYteKR6qSMX56FsmjMcDb3SMcjJg5BiRRgOCC/yBD0g==",
       "devOptional": true,
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "undici-types": "~7.16.0"
       }
     },
     "node_modules/@types/react": {
-      "version": "19.2.15",
-      "resolved": "https://registry.npmjs.org/@types/react/-/react-19.2.15.tgz",
-      "integrity": "sha512-eRwcGNHve+E8qtEQSSRl6urh+rFop4v8gm6O8rGv25CodbvFdLjA1vVQ1KkiFE0w0UPOnb8tDiFKL5lp0rtY5Q==",
+      "version": "19.2.14",
+      "resolved": "https://registry.npmjs.org/@types/react/-/react-19.2.14.tgz",
+      "integrity": "sha512-ilcTH/UniCkMdtexkoCN0bI7pMcJDvmQFPvuPvmEaYA/NSfFTAgdUSLAoVjaRJm7+6PvcM+q1zYOwS4wTYMF9w==",
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "csstype": "^3.2.2"
       }
@@ -2508,6 +5094,7 @@
       "integrity": "sha512-jp2L/eY6fn+KgVVQAOqYItbF0VY/YApe5Mz2F0aykSO8gx31bYCZyvSeYxCHKvzHG5eZjc+zyaS5BrBWya2+kQ==",
       "devOptional": true,
       "license": "MIT",
+      "peer": true,
       "peerDependencies": {
         "@types/react": "^19.2.0"
       }
@@ -2528,17 +5115,17 @@
       "license": "MIT"
     },
     "node_modules/@typescript-eslint/eslint-plugin": {
-      "version": "8.59.4",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.59.4.tgz",
-      "integrity": "sha512-PegsU+XfyJJNjd4+u/k6f9yTyp0lEXXiPopUNobZcIAUJFGICFLN+sP0Rb3JehVmiij1Ph0dFGYqODoRo/2+6A==",
+      "version": "8.59.1",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.59.1.tgz",
+      "integrity": "sha512-BOziFIfE+6osHO9FoJG4zjoHUcvI7fTNBSpdAwrNH0/TLvzjsk2oo8XSSOT2HhqUyhZPfHv4UOffoJ9oEEQ7Ag==",
       "dev": true,
       "license": "MIT",
       "dependencies": {
         "@eslint-community/regexpp": "^4.12.2",
-        "@typescript-eslint/scope-manager": "8.59.4",
-        "@typescript-eslint/type-utils": "8.59.4",
-        "@typescript-eslint/utils": "8.59.4",
-        "@typescript-eslint/visitor-keys": "8.59.4",
+        "@typescript-eslint/scope-manager": "8.59.1",
+        "@typescript-eslint/type-utils": "8.59.1",
+        "@typescript-eslint/utils": "8.59.1",
+        "@typescript-eslint/visitor-keys": "8.59.1",
         "ignore": "^7.0.5",
         "natural-compare": "^1.4.0",
         "ts-api-utils": "^2.5.0"
@@ -2551,7 +5138,7 @@
         "url": "https://opencollective.com/typescript-eslint"
       },
       "peerDependencies": {
-        "@typescript-eslint/parser": "^8.59.4",
+        "@typescript-eslint/parser": "^8.59.1",
         "eslint": "^8.57.0 || ^9.0.0 || ^10.0.0",
         "typescript": ">=4.8.4 <6.1.0"
       }
@@ -2567,16 +5154,17 @@
       }
     },
     "node_modules/@typescript-eslint/parser": {
-      "version": "8.59.4",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-8.59.4.tgz",
-      "integrity": "sha512-zORHqO/tuhxY1zWuTvMUqddRxpiFJ72xVfcNoWpqdLjs6lfPbuQBJuW4pk+49/uBMy7Ssr4bzgjiKmmDB1UbZQ==",
+      "version": "8.59.1",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-8.59.1.tgz",
+      "integrity": "sha512-HDQH9O/47Dxi1ceDhBXdaldtf/WV9yRYMjbjCuNk3qnaTD564qwv61Y7+gTxwxRKzSrgO5uhtw584igXVuuZkA==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "dependencies": {
-        "@typescript-eslint/scope-manager": "8.59.4",
-        "@typescript-eslint/types": "8.59.4",
-        "@typescript-eslint/typescript-estree": "8.59.4",
-        "@typescript-eslint/visitor-keys": "8.59.4",
+        "@typescript-eslint/scope-manager": "8.59.1",
+        "@typescript-eslint/types": "8.59.1",
+        "@typescript-eslint/typescript-estree": "8.59.1",
+        "@typescript-eslint/visitor-keys": "8.59.1",
         "debug": "^4.4.3"
       },
       "engines": {
@@ -2592,14 +5180,14 @@
       }
     },
     "node_modules/@typescript-eslint/project-service": {
-      "version": "8.59.4",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/project-service/-/project-service-8.59.4.tgz",
-      "integrity": "sha512-Ly00Vu4oAacfDeHp2Zg85ioNG6l8HG+tN1D7J+xTHSxu9y0awYKJ2zH1rFBn8ZSfuGK+7FxK3Cgl3uAz0aZZLg==",
+      "version": "8.59.1",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/project-service/-/project-service-8.59.1.tgz",
+      "integrity": "sha512-+MuHQlHiEr00Of/IQbE/MmEoi44znZHbR/Pz7Opq4HryUOlRi+/44dro9Ycy8Fyo+/024IWtw8m4JUMCGTYxDg==",
       "dev": true,
       "license": "MIT",
       "dependencies": {
-        "@typescript-eslint/tsconfig-utils": "^8.59.4",
-        "@typescript-eslint/types": "^8.59.4",
+        "@typescript-eslint/tsconfig-utils": "^8.59.1",
+        "@typescript-eslint/types": "^8.59.1",
         "debug": "^4.4.3"
       },
       "engines": {
@@ -2614,14 +5202,14 @@
       }
     },
     "node_modules/@typescript-eslint/scope-manager": {
-      "version": "8.59.4",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-8.59.4.tgz",
-      "integrity": "sha512-mUeR/3H1WrTAddJrwut8OoPjfauaztMQmRwV5fQTUyNVJCLiUXXe4lGEyYIL2oFDpP7UtgbGJXCt72wT0z2S3Q==",
+      "version": "8.59.1",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-8.59.1.tgz",
+      "integrity": "sha512-LwuHQI4pDOYVKvmH2dkaJo6YZCSgouVgnS/z7yBPKBMvgtBvyLqiLy9Z6b7+m/TRcX1NFYUqZetI5Y+aT4GEfg==",
       "dev": true,
       "license": "MIT",
       "dependencies": {
-        "@typescript-eslint/types": "8.59.4",
-        "@typescript-eslint/visitor-keys": "8.59.4"
+        "@typescript-eslint/types": "8.59.1",
+        "@typescript-eslint/visitor-keys": "8.59.1"
       },
       "engines": {
         "node": "^18.18.0 || ^20.9.0 || >=21.1.0"
@@ -2632,9 +5220,9 @@
       }
     },
     "node_modules/@typescript-eslint/tsconfig-utils": {
-      "version": "8.59.4",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/tsconfig-utils/-/tsconfig-utils-8.59.4.tgz",
-      "integrity": "sha512-DLCpnKgD4alVxTBSKulK+gU1KCqOgUXfDRDXh2mZgzokQKa/70ax93I2uVO3m/LLvIAtWZIFoiifudmIqAxpMA==",
+      "version": "8.59.1",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/tsconfig-utils/-/tsconfig-utils-8.59.1.tgz",
+      "integrity": "sha512-/0nEyPbX7gRsk0Uwfe4ALwwgxuA66d/l2mhRDNlAvaj4U3juhUtJNq0DsY8M2AYwwb9rEq2hrC3IcIcEt++iJA==",
       "dev": true,
       "license": "MIT",
       "engines": {
@@ -2649,15 +5237,15 @@
       }
     },
     "node_modules/@typescript-eslint/type-utils": {
-      "version": "8.59.4",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-8.59.4.tgz",
-      "integrity": "sha512-uonTuPAAKr9XaBGqJ3LjYTh72zy5DyGesljO9gtmk/eFW0W1fRHjnwVYKB35Lm8d5Q5CluEW3gPHjTvZTmgrfA==",
+      "version": "8.59.1",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-8.59.1.tgz",
+      "integrity": "sha512-klWPBR2ciQHS3f++ug/mVnWKPjBUo7icEL3FAO1lhAR1Z1i5NQYZ1EannMSRYcq5qCv5wNALlXr6fksRHyYl7w==",
       "dev": true,
       "license": "MIT",
       "dependencies": {
-        "@typescript-eslint/types": "8.59.4",
-        "@typescript-eslint/typescript-estree": "8.59.4",
-        "@typescript-eslint/utils": "8.59.4",
+        "@typescript-eslint/types": "8.59.1",
+        "@typescript-eslint/typescript-estree": "8.59.1",
+        "@typescript-eslint/utils": "8.59.1",
         "debug": "^4.4.3",
         "ts-api-utils": "^2.5.0"
       },
@@ -2674,9 +5262,9 @@
       }
     },
     "node_modules/@typescript-eslint/types": {
-      "version": "8.59.4",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-8.59.4.tgz",
-      "integrity": "sha512-F1o7WJcCq+bc8dwcO/YsSEOudAH8RDtaOhM6wcAQhcUsFhnWQl81JKy48q1hoxAU0qrzM89+31GYh1515Zde3Q==",
+      "version": "8.59.1",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-8.59.1.tgz",
+      "integrity": "sha512-ZDCjgccSdYPw5Bxh+my4Z0lJU96ZDN7jbBzvmEn0FZx3RtU1C7VWl6NbDx94bwY3V5YsgwRzJPOgeY2Q/nLG8A==",
       "dev": true,
       "license": "MIT",
       "engines": {
@@ -2688,16 +5276,16 @@
       }
     },
     "node_modules/@typescript-eslint/typescript-estree": {
-      "version": "8.59.4",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-8.59.4.tgz",
-      "integrity": "sha512-F+RuOmcDXo4+TPdfd/TCLS3m2nw8gE9XXyZLrA3JBfaA5tz9TtdkyD3YJFmPxulyc2cKbEok/CvFE3MgSLWnag==",
+      "version": "8.59.1",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-8.59.1.tgz",
+      "integrity": "sha512-OUd+vJS05sSkOip+BkZ/2NS8RMxrAAJemsC6vU3kmfLyeaJT0TftHkV9mcx2107MmsBVXXexhVu4F0TZXyMl4g==",
       "dev": true,
       "license": "MIT",
       "dependencies": {
-        "@typescript-eslint/project-service": "8.59.4",
-        "@typescript-eslint/tsconfig-utils": "8.59.4",
-        "@typescript-eslint/types": "8.59.4",
-        "@typescript-eslint/visitor-keys": "8.59.4",
+        "@typescript-eslint/project-service": "8.59.1",
+        "@typescript-eslint/tsconfig-utils": "8.59.1",
+        "@typescript-eslint/types": "8.59.1",
+        "@typescript-eslint/visitor-keys": "8.59.1",
         "debug": "^4.4.3",
         "minimatch": "^10.2.2",
         "semver": "^7.7.3",
@@ -2726,9 +5314,9 @@
       }
     },
     "node_modules/@typescript-eslint/typescript-estree/node_modules/brace-expansion": {
-      "version": "5.0.6",
-      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.6.tgz",
-      "integrity": "sha512-kLpxurY4Z4r9sgMsyG0Z9uzsBlgiU/EFKhj/h91/8yHu0edo7XuixOIH3VcJ8kkxs6/jPzoI6U9Vj3WqbMQ94g==",
+      "version": "5.0.5",
+      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.5.tgz",
+      "integrity": "sha512-VZznLgtwhn+Mact9tfiwx64fA9erHH/MCXEUfB/0bX/6Fz6ny5EGTXYltMocqg4xFAQZtnO3DHWWXi8RiuN7cQ==",
       "dev": true,
       "license": "MIT",
       "dependencies": {
@@ -2755,9 +5343,9 @@
       }
     },
     "node_modules/@typescript-eslint/typescript-estree/node_modules/semver": {
-      "version": "7.8.0",
-      "resolved": "https://registry.npmjs.org/semver/-/semver-7.8.0.tgz",
-      "integrity": "sha512-AcM7dV/5ul4EekoQ29Agm5vri8JNqRyj39o0qpX6vDF2GZrtutZl5RwgD1XnZjiTAfncsJhMI48QQH3sN87YNA==",
+      "version": "7.7.4",
+      "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.4.tgz",
+      "integrity": "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==",
       "dev": true,
       "license": "ISC",
       "bin": {
@@ -2768,16 +5356,16 @@
       }
     },
     "node_modules/@typescript-eslint/utils": {
-      "version": "8.59.4",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-8.59.4.tgz",
-      "integrity": "sha512-cYXeNAUsG4lJo5dbc1FcKm+JwIWrj1/UpTORsC6tGMjEZ81DYcvIr9/ueikhMa/Y/gDQYGp+YX9/xQrXje5BJw==",
+      "version": "8.59.1",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-8.59.1.tgz",
+      "integrity": "sha512-3pIeoXhCeYH9FSCBI8P3iNwJlGuzPlYKkTlen2O9T1DSeeg8UG8jstq6BLk+Mda0qup7mgk4z4XL4OzRaxZ8LA==",
       "dev": true,
       "license": "MIT",
       "dependencies": {
         "@eslint-community/eslint-utils": "^4.9.1",
-        "@typescript-eslint/scope-manager": "8.59.4",
-        "@typescript-eslint/types": "8.59.4",
-        "@typescript-eslint/typescript-estree": "8.59.4"
+        "@typescript-eslint/scope-manager": "8.59.1",
+        "@typescript-eslint/types": "8.59.1",
+        "@typescript-eslint/typescript-estree": "8.59.1"
       },
       "engines": {
         "node": "^18.18.0 || ^20.9.0 || >=21.1.0"
@@ -2792,13 +5380,13 @@
       }
     },
     "node_modules/@typescript-eslint/visitor-keys": {
-      "version": "8.59.4",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-8.59.4.tgz",
-      "integrity": "sha512-U3gxVaDVnuZKhSspW/MzMxE1kq7zOdc072FcSNoqA1I9p8HyKbBFfEHoWckBAMgNMph4MamwS5iTVzFmrnt8TQ==",
+      "version": "8.59.1",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-8.59.1.tgz",
+      "integrity": "sha512-LdDNl6C5iJExcM0Yh0PwAIBb9PrSiCsWamF/JyEZawm3kFDnRoaq3LGE4bpyRao/fWeGKKyw7icx0YxrLFC5Cg==",
       "dev": true,
       "license": "MIT",
       "dependencies": {
-        "@typescript-eslint/types": "8.59.4",
+        "@typescript-eslint/types": "8.59.1",
         "eslint-visitor-keys": "^5.0.0"
       },
       "engines": {
@@ -2900,6 +5488,7 @@
       "integrity": "sha512-UVJyE9MttOsBQIDKw1skb9nAwQuR5wuGD3+82K6JgJlm/Y+KI92oNsMNGZCYdDsVtRHSak0pcV5Dno5+4jh9sw==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "bin": {
         "acorn": "bin/acorn"
       },
@@ -2957,6 +5546,18 @@
       "dev": true,
       "license": "Python-2.0"
     },
+    "node_modules/aria-hidden": {
+      "version": "1.2.6",
+      "resolved": "https://registry.npmjs.org/aria-hidden/-/aria-hidden-1.2.6.tgz",
+      "integrity": "sha512-ik3ZgC9dY/lYVVM++OISsaYDeg1tb0VtP5uL3ouh1koGOaUMDPpbFIei4JkFimWUFPn90sbMNMXQAIVOlnYKJA==",
+      "license": "MIT",
+      "dependencies": {
+        "tslib": "^2.0.0"
+      },
+      "engines": {
+        "node": ">=10"
+      }
+    },
     "node_modules/assign-symbols": {
       "version": "1.0.0",
       "resolved": "https://registry.npmjs.org/assign-symbols/-/assign-symbols-1.0.0.tgz",
@@ -3003,9 +5604,9 @@
       "license": "MIT"
     },
     "node_modules/baseline-browser-mapping": {
-      "version": "2.10.31",
-      "resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.10.31.tgz",
-      "integrity": "sha512-MujYO3eP72uvmSE0i4wltsodRfIpZATP3jvzRNRGGxgzId7aVocVJJV3nf01qnzzKFGxQVC9bpWxl5cjxTr/7Q==",
+      "version": "2.10.24",
+      "resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.10.24.tgz",
+      "integrity": "sha512-I2NkZOOrj2XuguvWCK6OVh9GavsNjZjK908Rq3mIBK25+GD8vPX5w2WdxVqnQ7xx3SrZJiCiZFu+/Oz50oSYSA==",
       "dev": true,
       "license": "Apache-2.0",
       "bin": {
@@ -3052,6 +5653,7 @@
         }
       ],
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "baseline-browser-mapping": "^2.10.12",
         "caniuse-lite": "^1.0.30001782",
@@ -3101,9 +5703,9 @@
       }
     },
     "node_modules/caniuse-lite": {
-      "version": "1.0.30001793",
-      "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001793.tgz",
-      "integrity": "sha512-iwSsYWaCOoh26cV8NwNRViHlrfUvYsHDfRVcbtmw0Kg6PJIZZXwMkj1442FYLBGkeUf1juAsU3DTfxW579mrPA==",
+      "version": "1.0.30001791",
+      "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001791.tgz",
+      "integrity": "sha512-yk0l/YSrOnFZk3UROpDLQD9+kC1l4meK/wed583AXrzoarMGJcbRi2Q4RaUYbKxYAsZ8sWmaSa/DsLmdBeI1vQ==",
       "dev": true,
       "funding": [
         {
@@ -3559,6 +6161,7 @@
       "resolved": "https://registry.npmjs.org/d3-selection/-/d3-selection-3.0.0.tgz",
       "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==",
       "license": "ISC",
+      "peer": true,
       "engines": {
         "node": ">=12"
       }
@@ -3710,6 +6313,12 @@
         "node": ">=8"
       }
     },
+    "node_modules/detect-node-es": {
+      "version": "1.1.0",
+      "resolved": "https://registry.npmjs.org/detect-node-es/-/detect-node-es-1.1.0.tgz",
+      "integrity": "sha512-ypdmJU/TbBby2Dxibuv7ZLW3Bs1QEmM7nHjEANfohJLvE0XVujisn1qPJcZxg+qDucsr+bP6fLD1rPS3AhJ7EQ==",
+      "license": "MIT"
+    },
     "node_modules/dom-serializer": {
       "version": "2.0.0",
       "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz",
@@ -3778,16 +6387,16 @@
       }
     },
     "node_modules/electron-to-chromium": {
-      "version": "1.5.360",
-      "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.360.tgz",
-      "integrity": "sha512-GkcBt6YYAw9SxFWn+xVar4cLVGlXVuswwtRLBozi2zp0GjXs4ZnOrqV4zbXzg35n7w81hCkyJNYicgXlVHAmBA==",
+      "version": "1.5.344",
+      "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.344.tgz",
+      "integrity": "sha512-4MxfbmNDm+KPh066EZy+eUnkcDPcZ35wNmOWzFuh/ijvHsve6kbLTLURy88uCNK5FbpN+yk2nQY6BYh1GEt+wg==",
       "dev": true,
       "license": "ISC"
     },
     "node_modules/enhanced-resolve": {
-      "version": "5.21.6",
-      "resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.21.6.tgz",
-      "integrity": "sha512-aNnGCvbJ/RIyWo1IuhNdVjnNF+EjH9wpzpNHt+ci/m9He9LJvUN8wrCcXjp9cWsGNAuvSpVFTx/vraAFQ8qGjQ==",
+      "version": "5.21.0",
+      "resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.21.0.tgz",
+      "integrity": "sha512-otxSQPw4lkOZWkHpB3zaEQs6gWYEsmX4xQF68ElXC/TWvGxGMSGOvoNbaLXm6/cS/fSfHtsEdw90y20PCd+sCA==",
       "license": "MIT",
       "dependencies": {
         "graceful-fs": "^4.2.4",
@@ -3878,6 +6487,7 @@
       "integrity": "sha512-XoMjdBOwe/esVgEvLmNsD3IRHkm7fbKIUGvrleloJXUZgDHig2IPWNniv+GwjyJXzuNqVjlr5+4yVUZjycJwfQ==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@eslint-community/eslint-utils": "^4.8.0",
         "@eslint-community/regexpp": "^4.12.1",
@@ -4188,13 +6798,13 @@
       }
     },
     "node_modules/framer-motion": {
-      "version": "12.39.0",
-      "resolved": "https://registry.npmjs.org/framer-motion/-/framer-motion-12.39.0.tgz",
-      "integrity": "sha512-+vnLfzrv0MzjLzNl+nvNvR7jdg3q4cxxjz/YvzfifHl0TREtL00cs1RoMTxs+1PzLiEqZGV6gYsBY0oEAYZ24w==",
+      "version": "12.38.0",
+      "resolved": "https://registry.npmjs.org/framer-motion/-/framer-motion-12.38.0.tgz",
+      "integrity": "sha512-rFYkY/pigbcswl1XQSb7q424kSTQ8q6eAC+YUsSKooHQYuLdzdHjrt6uxUC+PRAO++q5IS7+TamgIw1AphxR+g==",
       "license": "MIT",
       "dependencies": {
-        "motion-dom": "^12.39.0",
-        "motion-utils": "^12.39.0",
+        "motion-dom": "^12.38.0",
+        "motion-utils": "^12.36.0",
         "tslib": "^2.4.0"
       },
       "peerDependencies": {
@@ -4238,6 +6848,15 @@
         "node": ">=6.9.0"
       }
     },
+    "node_modules/get-nonce": {
+      "version": "1.0.1",
+      "resolved": "https://registry.npmjs.org/get-nonce/-/get-nonce-1.0.1.tgz",
+      "integrity": "sha512-FJhYRoDaiatfEkUK8HKlicmu/3SGFD51q3itKDGoSTysQJBnfOcxU5GxnhE1E6soB76MbT0MBtnKJuXyAx+96Q==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=6"
+      }
+    },
     "node_modules/get-value": {
       "version": "2.0.6",
       "resolved": "https://registry.npmjs.org/get-value/-/get-value-2.0.6.tgz",
@@ -4261,9 +6880,9 @@
       }
     },
     "node_modules/globals": {
-      "version": "17.6.0",
-      "resolved": "https://registry.npmjs.org/globals/-/globals-17.6.0.tgz",
-      "integrity": "sha512-sepffkT8stwnIYbsMBpoCHJuJM5l98FUF2AnE07hfvE0m/qp3R586hw4jF4uadbhvg1ooIdzuu7CsfD2jzCaNA==",
+      "version": "17.5.0",
+      "resolved": "https://registry.npmjs.org/globals/-/globals-17.5.0.tgz",
+      "integrity": "sha512-qoV+HK2yFl/366t2/Cb3+xxPUo5BuMynomoDmiaZBIdbs+0pYbjfZU+twLhGKp4uCZ/+NbtpVepH5bGCxRyy2g==",
       "dev": true,
       "license": "MIT",
       "engines": {
@@ -4283,7 +6902,8 @@
       "version": "3.15.0",
       "resolved": "https://registry.npmjs.org/gsap/-/gsap-3.15.0.tgz",
       "integrity": "sha512-dMW4CWBTUK1AEEDeZc1g4xpPGIrSf9fJF960qbTZmN/QwZIWY5wgliS6JWl9/25fpTGJrMRtSjGtOmPnfjZB+A==",
-      "license": "Standard 'no charge' license: https://gsap.com/standard-license."
+      "license": "Standard 'no charge' license: https://gsap.com/standard-license.",
+      "peer": true
     },
     "node_modules/has-flag": {
       "version": "4.0.0",
@@ -4500,9 +7120,9 @@
       }
     },
     "node_modules/jiti": {
-      "version": "2.7.0",
-      "resolved": "https://registry.npmjs.org/jiti/-/jiti-2.7.0.tgz",
-      "integrity": "sha512-AC/7JofJvZGrrneWNaEnJeOLUx+JlGt7tNa0wZiRPT4MY1wmfKjt2+6O2p2uz2+skll8OZZmJMNqeke7kKbNgQ==",
+      "version": "2.6.1",
+      "resolved": "https://registry.npmjs.org/jiti/-/jiti-2.6.1.tgz",
+      "integrity": "sha512-ekilCSN1jwRvIbgeg/57YFh8qQDNbwDb9xT/qu2DAHbFFZUicIl4ygVaAvzveMhMVr3LnpSKTNnwt8PoOfmKhQ==",
       "license": "MIT",
       "bin": {
         "jiti": "lib/jiti-cli.mjs"
@@ -4598,6 +7218,7 @@
       "resolved": "https://registry.npmjs.org/leva/-/leva-0.10.1.tgz",
       "integrity": "sha512-BcjnfUX8jpmwZUz2L7AfBtF9vn4ggTH33hmeufDULbP3YgNZ/C+ss/oO3stbrqRQyaOmRwy70y7BGTGO81S3rA==",
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@radix-ui/react-portal": "^1.1.4",
         "@radix-ui/react-tooltip": "^1.1.8",
@@ -4783,9 +7404,6 @@
       "cpu": [
         "arm64"
       ],
-      "libc": [
-        "glibc"
-      ],
       "license": "MPL-2.0",
       "optional": true,
       "os": [
@@ -4806,9 +7424,6 @@
       "cpu": [
         "arm64"
       ],
-      "libc": [
-        "musl"
-      ],
       "license": "MPL-2.0",
       "optional": true,
       "os": [
@@ -4829,9 +7444,6 @@
       "cpu": [
         "x64"
       ],
-      "libc": [
-        "glibc"
-      ],
       "license": "MPL-2.0",
       "optional": true,
       "os": [
@@ -4852,9 +7464,6 @@
       "cpu": [
         "x64"
       ],
-      "libc": [
-        "musl"
-      ],
       "license": "MPL-2.0",
       "optional": true,
       "os": [
@@ -5013,12 +7622,13 @@
       }
     },
     "node_modules/motion": {
-      "version": "12.39.0",
-      "resolved": "https://registry.npmjs.org/motion/-/motion-12.39.0.tgz",
-      "integrity": "sha512-H4a+Ze+a9j+/NTla5ezfb/g9vmIOxC+viDj++NGDZyTZkdRKjiOz3kSv6TalRWM8ZmD2y/CfC6TkQc97ybyqSA==",
+      "version": "12.38.0",
+      "resolved": "https://registry.npmjs.org/motion/-/motion-12.38.0.tgz",
+      "integrity": "sha512-uYfXzeHlgThchzwz5Te47dlv5JOUC7OB4rjJ/7XTUgtBZD8CchMN8qEJ4ZVsUmTyYA44zjV0fBwsiktRuFnn+w==",
       "license": "MIT",
+      "peer": true,
       "dependencies": {
-        "framer-motion": "^12.39.0",
+        "framer-motion": "^12.38.0",
         "tslib": "^2.4.0"
       },
       "peerDependencies": {
@@ -5039,18 +7649,18 @@
       }
     },
     "node_modules/motion-dom": {
-      "version": "12.39.0",
-      "resolved": "https://registry.npmjs.org/motion-dom/-/motion-dom-12.39.0.tgz",
-      "integrity": "sha512-Xn7aAcGDhco/JZTXOub64UmaYn73C6J1Po7Fk+8EvkJsNGTqfhon6UJY53vJKXW5v5Zl8HrYsVxv6oPXeGoGLQ==",
+      "version": "12.38.0",
+      "resolved": "https://registry.npmjs.org/motion-dom/-/motion-dom-12.38.0.tgz",
+      "integrity": "sha512-pdkHLD8QYRp8VfiNLb8xIBJis1byQ9gPT3Jnh2jqfFtAsWUA3dEepDlsWe/xMpO8McV+VdpKVcp+E+TGJEtOoA==",
       "license": "MIT",
       "dependencies": {
-        "motion-utils": "^12.39.0"
+        "motion-utils": "^12.36.0"
       }
     },
     "node_modules/motion-utils": {
-      "version": "12.39.0",
-      "resolved": "https://registry.npmjs.org/motion-utils/-/motion-utils-12.39.0.tgz",
-      "integrity": "sha512-8nadJAJjTtqRkmRF36FoJTrywK9nnFmnPwnSMyxaOCU7GDjN9RTMJIxx9De8ErM+vpPhMccr/6fo5WciyQLnMQ==",
+      "version": "12.36.0",
+      "resolved": "https://registry.npmjs.org/motion-utils/-/motion-utils-12.36.0.tgz",
+      "integrity": "sha512-eHWisygbiwVvf6PZ1vhaHCLamvkSbPIeAYxWUuL3a2PD/TROgE7FvfHWTIH4vMl798QLfMw15nRqIaRDXTlYRg==",
       "license": "MIT"
     },
     "node_modules/ms": {
@@ -5061,9 +7671,9 @@
       "license": "MIT"
     },
     "node_modules/nanoid": {
-      "version": "3.3.12",
-      "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.12.tgz",
-      "integrity": "sha512-ZB9RH/39qpq5Vu6Y+NmUaFhQR6pp+M2Xt76XBnEwDaGcVAqhlvxrl3B2bKS5D3NH3QR76v3aSrKaF/Kiy7lEtQ==",
+      "version": "3.3.11",
+      "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.11.tgz",
+      "integrity": "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w==",
       "funding": [
         {
           "type": "github",
@@ -5089,6 +7699,7 @@
         }
       ],
       "license": "MIT",
+      "peer": true,
       "engines": {
         "node": "^20.0.0 || >=22.0.0"
       }
@@ -5101,9 +7712,9 @@
       "license": "MIT"
     },
     "node_modules/node-releases": {
-      "version": "2.0.44",
-      "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.44.tgz",
-      "integrity": "sha512-5WUyunoPMsvvEhS8AxHtRzP+oA8UCkJ7YRxatWKjngndhDGLiqEVAQKWjFAiAiuL8zMRGzGSJxFnLetoa43qGQ==",
+      "version": "2.0.38",
+      "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.38.tgz",
+      "integrity": "sha512-3qT/88Y3FbH/Kx4szpQQ4HzUbVrHPKTLVpVocKiLfoYvw9XSGOX2FmD2d6DrXbVYyAQTF2HeF6My8jmzx7/CRw==",
       "dev": true,
       "license": "MIT"
     },
@@ -5216,6 +7827,7 @@
       "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz",
       "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==",
       "license": "MIT",
+      "peer": true,
       "engines": {
         "node": ">=12"
       },
@@ -5224,9 +7836,9 @@
       }
     },
     "node_modules/postcss": {
-      "version": "8.5.15",
-      "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.15.tgz",
-      "integrity": "sha512-FfR8sjd4em2T6fb3I2MwAJU7HWVMr9zba+enmQeeWFfCbm+UOC/0X4DS8XtpUTMwWMGbjKYP7xjfNekzyGmB3A==",
+      "version": "8.5.12",
+      "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.12.tgz",
+      "integrity": "sha512-W62t/Se6rA0Az3DfCL0AqJwXuKwBeYg6nOaIgzP+xZ7N5BFCI7DYi1qs6ygUYT6rvfi6t9k65UMLJC+PHZpDAA==",
       "funding": [
         {
           "type": "opencollective",
@@ -5243,7 +7855,7 @@
       ],
       "license": "MIT",
       "dependencies": {
-        "nanoid": "^3.3.12",
+        "nanoid": "^3.3.11",
         "picocolors": "^1.1.1",
         "source-map-js": "^1.2.1"
       },
@@ -5282,19 +7894,162 @@
         "node": ">=6"
       }
     },
-    "node_modules/react": {
-      "version": "19.2.6",
-      "resolved": "https://registry.npmjs.org/react/-/react-19.2.6.tgz",
-      "integrity": "sha512-sfWGGfavi0xr8Pg0sVsyHMAOziVYKgPLNrS7ig+ivMNb3wbCBw3KxtflsGBAwD3gYQlE/AEZsTLgToRrSCjb0Q==",
+    "node_modules/radix-ui": {
+      "version": "1.4.3",
+      "resolved": "https://registry.npmjs.org/radix-ui/-/radix-ui-1.4.3.tgz",
+      "integrity": "sha512-aWizCQiyeAenIdUbqEpXgRA1ya65P13NKn/W8rWkcN0OPkRDxdBVLWnIEDsS2RpwCK2nobI7oMUSmexzTDyAmA==",
       "license": "MIT",
+      "dependencies": {
+        "@radix-ui/primitive": "1.1.3",
+        "@radix-ui/react-accessible-icon": "1.1.7",
+        "@radix-ui/react-accordion": "1.2.12",
+        "@radix-ui/react-alert-dialog": "1.1.15",
+        "@radix-ui/react-arrow": "1.1.7",
+        "@radix-ui/react-aspect-ratio": "1.1.7",
+        "@radix-ui/react-avatar": "1.1.10",
+        "@radix-ui/react-checkbox": "1.3.3",
+        "@radix-ui/react-collapsible": "1.1.12",
+        "@radix-ui/react-collection": "1.1.7",
+        "@radix-ui/react-compose-refs": "1.1.2",
+        "@radix-ui/react-context": "1.1.2",
+        "@radix-ui/react-context-menu": "2.2.16",
+        "@radix-ui/react-dialog": "1.1.15",
+        "@radix-ui/react-direction": "1.1.1",
+        "@radix-ui/react-dismissable-layer": "1.1.11",
+        "@radix-ui/react-dropdown-menu": "2.1.16",
+        "@radix-ui/react-focus-guards": "1.1.3",
+        "@radix-ui/react-focus-scope": "1.1.7",
+        "@radix-ui/react-form": "0.1.8",
+        "@radix-ui/react-hover-card": "1.1.15",
+        "@radix-ui/react-label": "2.1.7",
+        "@radix-ui/react-menu": "2.1.16",
+        "@radix-ui/react-menubar": "1.1.16",
+        "@radix-ui/react-navigation-menu": "1.2.14",
+        "@radix-ui/react-one-time-password-field": "0.1.8",
+        "@radix-ui/react-password-toggle-field": "0.1.3",
+        "@radix-ui/react-popover": "1.1.15",
+        "@radix-ui/react-popper": "1.2.8",
+        "@radix-ui/react-portal": "1.1.9",
+        "@radix-ui/react-presence": "1.1.5",
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-progress": "1.1.7",
+        "@radix-ui/react-radio-group": "1.3.8",
+        "@radix-ui/react-roving-focus": "1.1.11",
+        "@radix-ui/react-scroll-area": "1.2.10",
+        "@radix-ui/react-select": "2.2.6",
+        "@radix-ui/react-separator": "1.1.7",
+        "@radix-ui/react-slider": "1.3.6",
+        "@radix-ui/react-slot": "1.2.3",
+        "@radix-ui/react-switch": "1.2.6",
+        "@radix-ui/react-tabs": "1.1.13",
+        "@radix-ui/react-toast": "1.2.15",
+        "@radix-ui/react-toggle": "1.1.10",
+        "@radix-ui/react-toggle-group": "1.1.11",
+        "@radix-ui/react-toolbar": "1.1.11",
+        "@radix-ui/react-tooltip": "1.2.8",
+        "@radix-ui/react-use-callback-ref": "1.1.1",
+        "@radix-ui/react-use-controllable-state": "1.2.2",
+        "@radix-ui/react-use-effect-event": "0.0.2",
+        "@radix-ui/react-use-escape-keydown": "1.1.1",
+        "@radix-ui/react-use-is-hydrated": "0.1.0",
+        "@radix-ui/react-use-layout-effect": "1.1.1",
+        "@radix-ui/react-use-size": "1.1.1",
+        "@radix-ui/react-visually-hidden": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/radix-ui/node_modules/@radix-ui/react-portal": {
+      "version": "1.1.9",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.9.tgz",
+      "integrity": "sha512-bpIxvq03if6UNwXZ+HTK71JLh4APvnXntDc6XOX8UVq4XQOVl7lwok0AvIl+b8zgCw3fSaVTZMpAPPagXbKmHQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-use-layout-effect": "1.1.1"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/radix-ui/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/radix-ui/node_modules/@radix-ui/react-slot": {
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
+      "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/react": {
+      "version": "19.2.5",
+      "resolved": "https://registry.npmjs.org/react/-/react-19.2.5.tgz",
+      "integrity": "sha512-llUJLzz1zTUBrskt2pwZgLq59AemifIftw4aB7JxOqf1HY2FDaGDxgwpAPVzHU1kdWabH7FauP4i1oEeer2WCA==",
+      "license": "MIT",
+      "peer": true,
       "engines": {
         "node": ">=0.10.0"
       }
     },
     "node_modules/react-colorful": {
-      "version": "5.7.0",
-      "resolved": "https://registry.npmjs.org/react-colorful/-/react-colorful-5.7.0.tgz",
-      "integrity": "sha512-fuesYIemttah97XmsIHmz4OORDHiSFzyc9HMAIrCHJou2jaRQmL8cFJ76K4zQhhj8jzwOBlOi4BaGTjjOZCfTg==",
+      "version": "5.6.1",
+      "resolved": "https://registry.npmjs.org/react-colorful/-/react-colorful-5.6.1.tgz",
+      "integrity": "sha512-1exovf0uGTGyq5mXQT0zgQ80uvj2PCwvF8zY1RN9/vbJVSjSo3fsB/4L3ObbF7u70NduSiK4xu4Y6q1MHoUGEw==",
       "license": "MIT",
       "peerDependencies": {
         "react": ">=16.8.0",
@@ -5302,15 +8057,16 @@
       }
     },
     "node_modules/react-dom": {
-      "version": "19.2.6",
-      "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.2.6.tgz",
-      "integrity": "sha512-0prMI+hvBbPjsWnxDLxlCGyM8PN6UuWjEUCYmZhO67xIV9Xasa/r/vDnq+Xyq4Lo27g8QSbO5YzARu0D1Sps3g==",
+      "version": "19.2.5",
+      "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.2.5.tgz",
+      "integrity": "sha512-J5bAZz+DXMMwW/wV3xzKke59Af6CHY7G4uYLN1OvBcKEsWOs4pQExj86BBKamxl/Ik5bx9whOrvBlSDfWzgSag==",
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "scheduler": "^0.27.0"
       },
       "peerDependencies": {
-        "react": "^19.2.6"
+        "react": "^19.2.5"
       }
     },
     "node_modules/react-dropzone": {
@@ -5346,10 +8102,57 @@
         "node": ">=0.10.0"
       }
     },
+    "node_modules/react-remove-scroll": {
+      "version": "2.7.2",
+      "resolved": "https://registry.npmjs.org/react-remove-scroll/-/react-remove-scroll-2.7.2.tgz",
+      "integrity": "sha512-Iqb9NjCCTt6Hf+vOdNIZGdTiH1QSqr27H/Ek9sv/a97gfueI/5h1s3yRi1nngzMUaOOToin5dI1dXKdXiF+u0Q==",
+      "license": "MIT",
+      "dependencies": {
+        "react-remove-scroll-bar": "^2.3.7",
+        "react-style-singleton": "^2.2.3",
+        "tslib": "^2.1.0",
+        "use-callback-ref": "^1.3.3",
+        "use-sidecar": "^1.1.3"
+      },
+      "engines": {
+        "node": ">=10"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/react-remove-scroll-bar": {
+      "version": "2.3.8",
+      "resolved": "https://registry.npmjs.org/react-remove-scroll-bar/-/react-remove-scroll-bar-2.3.8.tgz",
+      "integrity": "sha512-9r+yi9+mgU33AKcj6IbT9oRCO78WriSj6t/cF8DWBZJ9aOGPOTEDvdUDz1FwKim7QXWwmHqtdHnRJfhAxEG46Q==",
+      "license": "MIT",
+      "dependencies": {
+        "react-style-singleton": "^2.2.2",
+        "tslib": "^2.0.0"
+      },
+      "engines": {
+        "node": ">=10"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
     "node_modules/react-router": {
-      "version": "7.15.1",
-      "resolved": "https://registry.npmjs.org/react-router/-/react-router-7.15.1.tgz",
-      "integrity": "sha512-R8rl9HhgikFYoPJymnUtPXWbnDb3oget6lQnfIoupbt61aT9aOhRkDsY2XRhZRyX1Z/8a5sL74fXmFNm3NRK5A==",
+      "version": "7.14.2",
+      "resolved": "https://registry.npmjs.org/react-router/-/react-router-7.14.2.tgz",
+      "integrity": "sha512-yCqNne6I8IB6rVCH7XUvlBK7/QKyqypBFGv+8dj4QBFJiiRX+FG7/nkdAvGElyvVZ/HQP5N19wzteuTARXi5Gw==",
       "license": "MIT",
       "dependencies": {
         "cookie": "^1.0.1",
@@ -5369,12 +8172,12 @@
       }
     },
     "node_modules/react-router-dom": {
-      "version": "7.15.1",
-      "resolved": "https://registry.npmjs.org/react-router-dom/-/react-router-dom-7.15.1.tgz",
-      "integrity": "sha512-AzF62gjY6U9rkMq4RfP/r2EVtQ7DMfNMjyOp/flLTCrtRylLiK4wT4pSq6O8rOXZ2eXdZYJPEYe+ifomiv+Igg==",
+      "version": "7.14.2",
+      "resolved": "https://registry.npmjs.org/react-router-dom/-/react-router-dom-7.14.2.tgz",
+      "integrity": "sha512-YZcM5ES8jJSM+KrJ9BdvHHqlnGTg5tH3sC5ChFRj4inosKctdyzBDhOyyHdGk597q2OT6NTrCA1OvB/YDwfekQ==",
       "license": "MIT",
       "dependencies": {
-        "react-router": "7.15.1"
+        "react-router": "7.14.2"
       },
       "engines": {
         "node": ">=20.0.0"
@@ -5384,6 +8187,28 @@
         "react-dom": ">=18"
       }
     },
+    "node_modules/react-style-singleton": {
+      "version": "2.2.3",
+      "resolved": "https://registry.npmjs.org/react-style-singleton/-/react-style-singleton-2.2.3.tgz",
+      "integrity": "sha512-b6jSvxvVnyptAiLjbkWLE/lOnR4lfTtDAl+eUC7RZy+QQWc6wRzIV2CE6xBuMmDxc2qIihtDCZD5NPOFl7fRBQ==",
+      "license": "MIT",
+      "dependencies": {
+        "get-nonce": "^1.0.0",
+        "tslib": "^2.0.0"
+      },
+      "engines": {
+        "node": ">=10"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
     "node_modules/react-use-measure": {
       "version": "2.1.7",
       "resolved": "https://registry.npmjs.org/react-use-measure/-/react-use-measure-2.1.7.tgz",
@@ -5416,9 +8241,9 @@
       "license": "Unlicense"
     },
     "node_modules/rollup": {
-      "version": "4.60.4",
-      "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.60.4.tgz",
-      "integrity": "sha512-WHeFSbZYsPu3+bLoNRUuAO+wavNlocOPf3wSHTP7hcFKVnJeWsYlCDbr3mTS14FCizf9ccIxXA8sGL8zKeQN3g==",
+      "version": "4.60.2",
+      "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.60.2.tgz",
+      "integrity": "sha512-J9qZyW++QK/09NyN/zeO0dG/1GdGfyp9lV8ajHnRVLfo/uFsbji5mHnDgn/qYdUHyCkM2N+8VyspgZclfAh0eQ==",
       "license": "MIT",
       "dependencies": {
         "@types/estree": "1.0.8"
@@ -5431,40 +8256,34 @@
         "npm": ">=8.0.0"
       },
       "optionalDependencies": {
-        "@rollup/rollup-android-arm-eabi": "4.60.4",
-        "@rollup/rollup-android-arm64": "4.60.4",
-        "@rollup/rollup-darwin-arm64": "4.60.4",
-        "@rollup/rollup-darwin-x64": "4.60.4",
-        "@rollup/rollup-freebsd-arm64": "4.60.4",
-        "@rollup/rollup-freebsd-x64": "4.60.4",
-        "@rollup/rollup-linux-arm-gnueabihf": "4.60.4",
-        "@rollup/rollup-linux-arm-musleabihf": "4.60.4",
-        "@rollup/rollup-linux-arm64-gnu": "4.60.4",
-        "@rollup/rollup-linux-arm64-musl": "4.60.4",
-        "@rollup/rollup-linux-loong64-gnu": "4.60.4",
-        "@rollup/rollup-linux-loong64-musl": "4.60.4",
-        "@rollup/rollup-linux-ppc64-gnu": "4.60.4",
-        "@rollup/rollup-linux-ppc64-musl": "4.60.4",
-        "@rollup/rollup-linux-riscv64-gnu": "4.60.4",
-        "@rollup/rollup-linux-riscv64-musl": "4.60.4",
-        "@rollup/rollup-linux-s390x-gnu": "4.60.4",
-        "@rollup/rollup-linux-x64-gnu": "4.60.4",
-        "@rollup/rollup-linux-x64-musl": "4.60.4",
-        "@rollup/rollup-openbsd-x64": "4.60.4",
-        "@rollup/rollup-openharmony-arm64": "4.60.4",
-        "@rollup/rollup-win32-arm64-msvc": "4.60.4",
-        "@rollup/rollup-win32-ia32-msvc": "4.60.4",
-        "@rollup/rollup-win32-x64-gnu": "4.60.4",
-        "@rollup/rollup-win32-x64-msvc": "4.60.4",
+        "@rollup/rollup-android-arm-eabi": "4.60.2",
+        "@rollup/rollup-android-arm64": "4.60.2",
+        "@rollup/rollup-darwin-arm64": "4.60.2",
+        "@rollup/rollup-darwin-x64": "4.60.2",
+        "@rollup/rollup-freebsd-arm64": "4.60.2",
+        "@rollup/rollup-freebsd-x64": "4.60.2",
+        "@rollup/rollup-linux-arm-gnueabihf": "4.60.2",
+        "@rollup/rollup-linux-arm-musleabihf": "4.60.2",
+        "@rollup/rollup-linux-arm64-gnu": "4.60.2",
+        "@rollup/rollup-linux-arm64-musl": "4.60.2",
+        "@rollup/rollup-linux-loong64-gnu": "4.60.2",
+        "@rollup/rollup-linux-loong64-musl": "4.60.2",
+        "@rollup/rollup-linux-ppc64-gnu": "4.60.2",
+        "@rollup/rollup-linux-ppc64-musl": "4.60.2",
+        "@rollup/rollup-linux-riscv64-gnu": "4.60.2",
+        "@rollup/rollup-linux-riscv64-musl": "4.60.2",
+        "@rollup/rollup-linux-s390x-gnu": "4.60.2",
+        "@rollup/rollup-linux-x64-gnu": "4.60.2",
+        "@rollup/rollup-linux-x64-musl": "4.60.2",
+        "@rollup/rollup-openbsd-x64": "4.60.2",
+        "@rollup/rollup-openharmony-arm64": "4.60.2",
+        "@rollup/rollup-win32-arm64-msvc": "4.60.2",
+        "@rollup/rollup-win32-ia32-msvc": "4.60.2",
+        "@rollup/rollup-win32-x64-gnu": "4.60.2",
+        "@rollup/rollup-win32-x64-msvc": "4.60.2",
         "fsevents": "~2.3.2"
       }
     },
-    "node_modules/rollup/node_modules/@types/estree": {
-      "version": "1.0.8",
-      "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.8.tgz",
-      "integrity": "sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w==",
-      "license": "MIT"
-    },
     "node_modules/rw": {
       "version": "1.3.3",
       "resolved": "https://registry.npmjs.org/rw/-/rw-1.3.3.tgz",
@@ -5650,9 +8469,9 @@
       }
     },
     "node_modules/tailwindcss": {
-      "version": "4.3.0",
-      "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.3.0.tgz",
-      "integrity": "sha512-y6nxMGB1nMW9R6k96e5gdIFzcfL/gTJRNaqGes1YvkLnPVXzWgbqFF2yLC0T8G774n24cx3Pe8XrKoniCOAH+Q==",
+      "version": "4.2.4",
+      "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.2.4.tgz",
+      "integrity": "sha512-HhKppgO81FQof5m6TEnuBWCZGgfRAWbaeOaGT00KOy/Pf/j6oUihdvBpA7ltCeAvZpFhW3j0PTclkxsd4IXYDA==",
       "license": "MIT"
     },
     "node_modules/tapable": {
@@ -5672,7 +8491,8 @@
       "version": "0.180.0",
       "resolved": "https://registry.npmjs.org/three/-/three-0.180.0.tgz",
       "integrity": "sha512-o+qycAMZrh+TsE01GqWUxUIKR1AL0S8pq7zDkYOQw8GqfX8b8VoCKYUoHbhiX5j+7hr8XsuHDVU6+gkQJQKg9w==",
-      "license": "MIT"
+      "license": "MIT",
+      "peer": true
     },
     "node_modules/tinyglobby": {
       "version": "0.2.16",
@@ -5737,6 +8557,7 @@
       "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
       "dev": true,
       "license": "Apache-2.0",
+      "peer": true,
       "bin": {
         "tsc": "bin/tsc",
         "tsserver": "bin/tsserver"
@@ -5746,16 +8567,16 @@
       }
     },
     "node_modules/typescript-eslint": {
-      "version": "8.59.4",
-      "resolved": "https://registry.npmjs.org/typescript-eslint/-/typescript-eslint-8.59.4.tgz",
-      "integrity": "sha512-Rw6+44QNFaXtgHSjPy+Kw8hrJniMYzR85E9yLmOLcfZ91/rz+JXQbDTCmc6ccxMPY6K6PgAq26f0JCBfR7LIPQ==",
+      "version": "8.59.1",
+      "resolved": "https://registry.npmjs.org/typescript-eslint/-/typescript-eslint-8.59.1.tgz",
+      "integrity": "sha512-xqDcFVBmlrltH64lklOVp1wYxgJr6LVdg3NamBgH2OOQDLFdTKfIZXF5PfghrnXQKXZGTQs8tr1vL7fJvq8CTQ==",
       "dev": true,
       "license": "MIT",
       "dependencies": {
-        "@typescript-eslint/eslint-plugin": "8.59.4",
-        "@typescript-eslint/parser": "8.59.4",
-        "@typescript-eslint/typescript-estree": "8.59.4",
-        "@typescript-eslint/utils": "8.59.4"
+        "@typescript-eslint/eslint-plugin": "8.59.1",
+        "@typescript-eslint/parser": "8.59.1",
+        "@typescript-eslint/typescript-estree": "8.59.1",
+        "@typescript-eslint/utils": "8.59.1"
       },
       "engines": {
         "node": "^18.18.0 || ^20.9.0 || >=21.1.0"
@@ -5830,6 +8651,49 @@
         "punycode": "^2.1.0"
       }
     },
+    "node_modules/use-callback-ref": {
+      "version": "1.3.3",
+      "resolved": "https://registry.npmjs.org/use-callback-ref/-/use-callback-ref-1.3.3.tgz",
+      "integrity": "sha512-jQL3lRnocaFtu3V00JToYz/4QkNWswxijDaCVNZRiRTO3HQDLsdu1ZtmIUvV4yPp+rvWm5j0y0TG/S61cuijTg==",
+      "license": "MIT",
+      "dependencies": {
+        "tslib": "^2.0.0"
+      },
+      "engines": {
+        "node": ">=10"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/use-sidecar": {
+      "version": "1.1.3",
+      "resolved": "https://registry.npmjs.org/use-sidecar/-/use-sidecar-1.1.3.tgz",
+      "integrity": "sha512-Fedw0aZvkhynoPYlA5WXrMCAMm+nSWdZt6lzJQ7Ok8S6Q+VsHmHpRWndVRJ8Be0ZbkfPc5LRYH+5XrzXcEeLRQ==",
+      "license": "MIT",
+      "dependencies": {
+        "detect-node-es": "^1.1.0",
+        "tslib": "^2.0.0"
+      },
+      "engines": {
+        "node": ">=10"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        }
+      }
+    },
     "node_modules/use-sync-external-store": {
       "version": "1.6.0",
       "resolved": "https://registry.npmjs.org/use-sync-external-store/-/use-sync-external-store-1.6.0.tgz",
@@ -5846,10 +8710,11 @@
       "license": "MIT"
     },
     "node_modules/vite": {
-      "version": "7.3.3",
-      "resolved": "https://registry.npmjs.org/vite/-/vite-7.3.3.tgz",
-      "integrity": "sha512-/4XH147Ui7OGTjg3HbdWe5arnZQSbfuRzdr9Ec7TQi5I7R+ir0Rlc9GIvD4v0XZurELqA035KVXJXpR61xhiTA==",
+      "version": "7.3.2",
+      "resolved": "https://registry.npmjs.org/vite/-/vite-7.3.2.tgz",
+      "integrity": "sha512-Bby3NOsna2jsjfLVOHKes8sGwgl4TT0E6vvpYgnAYDIF/tie7MRaFthmKuHx1NSXjiTueXH3do80FMQgvEktRg==",
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "esbuild": "^0.27.0",
         "fdir": "^6.5.0",
@@ -5966,11 +8831,12 @@
       }
     },
     "node_modules/zod": {
-      "version": "4.4.3",
-      "resolved": "https://registry.npmjs.org/zod/-/zod-4.4.3.tgz",
-      "integrity": "sha512-ytENFjIJFl2UwYglde2jchW2Hwm4GJFLDiSXWdTrJQBIN9Fcyp7n4DhxJEiWNAJMV1/BqWfW/kkg71UDcHJyTQ==",
+      "version": "4.3.6",
+      "resolved": "https://registry.npmjs.org/zod/-/zod-4.3.6.tgz",
+      "integrity": "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "funding": {
         "url": "https://github.com/sponsors/colinhacks"
       }
@@ -5989,9 +8855,9 @@
       }
     },
     "node_modules/zustand": {
-      "version": "5.0.13",
-      "resolved": "https://registry.npmjs.org/zustand/-/zustand-5.0.13.tgz",
-      "integrity": "sha512-efI2tVaVQPqtOh114loML/Z80Y4NP3yc+Ff0fYiZJPauNeWZeIp/bRFD7I9bfmCOYBh/PHxlglQ9+wvlwnPikQ==",
+      "version": "5.0.12",
+      "resolved": "https://registry.npmjs.org/zustand/-/zustand-5.0.12.tgz",
+      "integrity": "sha512-i77ae3aZq4dhMlRhJVCYgMLKuSiZAaUPAct2AksxQ+gOtimhGMdXljRT21P5BNpeT4kXlLIckvkPM029OljD7g==",
       "license": "MIT",
       "engines": {
         "node": ">=12.20.0"
diff --git a/apps/dashboard/package.json b/web/package.json
similarity index 93%
rename from apps/dashboard/package.json
rename to web/package.json
index 6db4c280c48..7615a0976a3 100644
--- a/apps/dashboard/package.json
+++ b/web/package.json
@@ -1,5 +1,5 @@
 {
-  "name": "dashboard",
+  "name": "web",
   "private": true,
   "version": "0.0.0",
   "type": "module",
@@ -10,8 +10,7 @@
     "preview": "vite preview"
   },
   "dependencies": {
-    "@hermes/shared": "file:../shared",
-    "@nous-research/ui": "0.16.0",
+    "@nous-research/ui": "0.18.2",
     "@observablehq/plot": "^0.6.17",
     "@react-three/fiber": "^9.6.0",
     "@tailwindcss/vite": "^4.2.1",
diff --git a/apps/dashboard/public/favicon.ico b/web/public/favicon.ico
similarity index 100%
rename from apps/dashboard/public/favicon.ico
rename to web/public/favicon.ico
diff --git a/apps/dashboard/public/fonts-terminal/JetBrainsMono-Bold.woff2 b/web/public/fonts-terminal/JetBrainsMono-Bold.woff2
similarity index 100%
rename from apps/dashboard/public/fonts-terminal/JetBrainsMono-Bold.woff2
rename to web/public/fonts-terminal/JetBrainsMono-Bold.woff2
diff --git a/apps/dashboard/public/fonts-terminal/JetBrainsMono-Italic.woff2 b/web/public/fonts-terminal/JetBrainsMono-Italic.woff2
similarity index 100%
rename from apps/dashboard/public/fonts-terminal/JetBrainsMono-Italic.woff2
rename to web/public/fonts-terminal/JetBrainsMono-Italic.woff2
diff --git a/apps/dashboard/public/fonts-terminal/JetBrainsMono-Regular.woff2 b/web/public/fonts-terminal/JetBrainsMono-Regular.woff2
similarity index 100%
rename from apps/dashboard/public/fonts-terminal/JetBrainsMono-Regular.woff2
rename to web/public/fonts-terminal/JetBrainsMono-Regular.woff2
diff --git a/apps/dashboard/public/fonts/Collapse-Bold.woff2 b/web/public/fonts/Collapse-Bold.woff2
similarity index 100%
rename from apps/dashboard/public/fonts/Collapse-Bold.woff2
rename to web/public/fonts/Collapse-Bold.woff2
diff --git a/apps/dashboard/public/fonts/Collapse-Regular.woff2 b/web/public/fonts/Collapse-Regular.woff2
similarity index 100%
rename from apps/dashboard/public/fonts/Collapse-Regular.woff2
rename to web/public/fonts/Collapse-Regular.woff2
diff --git a/apps/dashboard/public/fonts/Mondwest-Regular.woff2 b/web/public/fonts/Mondwest-Regular.woff2
similarity index 100%
rename from apps/dashboard/public/fonts/Mondwest-Regular.woff2
rename to web/public/fonts/Mondwest-Regular.woff2
diff --git a/apps/dashboard/public/fonts/RulesCompressed-Medium.woff2 b/web/public/fonts/RulesCompressed-Medium.woff2
similarity index 100%
rename from apps/dashboard/public/fonts/RulesCompressed-Medium.woff2
rename to web/public/fonts/RulesCompressed-Medium.woff2
diff --git a/apps/dashboard/public/fonts/RulesCompressed-Regular.woff2 b/web/public/fonts/RulesCompressed-Regular.woff2
similarity index 100%
rename from apps/dashboard/public/fonts/RulesCompressed-Regular.woff2
rename to web/public/fonts/RulesCompressed-Regular.woff2
diff --git a/apps/dashboard/public/fonts/RulesExpanded-Bold.woff2 b/web/public/fonts/RulesExpanded-Bold.woff2
similarity index 100%
rename from apps/dashboard/public/fonts/RulesExpanded-Bold.woff2
rename to web/public/fonts/RulesExpanded-Bold.woff2
diff --git a/apps/dashboard/public/fonts/RulesExpanded-Regular.woff2 b/web/public/fonts/RulesExpanded-Regular.woff2
similarity index 100%
rename from apps/dashboard/public/fonts/RulesExpanded-Regular.woff2
rename to web/public/fonts/RulesExpanded-Regular.woff2
diff --git a/apps/dashboard/src/App.tsx b/web/src/App.tsx
similarity index 64%
rename from apps/dashboard/src/App.tsx
rename to web/src/App.tsx
index 6220ed26313..26850c2b0fb 100644
--- a/apps/dashboard/src/App.tsx
+++ b/web/src/App.tsx
@@ -2,10 +2,12 @@ import {
   useCallback,
   useEffect,
   useMemo,
+  useRef,
   useState,
   type ComponentType,
   type ReactNode,
 } from "react";
+import { createPortal } from "react-dom";
 import {
   Routes,
   Route,
@@ -31,6 +33,8 @@ import {
   Menu,
   MessageSquare,
   Package,
+  PanelLeftClose,
+  PanelLeftOpen,
   Puzzle,
   RotateCw,
   Settings,
@@ -44,14 +48,15 @@ import {
   Zap,
 } from "lucide-react";
 import { Button } from "@nous-research/ui/ui/components/button";
-import { ListItem } from "@nous-research/ui/ui/components/list-item";
 import { SelectionSwitcher } from "@nous-research/ui/ui/components/selection-switcher";
 import { Spinner } from "@nous-research/ui/ui/components/spinner";
-import { Typography } from "@/components/NouiTypography";
+import { Typography } from "@nous-research/ui/ui/components/typography/index";
 import { cn } from "@/lib/utils";
 import { Backdrop } from "@/components/Backdrop";
 import { SidebarFooter } from "@/components/SidebarFooter";
-import { SidebarStatusStrip } from "@/components/SidebarStatusStrip";
+import { SidebarStatusStrip, gatewayLine } from "@/components/SidebarStatusStrip";
+import { useBelowBreakpoint } from "@nous-research/ui/hooks/use-below-breakpoint";
+import { useSidebarStatus } from "@/hooks/useSidebarStatus";
 import { AuthWidget } from "@/components/AuthWidget";
 import { PageHeaderProvider } from "@/contexts/PageHeaderProvider";
 import { useSystemActions } from "@/contexts/useSystemActions";
@@ -77,6 +82,7 @@ import type { PluginManifest } from "@/plugins";
 import { useTheme } from "@/themes";
 import { isDashboardEmbeddedChatEnabled } from "@/lib/dashboard-flags";
 import { api } from "@/lib/api";
+import type { StatusResponse } from "@/lib/api";
 
 function RootRedirect() {
   return <Navigate to="/sessions" replace />;
@@ -306,6 +312,8 @@ function buildRoutes(
   return routes;
 }
 
+const SIDEBAR_COLLAPSED_KEY = "hermes-sidebar-collapsed";
+
 export default function App() {
   const { t } = useI18n();
   const { pathname } = useLocation();
@@ -313,6 +321,27 @@ export default function App() {
   const { theme } = useTheme();
   const [mobileOpen, setMobileOpen] = useState(false);
   const closeMobile = useCallback(() => setMobileOpen(false), []);
+
+  const [collapsed, setCollapsed] = useState(() => {
+    try {
+      return localStorage.getItem(SIDEBAR_COLLAPSED_KEY) === "true";
+    } catch {
+      return false;
+    }
+  });
+  const toggleCollapsed = useCallback(() => {
+    setCollapsed((prev) => {
+      const next = !prev;
+      try {
+        localStorage.setItem(SIDEBAR_COLLAPSED_KEY, String(next));
+      } catch { /* localStorage may be unavailable in private browsing */ }
+      return next;
+    });
+  }, []);
+  const isMobile = useBelowBreakpoint(1024);
+  const isDesktopCollapsed = collapsed && !isMobile;
+  const tooltipWarmRef = useRef(0);
+  const sidebarStatus = useSidebarStatus();
   const isDocsRoute = pathname === "/docs" || pathname === "/docs/";
   const normalizedPath = pathname.replace(/\/$/, "") || "/";
   const isChatRoute = normalizedPath === "/chat";
@@ -483,9 +512,11 @@ export default function App() {
               "fixed top-0 left-0 z-50 flex h-dvh max-h-dvh w-64 min-h-0 flex-col",
               "border-r border-current/20",
               "bg-background-base/95 backdrop-blur-sm",
-              "transition-transform duration-200 ease-out",
+              "transition-[transform] duration-200 ease-out",
               mobileOpen ? "translate-x-0" : "-translate-x-full",
-              "lg:sticky lg:top-0 lg:translate-x-0 lg:shrink-0",
+              "lg:sticky lg:top-0 lg:translate-x-0 lg:shrink-0 lg:overflow-hidden",
+              "lg:transition-[width] lg:duration-[600ms] lg:ease-[cubic-bezier(0.33,1.35,0.62,1)]",
+              collapsed && "lg:w-14",
             )}
             style={{
               background: "var(--component-sidebar-background)",
@@ -495,11 +526,17 @@ export default function App() {
           >
             <div
               className={cn(
-                "flex h-14 shrink-0 items-center justify-between gap-2 px-4",
+                "flex h-14 shrink-0 items-center gap-2",
                 "border-b border-current/20",
+                collapsed ? "lg:justify-center lg:px-0" : "px-4 justify-between",
               )}
             >
-              <div className="flex items-center gap-2">
+              <div
+                className={cn(
+                  "flex items-center gap-2",
+                  collapsed && "lg:hidden",
+                )}
+              >
                 <PluginSlot name="header-left" />
 
                 <Typography
@@ -521,6 +558,22 @@ export default function App() {
               >
                 <X />
               </Button>
+
+              <Button
+                ghost
+                size="icon"
+                onClick={toggleCollapsed}
+                aria-label={
+                  collapsed ? t.common.expand : t.common.collapse
+                }
+                className="hidden lg:flex text-text-secondary hover:text-midground"
+              >
+                {collapsed ? (
+                  <PanelLeftOpen className="h-4 w-4" />
+                ) : (
+                  <PanelLeftClose className="h-4 w-4" />
+                )}
+              </Button>
             </div>
 
             <nav
@@ -531,9 +584,11 @@ export default function App() {
                 {sidebarNav.coreItems.map((item) => (
                   <SidebarNavLink
                     closeMobile={closeMobile}
+                    collapsed={isDesktopCollapsed}
                     item={item}
                     key={item.path}
                     t={t}
+                    tooltipWarmRef={tooltipWarmRef}
                   />
                 ))}
               </ul>
@@ -548,6 +603,7 @@ export default function App() {
                     className={cn(
                       "px-5 pt-2.5 pb-1",
                       "font-mondwest text-display text-xs tracking-[0.12em] text-text-tertiary",
+                      isDesktopCollapsed && "lg:hidden",
                     )}
                     id="hermes-sidebar-plugin-nav-heading"
                   >
@@ -558,9 +614,11 @@ export default function App() {
                     {sidebarNav.pluginItems.map((item) => (
                       <SidebarNavLink
                         closeMobile={closeMobile}
+                        collapsed={isDesktopCollapsed}
                         item={item}
                         key={item.path}
                         t={t}
+                        tooltipWarmRef={tooltipWarmRef}
                       />
                     ))}
                   </ul>
@@ -568,24 +626,58 @@ export default function App() {
               )}
             </nav>
 
-            <SidebarSystemActions onNavigate={closeMobile} />
+            <SidebarSystemActions
+              collapsed={isDesktopCollapsed}
+              onNavigate={closeMobile}
+              status={sidebarStatus}
+              tooltipWarmRef={tooltipWarmRef}
+            />
 
             <div
               className={cn(
-                "flex shrink-0 items-center justify-between gap-2",
+                "flex shrink-0 items-center gap-2",
                 "px-3 py-2",
                 "border-t border-current/20",
+                isDesktopCollapsed
+                  ? "lg:flex-col lg:items-start lg:gap-3 lg:py-3"
+                  : "justify-between",
               )}
             >
-              <div className="flex min-w-0 items-center gap-2">
+              <div
+                className={cn(
+                  "flex min-w-0 items-center gap-2",
+                  isDesktopCollapsed && "lg:flex-col lg:items-start",
+                )}
+              >
                 <PluginSlot name="header-right" />
-                <ThemeSwitcher dropUp />
-                <LanguageSwitcher dropUp />
+
+                <SidebarIconWithTooltip
+                  collapsed={isDesktopCollapsed}
+                  label={t.theme?.switchTheme ?? "Switch theme"}
+                  tooltipWarmRef={tooltipWarmRef}
+                >
+                  <ThemeSwitcher collapsed={isDesktopCollapsed} dropUp />
+                </SidebarIconWithTooltip>
+
+                <SidebarIconWithTooltip
+                  collapsed={isDesktopCollapsed}
+                  label={t.language.switchTo}
+                  tooltipWarmRef={tooltipWarmRef}
+                >
+                  <LanguageSwitcher collapsed={isDesktopCollapsed} dropUp />
+                </SidebarIconWithTooltip>
               </div>
             </div>
 
-            <AuthWidget />
-            <SidebarFooter />
+            <div
+              className={cn(
+                "flex shrink-0 flex-col",
+                isDesktopCollapsed && "lg:hidden",
+              )}
+            >
+              <AuthWidget />
+              <SidebarFooter status={sidebarStatus} />
+            </div>
           </aside>
 
           <PageHeaderProvider pluginTabs={pluginTabMeta}>
@@ -660,22 +752,37 @@ export default function App() {
   );
 }
 
-function SidebarNavLink({ closeMobile, item, t }: SidebarNavLinkProps) {
+function SidebarNavLink({
+  closeMobile,
+  collapsed,
+  item,
+  tooltipWarmRef,
+  t,
+}: SidebarNavLinkProps) {
   const { path, label, labelKey, icon: Icon } = item;
+  const liRef = useRef<HTMLLIElement>(null);
+  const [hovered, setHovered] = useState(false);
 
   const navLabel = labelKey
     ? ((t.app.nav as Record<string, string>)[labelKey] ?? label)
     : label;
 
   return (
-    <li>
+    <li
+      ref={liRef}
+      onMouseEnter={collapsed ? () => setHovered(true) : undefined}
+      onMouseLeave={collapsed ? () => setHovered(false) : undefined}
+    >
       <NavLink
         to={path}
         end={path === "/sessions"}
         onClick={closeMobile}
+        aria-label={collapsed ? navLabel : undefined}
+        onFocus={collapsed ? () => setHovered(true) : undefined}
+        onBlur={collapsed ? () => setHovered(false) : undefined}
         className={({ isActive }) =>
           cn(
-            "group relative flex items-center gap-3",
+            "group/nav relative flex items-center gap-3",
             "px-5 py-2.5",
             "font-mondwest text-display uppercase text-sm tracking-[0.12em]",
             "whitespace-nowrap transition-colors cursor-pointer",
@@ -692,11 +799,19 @@ function SidebarNavLink({ closeMobile, item, t }: SidebarNavLinkProps) {
         {({ isActive }) => (
           <>
             <Icon className="h-3.5 w-3.5 shrink-0" />
-            <span className="truncate">{navLabel}</span>
+
+            <span
+              className={cn(
+                "truncate transition-opacity duration-300",
+                collapsed ? "lg:opacity-0" : "lg:opacity-100",
+              )}
+            >
+              {navLabel}
+            </span>
 
             <span
               aria-hidden
-              className="absolute inset-y-0.5 left-1.5 right-1.5 bg-midground opacity-0 pointer-events-none transition-opacity duration-200 group-hover:opacity-5"
+              className="absolute inset-y-0.5 left-1.5 right-1.5 bg-midground opacity-0 pointer-events-none transition-opacity duration-200 group-hover/nav:opacity-5"
             />
 
             {isActive && (
@@ -709,11 +824,20 @@ function SidebarNavLink({ closeMobile, item, t }: SidebarNavLinkProps) {
           </>
         )}
       </NavLink>
+
+      {collapsed && hovered && liRef.current && (
+        <SidebarTooltip anchor={liRef.current} label={navLabel} warmRef={tooltipWarmRef} />
+      )}
     </li>
   );
 }
 
-function SidebarSystemActions({ onNavigate }: { onNavigate: () => void }) {
+function SidebarSystemActions({
+  collapsed,
+  onNavigate,
+  status,
+  tooltipWarmRef,
+}: SidebarSystemActionsProps) {
   const { t } = useI18n();
   const navigate = useNavigate();
   const { activeAction, isBusy, isRunning, pendingAction, runAction } =
@@ -755,75 +879,248 @@ function SidebarSystemActions({ onNavigate }: { onNavigate: () => void }) {
         className={cn(
           "px-5 pt-0.5 pb-0.5",
           "font-mondwest text-display text-xs tracking-[0.12em] text-text-tertiary",
+          collapsed && "lg:hidden",
         )}
       >
         {t.app.system}
       </span>
 
-      <SidebarStatusStrip />
+      <div className={cn(collapsed && "lg:hidden")}>
+        <SidebarStatusStrip status={status} />
+      </div>
+
+      <GatewayDot collapsed={collapsed} status={status} tooltipWarmRef={tooltipWarmRef} />
 
       <ul className="flex flex-col">
-        {items.map(({ action, icon: Icon, label, runningLabel, spin }) => {
-          const isPending = pendingAction === action;
-          const isActionRunning =
-            activeAction === action && isRunning && !isPending;
-          const busy = isPending || isActionRunning;
-          const displayLabel = isActionRunning ? runningLabel : label;
-          const disabled = isBusy && !busy;
-
-          return (
-            <li key={action}>
-              <ListItem
-                onClick={() => handleClick(action)}
-                disabled={disabled}
-                aria-busy={busy}
-                active={busy}
-                className={cn(
-                  "gap-3 px-5 py-1.5 whitespace-nowrap",
-                  "font-mondwest text-display text-xs tracking-[0.1em]",
-                  "transition-colors",
-                  busy
-                    ? "text-midground"
-                    : "text-text-secondary hover:text-midground",
-                  "disabled:text-text-disabled",
-                )}
-              >
-                {isPending ? (
-                  <Spinner className="shrink-0 text-[0.875rem]" />
-                ) : isActionRunning && spin ? (
-                  <Spinner className="shrink-0 text-[0.875rem]" />
-                ) : (
-                  <Icon
-                    className={cn(
-                      "h-3.5 w-3.5 shrink-0",
-                      isActionRunning && !spin && "animate-pulse",
-                    )}
-                  />
-                )}
-
-                <span className="truncate">{displayLabel}</span>
-
-                <span
-                  aria-hidden
-                  className="absolute inset-y-0.5 left-1.5 right-1.5 bg-midground opacity-0 pointer-events-none transition-opacity duration-200 group-hover:opacity-5"
-                />
-
-                {busy && (
-                  <span
-                    aria-hidden
-                    className="absolute left-0 top-0 bottom-0 w-px bg-midground"
-                    style={{ mixBlendMode: "plus-lighter" }}
-                  />
-                )}
-              </ListItem>
-            </li>
-          );
-        })}
+        {items.map((item) => (
+          <SystemActionButton
+            key={item.action}
+            collapsed={collapsed}
+            disabled={isBusy && !(pendingAction === item.action || (activeAction === item.action && isRunning))}
+            tooltipWarmRef={tooltipWarmRef}
+            isPending={pendingAction === item.action}
+            isRunning={activeAction === item.action && isRunning && pendingAction !== item.action}
+            item={item}
+            onClick={() => handleClick(item.action)}
+          />
+        ))}
       </ul>
     </div>
   );
 }
 
+function SystemActionButton({
+  collapsed,
+  disabled,
+  isPending,
+  isRunning: isActionRunning,
+  item,
+  onClick,
+  tooltipWarmRef,
+}: SystemActionButtonProps) {
+  const { icon: Icon, label, runningLabel, spin } = item;
+  const liRef = useRef<HTMLLIElement>(null);
+  const [hovered, setHovered] = useState(false);
+  const busy = isPending || isActionRunning;
+  const displayLabel = isActionRunning ? runningLabel : label;
+
+  return (
+    <li
+      ref={liRef}
+      onMouseEnter={collapsed ? () => setHovered(true) : undefined}
+      onMouseLeave={collapsed ? () => setHovered(false) : undefined}
+    >
+      <button
+        onClick={onClick}
+        disabled={disabled}
+        aria-busy={busy}
+        aria-label={collapsed ? displayLabel : undefined}
+        onFocus={collapsed ? () => setHovered(true) : undefined}
+        onBlur={collapsed ? () => setHovered(false) : undefined}
+        type="button"
+        className={cn(
+          "group/action relative flex w-full items-center gap-3",
+          "px-5 py-2.5",
+          "font-mondwest text-display text-xs tracking-[0.1em]",
+          "whitespace-nowrap transition-colors cursor-pointer",
+          "focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-midground",
+          busy
+            ? "text-midground"
+            : "text-text-secondary hover:text-midground",
+          "disabled:text-text-disabled disabled:cursor-not-allowed",
+        )}
+      >
+        {isPending ? (
+          <Spinner className="shrink-0 text-[0.875rem]" />
+        ) : isActionRunning && spin ? (
+          <Spinner className="shrink-0 text-[0.875rem]" />
+        ) : (
+          <Icon
+            className={cn(
+              "h-3.5 w-3.5 shrink-0",
+              isActionRunning && !spin && "animate-pulse",
+            )}
+          />
+        )}
+
+        <span className={cn(
+          "truncate transition-opacity duration-300",
+          collapsed ? "lg:opacity-0" : "lg:opacity-100",
+        )}>
+          {displayLabel}
+        </span>
+
+        <span
+          aria-hidden
+          className="absolute inset-y-0.5 left-1.5 right-1.5 bg-midground opacity-0 pointer-events-none transition-opacity duration-200 group-hover/action:opacity-5"
+        />
+
+        {busy && (
+          <span
+            aria-hidden
+            className="absolute left-0 top-0 bottom-0 w-px bg-midground"
+            style={{ mixBlendMode: "plus-lighter" }}
+          />
+        )}
+      </button>
+
+      {collapsed && hovered && liRef.current && (
+        <SidebarTooltip anchor={liRef.current} label={displayLabel} warmRef={tooltipWarmRef} />
+      )}
+    </li>
+  );
+}
+
+function SidebarIconWithTooltip({
+  children,
+  collapsed,
+  label,
+  tooltipWarmRef,
+}: SidebarIconWithTooltipProps) {
+  const ref = useRef<HTMLDivElement>(null);
+  const [hovered, setHovered] = useState(false);
+
+  return (
+    <div
+      ref={ref}
+      className={cn(
+        "relative w-fit",
+        collapsed && "group/icon",
+      )}
+      onMouseEnter={collapsed ? () => setHovered(true) : undefined}
+      onMouseLeave={collapsed ? () => setHovered(false) : undefined}
+    >
+      {children}
+
+      {collapsed && (
+        <span
+          aria-hidden
+          className="absolute inset-y-0 inset-x-[-0.375rem] bg-midground opacity-0 pointer-events-none transition-opacity duration-200 group-hover/icon:opacity-5 hidden lg:block"
+        />
+      )}
+
+      {collapsed && hovered && ref.current && (
+        <SidebarTooltip anchor={ref.current} label={label} warmRef={tooltipWarmRef} />
+      )}
+    </div>
+  );
+}
+
+function GatewayDot({ collapsed, status, tooltipWarmRef }: GatewayDotProps) {
+  const { t } = useI18n();
+  const ref = useRef<HTMLDivElement>(null);
+  const [hovered, setHovered] = useState(false);
+
+  const toneToColor: Record<string, string> = {
+    "text-success": "bg-success",
+    "text-warning": "bg-warning",
+    "text-destructive": "bg-destructive",
+    "text-muted-foreground": "bg-muted-foreground",
+  };
+
+  let color: string;
+  let label: string;
+
+  if (!status) {
+    color = "bg-midground/20";
+    label = t.status.gateway;
+  } else {
+    const gw = gatewayLine(status, t);
+    color = toneToColor[gw.tone] ?? "bg-muted-foreground";
+    label = `${t.status.gateway} ${gw.label}`;
+  }
+
+  return (
+    <div
+      ref={ref}
+      className={cn(
+        "hidden lg:flex py-3 pl-[1.625rem] transition-opacity duration-300",
+        collapsed ? "lg:opacity-100" : "lg:opacity-0 lg:h-0 lg:py-0 lg:overflow-hidden",
+      )}
+      role="status"
+      aria-label={label}
+      tabIndex={collapsed ? 0 : -1}
+      onMouseEnter={collapsed ? () => setHovered(true) : undefined}
+      onMouseLeave={collapsed ? () => setHovered(false) : undefined}
+      onFocus={collapsed ? () => setHovered(true) : undefined}
+      onBlur={collapsed ? () => setHovered(false) : undefined}
+    >
+      <span
+        aria-hidden
+        className={cn("h-1.5 w-1.5 rounded-full", color)}
+      />
+
+      {hovered && ref.current && (
+        <SidebarTooltip anchor={ref.current} label={label} warmRef={tooltipWarmRef} />
+      )}
+    </div>
+  );
+}
+
+function SidebarTooltip({ anchor, label, warmRef }: SidebarTooltipProps) {
+  const rect = anchor.getBoundingClientRect();
+  const sidebar = document.getElementById("app-sidebar");
+  const sidebarRight = sidebar?.getBoundingClientRect().right ?? rect.right;
+
+  const isWarm = warmRef ? Date.now() - warmRef.current < 300 : false;
+
+  useEffect(() => {
+    if (warmRef) warmRef.current = Date.now();
+    return () => {
+      if (warmRef) warmRef.current = Date.now();
+    };
+  }, [warmRef]);
+
+  return createPortal(
+    <span
+      className={cn(
+        "fixed z-[100] pointer-events-none",
+        "px-2 py-1",
+        "bg-background-base/95 border border-current/20 backdrop-blur-sm shadow-lg",
+        "font-mondwest text-display text-xs tracking-[0.1em] text-midground uppercase",
+      )}
+      style={{
+        top: rect.top + rect.height / 2,
+        left: sidebarRight + 8,
+        transform: "translateY(-50%)",
+        opacity: isWarm ? 1 : undefined,
+        animation: isWarm ? "none" : "sidebar-tooltip-in 120ms ease-out",
+      }}
+    >
+      {label}
+    </span>,
+    document.body,
+  );
+}
+
+type TooltipWarmRef = React.RefObject<number>;
+
+interface GatewayDotProps {
+  collapsed: boolean;
+  status: StatusResponse | null;
+  tooltipWarmRef: TooltipWarmRef;
+}
+
 interface NavItem {
   icon: ComponentType<{ className?: string }>;
   label: string;
@@ -831,10 +1128,42 @@ interface NavItem {
   path: string;
 }
 
+interface SidebarIconWithTooltipProps {
+  children: ReactNode;
+  collapsed: boolean;
+  label: string;
+  tooltipWarmRef: TooltipWarmRef;
+}
+
 interface SidebarNavLinkProps {
   closeMobile: () => void;
+  collapsed: boolean;
   item: NavItem;
   t: Translations;
+  tooltipWarmRef: TooltipWarmRef;
+}
+
+interface SidebarSystemActionsProps {
+  collapsed: boolean;
+  onNavigate: () => void;
+  status: StatusResponse | null;
+  tooltipWarmRef: TooltipWarmRef;
+}
+
+interface SidebarTooltipProps {
+  anchor: HTMLElement;
+  label: string;
+  warmRef?: TooltipWarmRef;
+}
+
+interface SystemActionButtonProps {
+  collapsed: boolean;
+  disabled: boolean;
+  isPending: boolean;
+  isRunning: boolean;
+  item: SystemActionItem;
+  onClick: () => void;
+  tooltipWarmRef: TooltipWarmRef;
 }
 
 interface SystemActionItem {
diff --git a/apps/dashboard/src/components/AuthWidget.tsx b/web/src/components/AuthWidget.tsx
similarity index 100%
rename from apps/dashboard/src/components/AuthWidget.tsx
rename to web/src/components/AuthWidget.tsx
diff --git a/apps/dashboard/src/components/AutoField.tsx b/web/src/components/AutoField.tsx
similarity index 98%
rename from apps/dashboard/src/components/AutoField.tsx
rename to web/src/components/AutoField.tsx
index 4e3451c10fd..18475fe8f4c 100644
--- a/apps/dashboard/src/components/AutoField.tsx
+++ b/web/src/components/AutoField.tsx
@@ -1,7 +1,7 @@
 import { Select, SelectOption } from "@nous-research/ui/ui/components/select";
 import { Switch } from "@nous-research/ui/ui/components/switch";
-import { Input } from "@/components/ui/input";
-import { Label } from "@/components/ui/label";
+import { Input } from "@nous-research/ui/ui/components/input";
+import { Label } from "@nous-research/ui/ui/components/label";
 
 function FieldHint({ schema, schemaKey }: { schema: Record<string, unknown>; schemaKey: string }) {
   const keyPath = schemaKey.includes(".") ? schemaKey : "";
diff --git a/apps/dashboard/src/components/Backdrop.tsx b/web/src/components/Backdrop.tsx
similarity index 100%
rename from apps/dashboard/src/components/Backdrop.tsx
rename to web/src/components/Backdrop.tsx
diff --git a/apps/dashboard/src/components/ChatSidebar.tsx b/web/src/components/ChatSidebar.tsx
similarity index 98%
rename from apps/dashboard/src/components/ChatSidebar.tsx
rename to web/src/components/ChatSidebar.tsx
index c401d2f5e1d..ec8ffa442e8 100644
--- a/apps/dashboard/src/components/ChatSidebar.tsx
+++ b/web/src/components/ChatSidebar.tsx
@@ -25,7 +25,7 @@
 
 import { Button } from "@nous-research/ui/ui/components/button";
 import { Badge } from "@nous-research/ui/ui/components/badge";
-import { Card } from "@/components/ui/card";
+import { Card } from "@nous-research/ui/ui/components/card";
 
 import { ModelPickerDialog } from "@/components/ModelPickerDialog";
 import { ToolCall, type ToolEntry } from "@/components/ToolCall";
@@ -333,7 +333,7 @@ export function ChatSidebar({ channel, className }: ChatSidebarProps) {
                 <ChevronDown className="text-text-secondary" />
               ) : undefined
             }
-            className="self-start min-w-0 px-0 py-0 normal-case tracking-normal text-sm font-medium underline-offset-4 decoration-current/40 hover:underline disabled:no-underline"
+            className="self-start min-w-0 px-0 py-0 normal-case tracking-normal text-sm font-medium hover:underline disabled:no-underline"
             title={info.model ?? "switch model"}
           >
             <span className="truncate">{modelLabel}</span>
diff --git a/apps/dashboard/src/components/DeleteConfirmDialog.tsx b/web/src/components/DeleteConfirmDialog.tsx
similarity index 90%
rename from apps/dashboard/src/components/DeleteConfirmDialog.tsx
rename to web/src/components/DeleteConfirmDialog.tsx
index 9e2e82c6809..c839415ffa5 100644
--- a/apps/dashboard/src/components/DeleteConfirmDialog.tsx
+++ b/web/src/components/DeleteConfirmDialog.tsx
@@ -1,4 +1,4 @@
-import { ConfirmDialog } from "@/components/ui/confirm-dialog";
+import { ConfirmDialog } from "@nous-research/ui/ui/components/confirm-dialog";
 import { useI18n } from "@/i18n";
 
 export function DeleteConfirmDialog({
diff --git a/apps/dashboard/src/components/LanguageSwitcher.tsx b/web/src/components/LanguageSwitcher.tsx
similarity index 63%
rename from apps/dashboard/src/components/LanguageSwitcher.tsx
rename to web/src/components/LanguageSwitcher.tsx
index 9f790026550..fa3e99949c6 100644
--- a/apps/dashboard/src/components/LanguageSwitcher.tsx
+++ b/web/src/components/LanguageSwitcher.tsx
@@ -1,8 +1,10 @@
 import { useState, useRef, useEffect } from "react";
+import { createPortal } from "react-dom";
+import { Check } from "lucide-react";
 import { Button } from "@nous-research/ui/ui/components/button";
-import { BottomPickSheet } from "@/components/BottomPickSheet";
-import { Typography } from "@/components/NouiTypography";
-import { useBelowBreakpoint } from "@/hooks/useBelowBreakpoint";
+import { BottomSheet } from "@nous-research/ui/ui/components/bottom-sheet";
+import { Typography } from "@nous-research/ui/ui/components/typography/index";
+import { useBelowBreakpoint } from "@nous-research/ui/hooks/use-below-breakpoint";
 import { useI18n } from "@/i18n/context";
 import { LOCALE_META } from "@/i18n";
 import type { Locale } from "@/i18n";
@@ -25,10 +27,11 @@ import { cn } from "@/lib/utils";
  * viewport / overflow ancestors. Below the `sm` breakpoint, `dropUp` uses a
  * bottom sheet portaled to `document.body` instead of an anchored dropdown.
  */
-export function LanguageSwitcher({ dropUp = false }: LanguageSwitcherProps) {
+export function LanguageSwitcher({ collapsed = false, dropUp = false }: LanguageSwitcherProps) {
   const { locale, setLocale, t } = useI18n();
   const [open, setOpen] = useState(false);
   const containerRef = useRef<HTMLDivElement>(null);
+  const dropdownRef = useRef<HTMLDivElement>(null);
   const narrowViewport = useBelowBreakpoint(640);
   const useMobileSheet = Boolean(dropUp && narrowViewport);
 
@@ -41,15 +44,14 @@ export function LanguageSwitcher({ dropUp = false }: LanguageSwitcherProps) {
     return () => document.removeEventListener("keydown", onKey);
   }, [open]);
 
-  // Outside-click closing only for anchored dropdown — sheet uses backdrop + portal.
   useEffect(() => {
     if (!open || useMobileSheet) return;
 
     function onPointerDown(e: PointerEvent) {
-      if (!containerRef.current) return;
-      if (!containerRef.current.contains(e.target as Node)) {
-        setOpen(false);
-      }
+      const target = e.target as Node;
+      if (containerRef.current?.contains(target)) return;
+      if (dropdownRef.current?.contains(target)) return;
+      setOpen(false);
     }
 
     document.addEventListener("pointerdown", onPointerDown);
@@ -69,7 +71,10 @@ export function LanguageSwitcher({ dropUp = false }: LanguageSwitcherProps) {
         aria-label={t.language.switchTo}
         aria-haspopup="listbox"
         aria-expanded={open}
-        className="px-2 py-1 normal-case tracking-normal font-normal text-xs text-text-secondary hover:text-foreground"
+        className={cn(
+          "px-2 py-1 normal-case tracking-normal font-normal text-xs text-text-secondary hover:text-foreground",
+          collapsed && "hover:bg-transparent",
+        )}
       >
         <span className="inline-flex items-center gap-1.5">
           <Typography
@@ -82,7 +87,7 @@ export function LanguageSwitcher({ dropUp = false }: LanguageSwitcherProps) {
       </Button>
 
       {useMobileSheet && (
-        <BottomPickSheet
+        <BottomSheet
           backdropDismissLabel={t.common.close}
           onClose={() => setOpen(false)}
           open={open}
@@ -96,26 +101,36 @@ export function LanguageSwitcher({ dropUp = false }: LanguageSwitcherProps) {
               setOpen={setOpen}
             />
           </div>
-        </BottomPickSheet>
+        </BottomSheet>
       )}
 
-      {open && !useMobileSheet && (
-        <div
-          aria-label={sheetTitle}
-          className={cn(
-            "absolute right-0 z-50 min-w-[10rem] rounded-md border border-border bg-popover shadow-md py-1 max-h-80 overflow-y-auto",
-            dropUp ? "bottom-full mb-1" : "top-full mt-1",
-          )}
-          role="listbox"
-        >
-          <LanguageSwitcherOptions
-            allLocales={allLocales}
-            locale={locale}
-            setLocale={setLocale}
-            setOpen={setOpen}
-          />
-        </div>
-      )}
+      {open && !useMobileSheet && (() => {
+        const rect = containerRef.current?.getBoundingClientRect();
+        const dropdown = (
+          <div
+            ref={dropdownRef}
+            aria-label={sheetTitle}
+            className={cn(
+              "min-w-[10rem] border border-border bg-popover shadow-md py-1 max-h-80 overflow-y-auto",
+              dropUp ? "fixed z-[100]" : "absolute z-50 right-0 top-full mt-1",
+            )}
+            role="listbox"
+            style={
+              dropUp && rect
+                ? { bottom: window.innerHeight - rect.top + 4, left: rect.left }
+                : undefined
+            }
+          >
+            <LanguageSwitcherOptions
+              allLocales={allLocales}
+              locale={locale}
+              setLocale={setLocale}
+              setOpen={setOpen}
+            />
+          </div>
+        );
+        return dropUp ? createPortal(dropdown, document.body) : dropdown;
+      })()}
     </div>
   );
 }
@@ -134,10 +149,12 @@ function LanguageSwitcherOptions({
         return (
           <button
             aria-selected={selected}
-            className={
-              "w-full text-left px-3 py-1.5 text-xs flex items-center gap-2 hover:bg-accent hover:text-accent-foreground transition-colors " +
-              (selected ? "font-semibold text-foreground" : "text-muted-foreground")
-            }
+            className={cn(
+              "w-full text-left px-3 py-1.5 flex items-center gap-2 cursor-pointer",
+              "font-mondwest text-display text-xs tracking-[0.08em]",
+              "hover:bg-accent hover:text-accent-foreground transition-colors",
+              selected ? "font-semibold text-foreground" : "text-muted-foreground",
+            )}
             key={code}
             onClick={() => {
               setLocale(code);
@@ -148,7 +165,7 @@ function LanguageSwitcherOptions({
           >
             <span className="truncate">{meta.name}</span>
 
-            {selected && <span className="ml-auto text-xs">✓</span>}
+            {selected && <Check className="ml-auto h-3 w-3 shrink-0 text-midground" />}
           </button>
         );
       })}
@@ -164,5 +181,6 @@ interface LanguageSwitcherOptionsProps {
 }
 
 interface LanguageSwitcherProps {
+  collapsed?: boolean;
   dropUp?: boolean;
 }
diff --git a/apps/dashboard/src/components/Markdown.tsx b/web/src/components/Markdown.tsx
similarity index 99%
rename from apps/dashboard/src/components/Markdown.tsx
rename to web/src/components/Markdown.tsx
index 0b26f99ec1d..a78c4430c34 100644
--- a/apps/dashboard/src/components/Markdown.tsx
+++ b/web/src/components/Markdown.tsx
@@ -344,7 +344,7 @@ function InlineContent({
                 href={href}
                 target="_blank"
                 rel="noreferrer"
-                className="text-primary underline underline-offset-4 decoration-current/40 transition-colors"
+                className="text-primary underline underline-offset-2 decoration-primary/30 hover:decoration-primary/60 transition-colors"
               >
                 {node.text}
               </a>
diff --git a/apps/dashboard/src/components/ModelInfoCard.tsx b/web/src/components/ModelInfoCard.tsx
similarity index 100%
rename from apps/dashboard/src/components/ModelInfoCard.tsx
rename to web/src/components/ModelInfoCard.tsx
diff --git a/apps/dashboard/src/components/ModelPickerDialog.tsx b/web/src/components/ModelPickerDialog.tsx
similarity index 99%
rename from apps/dashboard/src/components/ModelPickerDialog.tsx
rename to web/src/components/ModelPickerDialog.tsx
index e6aeb3a5aa3..94b5d3e5fe4 100644
--- a/apps/dashboard/src/components/ModelPickerDialog.tsx
+++ b/web/src/components/ModelPickerDialog.tsx
@@ -2,8 +2,8 @@ import { Button } from "@nous-research/ui/ui/components/button";
 import { Checkbox } from "@nous-research/ui/ui/components/checkbox";
 import { ListItem } from "@nous-research/ui/ui/components/list-item";
 import { Spinner } from "@nous-research/ui/ui/components/spinner";
-import { Label } from "@/components/ui/label";
-import { Input } from "@/components/ui/input";
+import { Input } from "@nous-research/ui/ui/components/input";
+import { Label } from "@nous-research/ui/ui/components/label";
 import type { GatewayClient } from "@/lib/gatewayClient";
 import { Check, Search, X } from "lucide-react";
 import { useEffect, useMemo, useRef, useState } from "react";
diff --git a/apps/dashboard/src/components/OAuthLoginModal.tsx b/web/src/components/OAuthLoginModal.tsx
similarity index 98%
rename from apps/dashboard/src/components/OAuthLoginModal.tsx
rename to web/src/components/OAuthLoginModal.tsx
index a2e5c9475a6..060761c8334 100644
--- a/apps/dashboard/src/components/OAuthLoginModal.tsx
+++ b/web/src/components/OAuthLoginModal.tsx
@@ -3,9 +3,9 @@ import { ExternalLink, X, Check } from "lucide-react";
 import { Button } from "@nous-research/ui/ui/components/button";
 import { CopyButton } from "@nous-research/ui/ui/components/command-block";
 import { Spinner } from "@nous-research/ui/ui/components/spinner";
-import { H2 } from "@/components/NouiTypography";
+import { H2 } from "@nous-research/ui/ui/components/typography/h2";
 import { api, type OAuthProvider, type OAuthStartResponse } from "@/lib/api";
-import { Input } from "@/components/ui/input";
+import { Input } from "@nous-research/ui/ui/components/input";
 import { useI18n } from "@/i18n";
 import { cn, themedBody } from "@/lib/utils";
 
diff --git a/apps/dashboard/src/components/OAuthProvidersCard.tsx b/web/src/components/OAuthProvidersCard.tsx
similarity index 98%
rename from apps/dashboard/src/components/OAuthProvidersCard.tsx
rename to web/src/components/OAuthProvidersCard.tsx
index bb66f6a0d66..19d95621cf6 100644
--- a/apps/dashboard/src/components/OAuthProvidersCard.tsx
+++ b/web/src/components/OAuthProvidersCard.tsx
@@ -16,9 +16,9 @@ import {
   CardDescription,
   CardHeader,
   CardTitle,
-} from "@/components/ui/card";
+} from "@nous-research/ui/ui/components/card";
 import { Badge } from "@nous-research/ui/ui/components/badge";
-import { ConfirmDialog } from "@/components/ui/confirm-dialog";
+import { ConfirmDialog } from "@nous-research/ui/ui/components/confirm-dialog";
 import { OAuthLoginModal } from "@/components/OAuthLoginModal";
 import { useI18n } from "@/i18n";
 
diff --git a/apps/dashboard/src/components/PlatformsCard.tsx b/web/src/components/PlatformsCard.tsx
similarity index 97%
rename from apps/dashboard/src/components/PlatformsCard.tsx
rename to web/src/components/PlatformsCard.tsx
index a5e8cd727b2..c7d4a3baf4d 100644
--- a/apps/dashboard/src/components/PlatformsCard.tsx
+++ b/web/src/components/PlatformsCard.tsx
@@ -2,7 +2,7 @@ import { AlertTriangle, Radio, Wifi, WifiOff } from "lucide-react";
 import type { PlatformStatus } from "@/lib/api";
 import { isoTimeAgo } from "@/lib/utils";
 import { Badge } from "@nous-research/ui/ui/components/badge";
-import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
+import { Card, CardContent, CardHeader, CardTitle } from "@nous-research/ui/ui/components/card";
 import { useI18n } from "@/i18n";
 
 export function PlatformsCard({ platforms }: PlatformsCardProps) {
diff --git a/apps/dashboard/src/components/SidebarFooter.tsx b/web/src/components/SidebarFooter.tsx
similarity index 79%
rename from apps/dashboard/src/components/SidebarFooter.tsx
rename to web/src/components/SidebarFooter.tsx
index 70ab23d25a8..e133e4f5ee2 100644
--- a/apps/dashboard/src/components/SidebarFooter.tsx
+++ b/web/src/components/SidebarFooter.tsx
@@ -1,10 +1,9 @@
-import { Typography } from "@/components/NouiTypography";
-import { useSidebarStatus } from "@/hooks/useSidebarStatus";
+import { Typography } from "@nous-research/ui/ui/components/typography/index";
+import type { StatusResponse } from "@/lib/api";
 import { cn } from "@/lib/utils";
 import { useI18n } from "@/i18n";
 
-export function SidebarFooter() {
-  const status = useSidebarStatus();
+export function SidebarFooter({ status }: SidebarFooterProps) {
   const { t } = useI18n();
 
   return (
@@ -37,3 +36,7 @@ export function SidebarFooter() {
     </div>
   );
 }
+
+interface SidebarFooterProps {
+  status: StatusResponse | null;
+}
diff --git a/apps/dashboard/src/components/SidebarStatusStrip.tsx b/web/src/components/SidebarStatusStrip.tsx
similarity index 92%
rename from apps/dashboard/src/components/SidebarStatusStrip.tsx
rename to web/src/components/SidebarStatusStrip.tsx
index 6556f492c25..10612ace641 100644
--- a/apps/dashboard/src/components/SidebarStatusStrip.tsx
+++ b/web/src/components/SidebarStatusStrip.tsx
@@ -1,12 +1,10 @@
 import { Link } from "react-router-dom";
 import type { StatusResponse } from "@/lib/api";
-import { useSidebarStatus } from "@/hooks/useSidebarStatus";
 import { cn } from "@/lib/utils";
 import { useI18n } from "@/i18n";
 
 /** Gateway + session summary for the System sidebar block (no separate strip chrome). */
-export function SidebarStatusStrip() {
-  const status = useSidebarStatus();
+export function SidebarStatusStrip({ status }: SidebarStatusStripProps) {
   const { t } = useI18n();
 
   if (status === null) {
@@ -50,7 +48,7 @@ export function SidebarStatusStrip() {
   );
 }
 
-function gatewayLine(
+export function gatewayLine(
   status: StatusResponse,
   t: ReturnType<typeof useI18n>["t"],
 ): { label: string; tone: string } {
@@ -68,3 +66,7 @@ function gatewayLine(
     ? { label: g.running, tone: "text-success" }
     : { label: g.off, tone: "text-muted-foreground" };
 }
+
+interface SidebarStatusStripProps {
+  status: StatusResponse | null;
+}
diff --git a/apps/dashboard/src/components/SlashPopover.tsx b/web/src/components/SlashPopover.tsx
similarity index 100%
rename from apps/dashboard/src/components/SlashPopover.tsx
rename to web/src/components/SlashPopover.tsx
diff --git a/apps/dashboard/src/components/ThemeSwitcher.tsx b/web/src/components/ThemeSwitcher.tsx
similarity index 69%
rename from apps/dashboard/src/components/ThemeSwitcher.tsx
rename to web/src/components/ThemeSwitcher.tsx
index f1359dd442d..66cf9b0c548 100644
--- a/apps/dashboard/src/components/ThemeSwitcher.tsx
+++ b/web/src/components/ThemeSwitcher.tsx
@@ -1,10 +1,11 @@
 import { useCallback, useEffect, useRef, useState } from "react";
+import { createPortal } from "react-dom";
 import { Palette, Check } from "lucide-react";
 import { Button } from "@nous-research/ui/ui/components/button";
 import { ListItem } from "@nous-research/ui/ui/components/list-item";
-import { BottomPickSheet } from "@/components/BottomPickSheet";
-import { Typography } from "@/components/NouiTypography";
-import { useBelowBreakpoint } from "@/hooks/useBelowBreakpoint";
+import { BottomSheet } from "@nous-research/ui/ui/components/bottom-sheet";
+import { Typography } from "@nous-research/ui/ui/components/typography/index";
+import { useBelowBreakpoint } from "@nous-research/ui/hooks/use-below-breakpoint";
 import { BUILTIN_THEMES, useTheme } from "@/themes";
 import type { DashboardTheme, ThemeListEntry } from "@/themes";
 import { useI18n } from "@/i18n";
@@ -23,11 +24,12 @@ import { cn } from "@/lib/utils";
  * bottom sheet portaled to `document.body` so the picker is not clipped by
  * the sidebar (same idea as a responsive Drawer).
  */
-export function ThemeSwitcher({ dropUp = false }: ThemeSwitcherProps) {
+export function ThemeSwitcher({ collapsed = false, dropUp = false }: ThemeSwitcherProps) {
   const { themeName, availableThemes, setTheme } = useTheme();
   const { t } = useI18n();
   const [open, setOpen] = useState(false);
   const wrapperRef = useRef<HTMLDivElement>(null);
+  const dropdownRef = useRef<HTMLDivElement>(null);
   const narrowViewport = useBelowBreakpoint(640);
   const useMobileSheet = Boolean(dropUp && narrowViewport);
 
@@ -45,12 +47,10 @@ export function ThemeSwitcher({ dropUp = false }: ThemeSwitcherProps) {
   useEffect(() => {
     if (!open || useMobileSheet) return;
     const onMouseDown = (e: MouseEvent) => {
-      if (
-        wrapperRef.current &&
-        !wrapperRef.current.contains(e.target as Node)
-      ) {
-        close();
-      }
+      const target = e.target as Node;
+      if (wrapperRef.current?.contains(target)) return;
+      if (dropdownRef.current?.contains(target)) return;
+      close();
     };
     document.addEventListener("mousedown", onMouseDown);
     return () => document.removeEventListener("mousedown", onMouseDown);
@@ -64,9 +64,14 @@ export function ThemeSwitcher({ dropUp = false }: ThemeSwitcherProps) {
     <div ref={wrapperRef} className="relative">
       <Button
         ghost
+        size={collapsed ? "icon" : undefined}
         onClick={() => setOpen((o) => !o)}
-        className="px-2 py-1 normal-case tracking-normal font-normal text-xs text-text-secondary hover:text-foreground"
-        title={t.theme?.switchTheme ?? "Switch theme"}
+        className={cn(
+          collapsed
+            ? "text-text-secondary hover:text-foreground hover:bg-transparent"
+            : "px-2 py-1 normal-case tracking-normal font-normal text-xs text-text-secondary hover:text-foreground",
+        )}
+        title={`${t.theme?.switchTheme ?? "Switch theme"}: ${label}`}
         aria-label={t.theme?.switchTheme ?? "Switch theme"}
         aria-expanded={open}
         aria-haspopup="listbox"
@@ -74,17 +79,19 @@ export function ThemeSwitcher({ dropUp = false }: ThemeSwitcherProps) {
         <span className="inline-flex items-center gap-1.5">
           <Palette className="h-3.5 w-3.5" />
 
-          <Typography
-            mondwest
-            className="hidden sm:inline text-display tracking-wide text-xs"
-          >
-            {label}
-          </Typography>
+          {!collapsed && (
+            <Typography
+              mondwest
+              className="hidden sm:inline text-display tracking-wide text-xs"
+            >
+              {label}
+            </Typography>
+          )}
         </span>
       </Button>
 
       {useMobileSheet && (
-        <BottomPickSheet
+        <BottomSheet
           backdropDismissLabel={t.common.close}
           onClose={close}
           open={open}
@@ -98,37 +105,47 @@ export function ThemeSwitcher({ dropUp = false }: ThemeSwitcherProps) {
               themeName={themeName}
             />
           </div>
-        </BottomPickSheet>
+        </BottomSheet>
       )}
 
-      {open && !useMobileSheet && (
-        <div
-          aria-label={sheetTitle}
-          className={cn(
-            "absolute z-50 min-w-[240px] max-h-[70dvh] overflow-y-auto",
-            dropUp ? "left-0 bottom-full mb-1" : "right-0 top-full mt-1",
-            "border border-current/20 bg-background-base/95 backdrop-blur-sm",
-            "shadow-[0_12px_32px_-8px_rgba(0,0,0,0.6)]",
-          )}
-          role="listbox"
-        >
-          <div className="border-b border-current/20 px-3 py-2">
-            <Typography
-              mondwest
-              className="text-display text-xs tracking-[0.12em] text-text-tertiary"
-            >
-              {sheetTitle}
-            </Typography>
+      {open && !useMobileSheet && (() => {
+        const rect = wrapperRef.current?.getBoundingClientRect();
+        const dropdown = (
+          <div
+            ref={dropdownRef}
+            aria-label={sheetTitle}
+            className={cn(
+              "min-w-[240px] max-h-[70dvh] overflow-y-auto",
+              "border border-current/20 bg-background-base/95 backdrop-blur-sm",
+              "shadow-[0_12px_32px_-8px_rgba(0,0,0,0.6)]",
+              dropUp ? "fixed z-[100]" : "absolute z-50 right-0 top-full mt-1",
+            )}
+            role="listbox"
+            style={
+              dropUp && rect
+                ? { bottom: window.innerHeight - rect.top + 4, left: rect.left }
+                : undefined
+            }
+          >
+            <div className="border-b border-current/20 px-3 py-2">
+              <Typography
+                mondwest
+                className="text-display text-xs tracking-[0.12em] text-text-tertiary"
+              >
+                {sheetTitle}
+              </Typography>
+            </div>
+
+            <ThemeSwitcherOptions
+              availableThemes={availableThemes}
+              close={close}
+              setTheme={setTheme}
+              themeName={themeName}
+            />
           </div>
-
-          <ThemeSwitcherOptions
-            availableThemes={availableThemes}
-            close={close}
-            setTheme={setTheme}
-            themeName={themeName}
-          />
-        </div>
-      )}
+        );
+        return dropUp ? createPortal(dropdown, document.body) : dropdown;
+      })()}
     </div>
   );
 }
@@ -221,5 +238,6 @@ interface ThemeSwitcherOptionsProps {
 }
 
 interface ThemeSwitcherProps {
+  collapsed?: boolean;
   dropUp?: boolean;
 }
diff --git a/apps/dashboard/src/components/ToolCall.tsx b/web/src/components/ToolCall.tsx
similarity index 100%
rename from apps/dashboard/src/components/ToolCall.tsx
rename to web/src/components/ToolCall.tsx
diff --git a/apps/dashboard/src/contexts/PageHeaderProvider.tsx b/web/src/contexts/PageHeaderProvider.tsx
similarity index 100%
rename from apps/dashboard/src/contexts/PageHeaderProvider.tsx
rename to web/src/contexts/PageHeaderProvider.tsx
diff --git a/apps/dashboard/src/contexts/SystemActions.tsx b/web/src/contexts/SystemActions.tsx
similarity index 97%
rename from apps/dashboard/src/contexts/SystemActions.tsx
rename to web/src/contexts/SystemActions.tsx
index 65321c6f003..de4ef439557 100644
--- a/apps/dashboard/src/contexts/SystemActions.tsx
+++ b/web/src/contexts/SystemActions.tsx
@@ -1,7 +1,7 @@
 import { useCallback, useEffect, useState } from "react";
 import { api } from "@/lib/api";
 import type { ActionStatusResponse } from "@/lib/api";
-import { Toast } from "@/components/Toast";
+import { Toast } from "@nous-research/ui/ui/components/toast";
 import { useI18n } from "@/i18n";
 import {
   SystemActionsContext,
diff --git a/apps/dashboard/src/contexts/page-header-context.ts b/web/src/contexts/page-header-context.ts
similarity index 100%
rename from apps/dashboard/src/contexts/page-header-context.ts
rename to web/src/contexts/page-header-context.ts
diff --git a/apps/dashboard/src/contexts/system-actions-context.ts b/web/src/contexts/system-actions-context.ts
similarity index 100%
rename from apps/dashboard/src/contexts/system-actions-context.ts
rename to web/src/contexts/system-actions-context.ts
diff --git a/apps/dashboard/src/contexts/usePageHeader.ts b/web/src/contexts/usePageHeader.ts
similarity index 100%
rename from apps/dashboard/src/contexts/usePageHeader.ts
rename to web/src/contexts/usePageHeader.ts
diff --git a/apps/dashboard/src/contexts/useSystemActions.ts b/web/src/contexts/useSystemActions.ts
similarity index 100%
rename from apps/dashboard/src/contexts/useSystemActions.ts
rename to web/src/contexts/useSystemActions.ts
diff --git a/apps/dashboard/src/hooks/useModalBehavior.ts b/web/src/hooks/useModalBehavior.ts
similarity index 100%
rename from apps/dashboard/src/hooks/useModalBehavior.ts
rename to web/src/hooks/useModalBehavior.ts
diff --git a/apps/dashboard/src/hooks/useSidebarStatus.ts b/web/src/hooks/useSidebarStatus.ts
similarity index 100%
rename from apps/dashboard/src/hooks/useSidebarStatus.ts
rename to web/src/hooks/useSidebarStatus.ts
diff --git a/apps/dashboard/src/i18n/af.ts b/web/src/i18n/af.ts
similarity index 99%
rename from apps/dashboard/src/i18n/af.ts
rename to web/src/i18n/af.ts
index 8bc34e81c04..c3d6312aa3f 100644
--- a/apps/dashboard/src/i18n/af.ts
+++ b/web/src/i18n/af.ts
@@ -127,6 +127,7 @@ export const af: Translations = {
 
   sessions: {
     title: "Sessies",
+    history: "Geskiedenis",
     overview: "Oorsig",
     searchPlaceholder: "Soek boodskap-inhoud...",
     noSessions: "Nog geen sessies nie",
@@ -422,7 +423,7 @@ export const af: Translations = {
   },
 
   language: {
-    switchTo: "Skakel oor na Engels",
+    switchTo: "Verander taal",
   },
 
   theme: {
diff --git a/apps/dashboard/src/i18n/context.tsx b/web/src/i18n/context.tsx
similarity index 100%
rename from apps/dashboard/src/i18n/context.tsx
rename to web/src/i18n/context.tsx
diff --git a/apps/dashboard/src/i18n/de.ts b/web/src/i18n/de.ts
similarity index 99%
rename from apps/dashboard/src/i18n/de.ts
rename to web/src/i18n/de.ts
index ef41f494418..d6fdfe64548 100644
--- a/apps/dashboard/src/i18n/de.ts
+++ b/web/src/i18n/de.ts
@@ -127,6 +127,7 @@ export const de: Translations = {
 
   sessions: {
     title: "Sitzungen",
+    history: "Verlauf",
     overview: "Übersicht",
     searchPlaceholder: "Nachrichteninhalt suchen...",
     noSessions: "Noch keine Sitzungen",
@@ -422,7 +423,7 @@ export const de: Translations = {
   },
 
   language: {
-    switchTo: "Zu Englisch wechseln",
+    switchTo: "Sprache wechseln",
   },
 
   theme: {
diff --git a/apps/dashboard/src/i18n/en.ts b/web/src/i18n/en.ts
similarity index 99%
rename from apps/dashboard/src/i18n/en.ts
rename to web/src/i18n/en.ts
index ac67b6eaf75..f792bf4dc3f 100644
--- a/apps/dashboard/src/i18n/en.ts
+++ b/web/src/i18n/en.ts
@@ -127,6 +127,7 @@ export const en: Translations = {
 
   sessions: {
     title: "Sessions",
+    history: "History",
     overview: "Overview",
     searchPlaceholder: "Search message content...",
     noSessions: "No sessions yet",
@@ -422,7 +423,7 @@ export const en: Translations = {
   },
 
   language: {
-    switchTo: "Switch to Chinese",
+    switchTo: "Switch language",
   },
 
   theme: {
diff --git a/apps/dashboard/src/i18n/es.ts b/web/src/i18n/es.ts
similarity index 99%
rename from apps/dashboard/src/i18n/es.ts
rename to web/src/i18n/es.ts
index 067d595ae88..84a1501e97b 100644
--- a/apps/dashboard/src/i18n/es.ts
+++ b/web/src/i18n/es.ts
@@ -127,6 +127,7 @@ export const es: Translations = {
 
   sessions: {
     title: "Sesiones",
+    history: "Historial",
     overview: "Resumen",
     searchPlaceholder: "Buscar contenido de mensajes...",
     noSessions: "Aún no hay sesiones",
@@ -422,7 +423,7 @@ export const es: Translations = {
   },
 
   language: {
-    switchTo: "Cambiar a inglés",
+    switchTo: "Cambiar idioma",
   },
 
   theme: {
diff --git a/apps/dashboard/src/i18n/fr.ts b/web/src/i18n/fr.ts
similarity index 99%
rename from apps/dashboard/src/i18n/fr.ts
rename to web/src/i18n/fr.ts
index 672f5d90730..409c0a1e397 100644
--- a/apps/dashboard/src/i18n/fr.ts
+++ b/web/src/i18n/fr.ts
@@ -127,6 +127,7 @@ export const fr: Translations = {
 
   sessions: {
     title: "Sessions",
+    history: "Historique",
     overview: "Aperçu",
     searchPlaceholder: "Rechercher dans les messages...",
     noSessions: "Aucune session pour l'instant",
@@ -422,7 +423,7 @@ export const fr: Translations = {
   },
 
   language: {
-    switchTo: "Passer à l'anglais",
+    switchTo: "Changer de langue",
   },
 
   theme: {
diff --git a/apps/dashboard/src/i18n/ga.ts b/web/src/i18n/ga.ts
similarity index 99%
rename from apps/dashboard/src/i18n/ga.ts
rename to web/src/i18n/ga.ts
index 2ad89214348..a4d41e30354 100644
--- a/apps/dashboard/src/i18n/ga.ts
+++ b/web/src/i18n/ga.ts
@@ -127,6 +127,7 @@ export const ga: Translations = {
 
   sessions: {
     title: "Seisiúin",
+    history: "Stair",
     overview: "Forbhreathnú",
     searchPlaceholder: "Cuardaigh ábhar teachtaireachta...",
     noSessions: "Gan seisiúin go fóill",
@@ -422,7 +423,7 @@ export const ga: Translations = {
   },
 
   language: {
-    switchTo: "Athraigh go Béarla",
+    switchTo: "Athraigh teanga",
   },
 
   theme: {
diff --git a/apps/dashboard/src/i18n/hu.ts b/web/src/i18n/hu.ts
similarity index 99%
rename from apps/dashboard/src/i18n/hu.ts
rename to web/src/i18n/hu.ts
index 92e21f39596..7814aff86c8 100644
--- a/apps/dashboard/src/i18n/hu.ts
+++ b/web/src/i18n/hu.ts
@@ -127,6 +127,7 @@ export const hu: Translations = {
 
   sessions: {
     title: "Munkamenetek",
+    history: "Előzmények",
     overview: "Áttekintés",
     searchPlaceholder: "Keresés üzenettartalomban...",
     noSessions: "Még nincsenek munkamenetek",
@@ -422,7 +423,7 @@ export const hu: Translations = {
   },
 
   language: {
-    switchTo: "Váltás angolra",
+    switchTo: "Nyelv váltása",
   },
 
   theme: {
diff --git a/apps/dashboard/src/i18n/index.ts b/web/src/i18n/index.ts
similarity index 100%
rename from apps/dashboard/src/i18n/index.ts
rename to web/src/i18n/index.ts
diff --git a/apps/dashboard/src/i18n/it.ts b/web/src/i18n/it.ts
similarity index 99%
rename from apps/dashboard/src/i18n/it.ts
rename to web/src/i18n/it.ts
index 1089cdbb9a4..1485cb68778 100644
--- a/apps/dashboard/src/i18n/it.ts
+++ b/web/src/i18n/it.ts
@@ -127,6 +127,7 @@ export const it: Translations = {
 
   sessions: {
     title: "Sessioni",
+    history: "Cronologia",
     overview: "Panoramica",
     searchPlaceholder: "Cerca nel contenuto dei messaggi...",
     noSessions: "Nessuna sessione",
@@ -422,7 +423,7 @@ export const it: Translations = {
   },
 
   language: {
-    switchTo: "Passa all'inglese",
+    switchTo: "Cambia lingua",
   },
 
   theme: {
diff --git a/apps/dashboard/src/i18n/ja.ts b/web/src/i18n/ja.ts
similarity index 99%
rename from apps/dashboard/src/i18n/ja.ts
rename to web/src/i18n/ja.ts
index d4e23aa46a1..1b9ad88ea5f 100644
--- a/apps/dashboard/src/i18n/ja.ts
+++ b/web/src/i18n/ja.ts
@@ -127,6 +127,7 @@ export const ja: Translations = {
 
   sessions: {
     title: "セッション",
+    history: "履歴",
     overview: "概要",
     searchPlaceholder: "メッセージ内容を検索...",
     noSessions: "まだセッションがありません",
@@ -422,7 +423,7 @@ export const ja: Translations = {
   },
 
   language: {
-    switchTo: "英語に切り替え",
+    switchTo: "言語を切り替え",
   },
 
   theme: {
diff --git a/apps/dashboard/src/i18n/ko.ts b/web/src/i18n/ko.ts
similarity index 99%
rename from apps/dashboard/src/i18n/ko.ts
rename to web/src/i18n/ko.ts
index 2766f4d9f58..4fcb6f0010e 100644
--- a/apps/dashboard/src/i18n/ko.ts
+++ b/web/src/i18n/ko.ts
@@ -127,6 +127,7 @@ export const ko: Translations = {
 
   sessions: {
     title: "세션",
+    history: "기록",
     overview: "개요",
     searchPlaceholder: "메시지 내용 검색...",
     noSessions: "아직 세션이 없습니다",
@@ -422,7 +423,7 @@ export const ko: Translations = {
   },
 
   language: {
-    switchTo: "영어로 전환",
+    switchTo: "언어 변경",
   },
 
   theme: {
diff --git a/apps/dashboard/src/i18n/pt.ts b/web/src/i18n/pt.ts
similarity index 99%
rename from apps/dashboard/src/i18n/pt.ts
rename to web/src/i18n/pt.ts
index 512519a3fd5..b84c99b67bf 100644
--- a/apps/dashboard/src/i18n/pt.ts
+++ b/web/src/i18n/pt.ts
@@ -127,6 +127,7 @@ export const pt: Translations = {
 
   sessions: {
     title: "Sessões",
+    history: "Histórico",
     overview: "Visão geral",
     searchPlaceholder: "Pesquisar conteúdo das mensagens...",
     noSessions: "Ainda não há sessões",
@@ -422,7 +423,7 @@ export const pt: Translations = {
   },
 
   language: {
-    switchTo: "Mudar para inglês",
+    switchTo: "Mudar idioma",
   },
 
   theme: {
diff --git a/apps/dashboard/src/i18n/ru.ts b/web/src/i18n/ru.ts
similarity index 99%
rename from apps/dashboard/src/i18n/ru.ts
rename to web/src/i18n/ru.ts
index 98b45f9f3a6..e9b5e2cb84a 100644
--- a/apps/dashboard/src/i18n/ru.ts
+++ b/web/src/i18n/ru.ts
@@ -127,6 +127,7 @@ export const ru: Translations = {
 
   sessions: {
     title: "Сессии",
+    history: "История",
     overview: "Обзор",
     searchPlaceholder: "Поиск по содержимому сообщений...",
     noSessions: "Сессий пока нет",
@@ -422,7 +423,7 @@ export const ru: Translations = {
   },
 
   language: {
-    switchTo: "Переключиться на английский",
+    switchTo: "Сменить язык",
   },
 
   theme: {
diff --git a/apps/dashboard/src/i18n/tr.ts b/web/src/i18n/tr.ts
similarity index 99%
rename from apps/dashboard/src/i18n/tr.ts
rename to web/src/i18n/tr.ts
index 64b69887f52..f9aaa14d4b1 100644
--- a/apps/dashboard/src/i18n/tr.ts
+++ b/web/src/i18n/tr.ts
@@ -127,6 +127,7 @@ export const tr: Translations = {
 
   sessions: {
     title: "Oturumlar",
+    history: "Geçmiş",
     overview: "Genel bakış",
     searchPlaceholder: "Mesaj içeriğinde ara...",
     noSessions: "Henüz oturum yok",
@@ -422,7 +423,7 @@ export const tr: Translations = {
   },
 
   language: {
-    switchTo: "İngilizce'ye geç",
+    switchTo: "Dil değiştir",
   },
 
   theme: {
diff --git a/apps/dashboard/src/i18n/types.ts b/web/src/i18n/types.ts
similarity index 99%
rename from apps/dashboard/src/i18n/types.ts
rename to web/src/i18n/types.ts
index b45c6339f75..15f2f1a0c92 100644
--- a/apps/dashboard/src/i18n/types.ts
+++ b/web/src/i18n/types.ts
@@ -145,6 +145,7 @@ export interface Translations {
   // ── Sessions page ──
   sessions: {
     title: string;
+    history: string;
     overview: string;
     searchPlaceholder: string;
     noSessions: string;
diff --git a/apps/dashboard/src/i18n/uk.ts b/web/src/i18n/uk.ts
similarity index 99%
rename from apps/dashboard/src/i18n/uk.ts
rename to web/src/i18n/uk.ts
index 69dccf7caf3..8d67f58ecca 100644
--- a/apps/dashboard/src/i18n/uk.ts
+++ b/web/src/i18n/uk.ts
@@ -127,6 +127,7 @@ export const uk: Translations = {
 
   sessions: {
     title: "Сесії",
+    history: "Історія",
     overview: "Огляд",
     searchPlaceholder: "Пошук у вмісті повідомлень...",
     noSessions: "Поки немає сесій",
@@ -422,7 +423,7 @@ export const uk: Translations = {
   },
 
   language: {
-    switchTo: "Перемкнути на англійську",
+    switchTo: "Змінити мову",
   },
 
   theme: {
diff --git a/apps/dashboard/src/i18n/zh-hant.ts b/web/src/i18n/zh-hant.ts
similarity index 99%
rename from apps/dashboard/src/i18n/zh-hant.ts
rename to web/src/i18n/zh-hant.ts
index 2edb67e02aa..e569b27a487 100644
--- a/apps/dashboard/src/i18n/zh-hant.ts
+++ b/web/src/i18n/zh-hant.ts
@@ -127,6 +127,7 @@ export const zhHant: Translations = {
 
   sessions: {
     title: "工作階段",
+    history: "歷史",
     overview: "總覽",
     searchPlaceholder: "搜尋訊息內容...",
     noSessions: "尚無工作階段",
@@ -422,7 +423,7 @@ export const zhHant: Translations = {
   },
 
   language: {
-    switchTo: "切換為英文",
+    switchTo: "切換語言",
   },
 
   theme: {
diff --git a/apps/dashboard/src/i18n/zh.ts b/web/src/i18n/zh.ts
similarity index 99%
rename from apps/dashboard/src/i18n/zh.ts
rename to web/src/i18n/zh.ts
index 60e6521a082..5bc5ae49355 100644
--- a/apps/dashboard/src/i18n/zh.ts
+++ b/web/src/i18n/zh.ts
@@ -126,6 +126,7 @@ export const zh: Translations = {
 
   sessions: {
     title: "会话",
+    history: "历史",
     overview: "概览",
     searchPlaceholder: "搜索消息内容...",
     noSessions: "暂无会话",
@@ -417,7 +418,7 @@ export const zh: Translations = {
   },
 
   language: {
-    switchTo: "切换到英文",
+    switchTo: "切换语言",
   },
 
   theme: {
diff --git a/apps/dashboard/src/index.css b/web/src/index.css
similarity index 94%
rename from apps/dashboard/src/index.css
rename to web/src/index.css
index 01b6d9bd178..342a4856f65 100644
--- a/apps/dashboard/src/index.css
+++ b/web/src/index.css
@@ -115,6 +115,8 @@ code, kbd, pre, samp, .font-mono, .font-mono-ui {
    all proportionally in Tailwind v4. */
 @theme inline {
   --spacing: calc(0.25rem * var(--theme-spacing-mul, 1));
+  --font-sans: var(--theme-font-sans);
+  --font-mono: var(--theme-font-mono);
 }
 
 #root {
@@ -124,6 +126,18 @@ code, kbd, pre, samp, .font-mono, .font-mono-ui {
   overflow: hidden;
 }
 
+@media (max-width: 768px) {
+  html,
+  body,
+  #root {
+    min-height: 100dvh;
+    height: auto;
+    max-height: none;
+    overflow-x: hidden;
+    overflow-y: auto;
+  }
+}
+
 /* Nousnet's hermes-agent layout bumps `small` and `code` to readable
    dashboard sizes. Keep in sync. */
 small { font-size: 1.0625rem; }
@@ -170,6 +184,12 @@ code { font-size: 0.875rem; }
 }
 
 
+/* Collapsed sidebar tooltip entrance — skipped when moving between items. */
+@keyframes sidebar-tooltip-in {
+  from { opacity: 0; transform: translateY(-50%) translateX(-4px); }
+  to   { opacity: 1; transform: translateY(-50%) translateX(0); }
+}
+
 /* Toast animations used by `components/Toast.tsx`. */
 @keyframes toast-in {
   from { opacity: 0; transform: translateX(16px); }
diff --git a/apps/dashboard/src/lib/api.ts b/web/src/lib/api.ts
similarity index 91%
rename from apps/dashboard/src/lib/api.ts
rename to web/src/lib/api.ts
index 9f001c0aa7b..f475201d148 100644
--- a/apps/dashboard/src/lib/api.ts
+++ b/web/src/lib/api.ts
@@ -41,7 +41,11 @@ function setSessionHeader(headers: Headers, token: string): void {
   }
 }
 
-export async function fetchJSON<T>(url: string, init?: RequestInit): Promise<T> {
+export async function fetchJSON<T>(
+  url: string,
+  init?: RequestInit,
+  options?: FetchJSONOptions,
+): Promise<T> {
   // Inject the session token into all /api/ requests.
   const headers = new Headers(init?.headers);
   const token = window.__HERMES_SESSION_TOKEN__;
@@ -91,6 +95,43 @@ export async function fetchJSON<T>(url: string, init?: RequestInit): Promise<T>
       // Never resolve — the page is about to unload.
       return new Promise<T>(() => {});
     }
+    // Loopback mode: ``_SESSION_TOKEN`` rotates on every server restart
+    // (``hermes update``, ``hermes gateway restart``, etc.). A tab kept
+    // open across the restart holds the OLD token in
+    // ``window.__HERMES_SESSION_TOKEN__`` from the previous HTML render,
+    // so every fetch returns 401. The HTML is served ``Cache-Control:
+    // no-store`` so a reload picks up the freshly-injected token. Trigger
+    // that reload once on the first stale-token 401 — gated mode is
+    // handled above, so reaching here in gated mode means a real
+    // middleware failure that should not reload-loop.
+    if (!window.__HERMES_AUTH_REQUIRED__ && !options?.allowUnauthorized) {
+      let alreadyReloaded = false;
+      try {
+        alreadyReloaded =
+          sessionStorage.getItem("hermes.tokenReloadAttempted") === "1";
+      } catch {
+        /* SSR / privacy mode — fall through to throw */
+      }
+      if (!alreadyReloaded) {
+        try {
+          sessionStorage.setItem("hermes.tokenReloadAttempted", "1");
+        } catch {
+          /* SSR / privacy mode — best effort */
+        }
+        window.location.reload();
+        return new Promise<T>(() => {});
+      }
+    }
+  }
+  if (res.ok) {
+    // Clear the stale-token reload guard: a successful 2xx proves the
+    // current ``window.__HERMES_SESSION_TOKEN__`` is valid, so the next
+    // 401 — if any — should be allowed to trigger its own reload cycle.
+    try {
+      sessionStorage.removeItem("hermes.tokenReloadAttempted");
+    } catch {
+      /* SSR / privacy mode — ignore */
+    }
   }
   if (!res.ok) {
     const text = await res.text().catch(() => res.statusText);
@@ -161,8 +202,19 @@ export const api = {
    * still exists but is never useful there (no Session, no cookie). The
    * AuthWidget component swallows 401s from this call: if the gate isn't
    * engaged, /api/auth/me returns 401 and the widget renders nothing.
+   *
+   * ``allowUnauthorized`` is load-bearing: in loopback mode this endpoint
+   * 401s by design, and fetchJSON's default loopback behaviour treats a
+   * 401 as a rotated session token and full-page-reloads to pick up a
+   * fresh one. Because every *other* dashboard request succeeds (and so
+   * clears the one-shot reload guard), that turns this expected 401 into
+   * an infinite reload loop. Opting out keeps the 401 a plain throw the
+   * widget can catch.
    */
-  getAuthMe: () => fetchJSON<AuthMeResponse>("/api/auth/me"),
+  getAuthMe: () =>
+    fetchJSON<AuthMeResponse>("/api/auth/me", undefined, {
+      allowUnauthorized: true,
+    }),
   logout: () =>
     fetch(`${BASE}/auth/logout`, {
       method: "POST",
@@ -477,6 +529,15 @@ export interface ActionResponse {
   pid: number;
 }
 
+/** Per-call overrides for {@link fetchJSON}. */
+interface FetchJSONOptions {
+  /** When true, a 401 response is surfaced as a normal thrown error rather
+   *  than triggering the loopback stale-token page reload. Use for probes
+   *  whose 401 is an expected signal (e.g. /api/auth/me in non-gated mode)
+   *  rather than evidence of a rotated session token. */
+  allowUnauthorized?: boolean;
+}
+
 export interface ActionStatusResponse {
   exit_code: number | null;
   lines: string[];
diff --git a/apps/dashboard/src/lib/dashboard-flags.ts b/web/src/lib/dashboard-flags.ts
similarity index 100%
rename from apps/dashboard/src/lib/dashboard-flags.ts
rename to web/src/lib/dashboard-flags.ts
diff --git a/apps/dashboard/src/lib/format.ts b/web/src/lib/format.ts
similarity index 100%
rename from apps/dashboard/src/lib/format.ts
rename to web/src/lib/format.ts
diff --git a/web/src/lib/gatewayClient.ts b/web/src/lib/gatewayClient.ts
new file mode 100644
index 00000000000..16b31ae68a0
--- /dev/null
+++ b/web/src/lib/gatewayClient.ts
@@ -0,0 +1,253 @@
+/**
+ * Browser WebSocket client for the tui_gateway JSON-RPC protocol.
+ *
+ * Speaks the exact same newline-delimited JSON-RPC dialect that the Ink TUI
+ * drives over stdio. The server-side transport abstraction
+ * (tui_gateway/transport.py + ws.py) routes the same dispatcher's writes
+ * onto either stdout or a WebSocket depending on how the client connected.
+ *
+ *   const gw = new GatewayClient()
+ *   await gw.connect()
+ *   const { session_id } = await gw.request<{ session_id: string }>("session.create")
+ *   gw.on("message.delta", (ev) => console.log(ev.payload?.text))
+ *   await gw.request("prompt.submit", { session_id, text: "hi" })
+ */
+
+import { HERMES_BASE_PATH, getWsTicket } from "@/lib/api";
+
+export type GatewayEventName =
+  | "gateway.ready"
+  | "session.info"
+  | "message.start"
+  | "message.delta"
+  | "message.complete"
+  | "thinking.delta"
+  | "reasoning.delta"
+  | "reasoning.available"
+  | "status.update"
+  | "tool.start"
+  | "tool.progress"
+  | "tool.complete"
+  | "tool.generating"
+  | "clarify.request"
+  | "approval.request"
+  | "sudo.request"
+  | "secret.request"
+  | "background.complete"
+  | "error"
+  | "skin.changed"
+  | (string & {});
+
+export interface GatewayEvent<P = unknown> {
+  type: GatewayEventName;
+  session_id?: string;
+  payload?: P;
+}
+
+export type ConnectionState =
+  | "idle"
+  | "connecting"
+  | "open"
+  | "closed"
+  | "error";
+
+interface Pending {
+  resolve: (v: unknown) => void;
+  reject: (e: Error) => void;
+  timer: ReturnType<typeof setTimeout>;
+}
+
+const DEFAULT_REQUEST_TIMEOUT_MS = 120_000;
+
+/** Wildcard listener key: subscribe to every event regardless of type. */
+const ANY = "*";
+
+export class GatewayClient {
+  private ws: WebSocket | null = null;
+  private reqId = 0;
+  private pending = new Map<string, Pending>();
+  private listeners = new Map<string, Set<(ev: GatewayEvent) => void>>();
+  private _state: ConnectionState = "idle";
+  private stateListeners = new Set<(s: ConnectionState) => void>();
+
+  get state(): ConnectionState {
+    return this._state;
+  }
+
+  private setState(s: ConnectionState) {
+    if (this._state === s) return;
+    this._state = s;
+    for (const cb of this.stateListeners) cb(s);
+  }
+
+  onState(cb: (s: ConnectionState) => void): () => void {
+    this.stateListeners.add(cb);
+    cb(this._state);
+    return () => this.stateListeners.delete(cb);
+  }
+
+  /** Subscribe to a specific event type. Returns an unsubscribe function. */
+  on<P = unknown>(
+    type: GatewayEventName,
+    cb: (ev: GatewayEvent<P>) => void,
+  ): () => void {
+    let set = this.listeners.get(type);
+    if (!set) {
+      set = new Set();
+      this.listeners.set(type, set);
+    }
+    set.add(cb as (ev: GatewayEvent) => void);
+    return () => set!.delete(cb as (ev: GatewayEvent) => void);
+  }
+
+  /** Subscribe to every event (fires after type-specific listeners). */
+  onAny(cb: (ev: GatewayEvent) => void): () => void {
+    return this.on(ANY as GatewayEventName, cb);
+  }
+
+  async connect(token?: string): Promise<void> {
+    if (this._state === "open" || this._state === "connecting") return;
+    this.setState("connecting");
+
+    // Gated mode: legacy ``?token=`` is rejected by ``_ws_auth_ok``; the
+    // SPA must fetch a single-use ticket via /api/auth/ws-ticket instead.
+    // Explicit ``token`` overrides the gate check (test-only path).
+    let authParamName: string;
+    let authParamValue: string;
+    if (token) {
+      authParamName = "token";
+      authParamValue = token;
+    } else if (window.__HERMES_AUTH_REQUIRED__) {
+      const { ticket } = await getWsTicket();
+      authParamName = "ticket";
+      authParamValue = ticket;
+    } else {
+      authParamName = "token";
+      authParamValue = window.__HERMES_SESSION_TOKEN__ ?? "";
+      if (!authParamValue) {
+        this.setState("error");
+        throw new Error(
+          "Session token not available — page must be served by the Hermes dashboard",
+        );
+      }
+    }
+
+    const scheme = location.protocol === "https:" ? "wss:" : "ws:";
+    const ws = new WebSocket(
+      `${scheme}//${location.host}${HERMES_BASE_PATH}/api/ws?${authParamName}=${encodeURIComponent(authParamValue)}`,
+    );
+    this.ws = ws;
+
+    // Register message + close BEFORE awaiting open — the server emits
+    // `gateway.ready` immediately after accept, so a listener attached
+    // after the open promise resolves can race past it and drop the
+    // initial skin payload.
+    ws.addEventListener("message", (ev) => {
+      try {
+        this.dispatch(JSON.parse(ev.data));
+      } catch {
+        /* malformed frame — ignore */
+      }
+    });
+
+    ws.addEventListener("close", () => {
+      this.setState("closed");
+      this.rejectAllPending(new Error("WebSocket closed"));
+    });
+
+    await new Promise<void>((resolve, reject) => {
+      const onOpen = () => {
+        ws.removeEventListener("error", onError);
+        this.setState("open");
+        resolve();
+      };
+      const onError = () => {
+        ws.removeEventListener("open", onOpen);
+        this.setState("error");
+        reject(new Error("WebSocket connection failed"));
+      };
+      ws.addEventListener("open", onOpen, { once: true });
+      ws.addEventListener("error", onError, { once: true });
+    });
+  }
+
+  close() {
+    this.ws?.close();
+    this.ws = null;
+  }
+
+  private dispatch(msg: Record<string, unknown>) {
+    const id = msg.id as string | undefined;
+
+    if (id !== undefined && this.pending.has(id)) {
+      const p = this.pending.get(id)!;
+      this.pending.delete(id);
+      clearTimeout(p.timer);
+
+      const err = msg.error as { message?: string } | undefined;
+      if (err) p.reject(new Error(err.message ?? "request failed"));
+      else p.resolve(msg.result);
+      return;
+    }
+
+    if (msg.method !== "event") return;
+
+    const params = (msg.params ?? {}) as GatewayEvent;
+    if (typeof params.type !== "string") return;
+
+    for (const cb of this.listeners.get(params.type) ?? []) cb(params);
+    for (const cb of this.listeners.get(ANY) ?? []) cb(params);
+  }
+
+  private rejectAllPending(err: Error) {
+    for (const p of this.pending.values()) {
+      clearTimeout(p.timer);
+      p.reject(err);
+    }
+    this.pending.clear();
+  }
+
+  /** Send a JSON-RPC request. Rejects on error response or timeout. */
+  request<T = unknown>(
+    method: string,
+    params: Record<string, unknown> = {},
+    timeoutMs = DEFAULT_REQUEST_TIMEOUT_MS,
+  ): Promise<T> {
+    if (!this.ws || this._state !== "open") {
+      return Promise.reject(
+        new Error(`gateway not connected (state=${this._state})`),
+      );
+    }
+
+    const id = `w${++this.reqId}`;
+
+    return new Promise<T>((resolve, reject) => {
+      const timer = setTimeout(() => {
+        if (this.pending.delete(id)) {
+          reject(new Error(`request timed out: ${method}`));
+        }
+      }, timeoutMs);
+
+      this.pending.set(id, {
+        resolve: (v) => resolve(v as T),
+        reject,
+        timer,
+      });
+
+      try {
+        this.ws!.send(JSON.stringify({ jsonrpc: "2.0", id, method, params }));
+      } catch (e) {
+        clearTimeout(timer);
+        this.pending.delete(id);
+        reject(e instanceof Error ? e : new Error(String(e)));
+      }
+    });
+  }
+}
+
+declare global {
+  interface Window {
+    __HERMES_SESSION_TOKEN__?: string;
+    __HERMES_AUTH_REQUIRED__?: boolean;
+  }
+}
diff --git a/apps/dashboard/src/lib/nested.ts b/web/src/lib/nested.ts
similarity index 100%
rename from apps/dashboard/src/lib/nested.ts
rename to web/src/lib/nested.ts
diff --git a/apps/dashboard/src/lib/resolve-page-title.ts b/web/src/lib/resolve-page-title.ts
similarity index 100%
rename from apps/dashboard/src/lib/resolve-page-title.ts
rename to web/src/lib/resolve-page-title.ts
diff --git a/apps/dashboard/src/lib/slashExec.ts b/web/src/lib/slashExec.ts
similarity index 100%
rename from apps/dashboard/src/lib/slashExec.ts
rename to web/src/lib/slashExec.ts
diff --git a/apps/dashboard/src/lib/utils.ts b/web/src/lib/utils.ts
similarity index 100%
rename from apps/dashboard/src/lib/utils.ts
rename to web/src/lib/utils.ts
diff --git a/apps/dashboard/src/main.tsx b/web/src/main.tsx
similarity index 100%
rename from apps/dashboard/src/main.tsx
rename to web/src/main.tsx
diff --git a/apps/dashboard/src/pages/AnalyticsPage.tsx b/web/src/pages/AnalyticsPage.tsx
similarity index 99%
rename from apps/dashboard/src/pages/AnalyticsPage.tsx
rename to web/src/pages/AnalyticsPage.tsx
index 2376a2db7fb..b9851c227a8 100644
--- a/apps/dashboard/src/pages/AnalyticsPage.tsx
+++ b/web/src/pages/AnalyticsPage.tsx
@@ -20,7 +20,7 @@ import { timeAgo } from "@/lib/utils";
 import { Button } from "@nous-research/ui/ui/components/button";
 import { Spinner } from "@nous-research/ui/ui/components/spinner";
 import { Stats } from "@nous-research/ui/ui/components/stats";
-import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
+import { Card, CardContent, CardHeader, CardTitle } from "@nous-research/ui/ui/components/card";
 import { Badge } from "@nous-research/ui/ui/components/badge";
 import { usePageHeader } from "@/contexts/usePageHeader";
 import { useI18n } from "@/i18n";
@@ -512,7 +512,7 @@ export default function AnalyticsPage() {
                 <span className="font-mono">
                   dashboard.show_token_analytics: true
                 </span>{" "}
-                in <a href="/config" className="underline underline-offset-4 decoration-current/40">Config</a>.
+                in <a href="/config" className="underline">Config</a>.
               </p>
             </div>
           </CardContent>
diff --git a/apps/dashboard/src/pages/ChatPage.tsx b/web/src/pages/ChatPage.tsx
similarity index 98%
rename from apps/dashboard/src/pages/ChatPage.tsx
rename to web/src/pages/ChatPage.tsx
index bbbe5a79ec7..af993c67021 100644
--- a/apps/dashboard/src/pages/ChatPage.tsx
+++ b/web/src/pages/ChatPage.tsx
@@ -23,7 +23,7 @@ import { WebglAddon } from "@xterm/addon-webgl";
 import { Terminal } from "@xterm/xterm";
 import "@xterm/xterm/css/xterm.css";
 import { Button } from "@nous-research/ui/ui/components/button";
-import { Typography } from "@/components/NouiTypography";
+import { Typography } from "@nous-research/ui/ui/components/typography/index";
 import { HERMES_BASE_PATH, buildWsAuthParam } from "@/lib/api";
 import { cn } from "@/lib/utils";
 import { Copy, PanelRight, X } from "lucide-react";
@@ -119,8 +119,13 @@ export default function ChatPage({ isActive = true }: { isActive?: boolean }) {
   const [searchParams, setSearchParams] = useSearchParams();
   // Lazy-init: the missing-token check happens at construction so the effect
   // body doesn't have to setState (React 19's set-state-in-effect rule).
+  // In gated (OAuth) mode the server intentionally omits the session token —
+  // the SPA authenticates the WS via a single-use ticket (buildWsAuthParam),
+  // so a missing token there is expected, not an error.
   const [banner, setBanner] = useState<string | null>(() =>
-    typeof window !== "undefined" && !window.__HERMES_SESSION_TOKEN__
+    typeof window !== "undefined" &&
+    !window.__HERMES_SESSION_TOKEN__ &&
+    !window.__HERMES_AUTH_REQUIRED__
       ? "Session token unavailable. Open this page through `hermes dashboard`, not directly."
       : null,
   );
@@ -273,8 +278,11 @@ export default function ChatPage({ isActive = true }: { isActive?: boolean }) {
     if (!host) return;
 
     const token = window.__HERMES_SESSION_TOKEN__;
+    const gated = !!window.__HERMES_AUTH_REQUIRED__;
     // Banner already initialised above; just bail before wiring xterm/WS.
-    if (!token) {
+    // In gated mode the token is absent by design — buildWsAuthParam() mints
+    // a WS ticket instead, so don't bail; let the effect reach that path.
+    if (!token && !gated) {
       return;
     }
 
@@ -876,5 +884,6 @@ export default function ChatPage({ isActive = true }: { isActive?: boolean }) {
 declare global {
   interface Window {
     __HERMES_SESSION_TOKEN__?: string;
+    __HERMES_AUTH_REQUIRED__?: boolean;
   }
 }
diff --git a/apps/dashboard/src/pages/ConfigPage.tsx b/web/src/pages/ConfigPage.tsx
similarity index 98%
rename from apps/dashboard/src/pages/ConfigPage.tsx
rename to web/src/pages/ConfigPage.tsx
index 03629eaa035..50ad3261a3f 100644
--- a/apps/dashboard/src/pages/ConfigPage.tsx
+++ b/web/src/pages/ConfigPage.tsx
@@ -38,15 +38,15 @@ import {
 } from "lucide-react";
 import { api } from "@/lib/api";
 import { getNestedValue, setNestedValue } from "@/lib/nested";
-import { useToast } from "@/hooks/useToast";
-import { Toast } from "@/components/Toast";
+import { useToast } from "@nous-research/ui/hooks/use-toast";
+import { Toast } from "@nous-research/ui/ui/components/toast";
 import { AutoField } from "@/components/AutoField";
 import { Button } from "@nous-research/ui/ui/components/button";
 import { ListItem } from "@nous-research/ui/ui/components/list-item";
 import { Spinner } from "@nous-research/ui/ui/components/spinner";
-import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
-import { ConfirmDialog } from "@/components/ui/confirm-dialog";
-import { Input } from "@/components/ui/input";
+import { Card, CardContent, CardHeader, CardTitle } from "@nous-research/ui/ui/components/card";
+import { ConfirmDialog } from "@nous-research/ui/ui/components/confirm-dialog";
+import { Input } from "@nous-research/ui/ui/components/input";
 import { Badge } from "@nous-research/ui/ui/components/badge";
 import { useI18n } from "@/i18n";
 import { usePageHeader } from "@/contexts/usePageHeader";
diff --git a/apps/dashboard/src/pages/CronPage.tsx b/web/src/pages/CronPage.tsx
similarity index 97%
rename from apps/dashboard/src/pages/CronPage.tsx
rename to web/src/pages/CronPage.tsx
index 757f383f01d..85af87489e5 100644
--- a/apps/dashboard/src/pages/CronPage.tsx
+++ b/web/src/pages/CronPage.tsx
@@ -4,17 +4,17 @@ import { Badge } from "@nous-research/ui/ui/components/badge";
 import { Button } from "@nous-research/ui/ui/components/button";
 import { Select, SelectOption } from "@nous-research/ui/ui/components/select";
 import { Spinner } from "@nous-research/ui/ui/components/spinner";
-import { H2 } from "@/components/NouiTypography";
+import { H2 } from "@nous-research/ui/ui/components/typography/h2";
 import { api } from "@/lib/api";
 import type { CronJob, ProfileInfo } from "@/lib/api";
 import { DeleteConfirmDialog } from "@/components/DeleteConfirmDialog";
-import { useToast } from "@/hooks/useToast";
-import { useConfirmDelete } from "@/hooks/useConfirmDelete";
+import { useToast } from "@nous-research/ui/hooks/use-toast";
+import { useConfirmDelete } from "@nous-research/ui/hooks/use-confirm-delete";
 import { useModalBehavior } from "@/hooks/useModalBehavior";
-import { Toast } from "@/components/Toast";
-import { Card, CardContent } from "@/components/ui/card";
-import { Input } from "@/components/ui/input";
-import { Label } from "@/components/ui/label";
+import { Toast } from "@nous-research/ui/ui/components/toast";
+import { Card, CardContent } from "@nous-research/ui/ui/components/card";
+import { Input } from "@nous-research/ui/ui/components/input";
+import { Label } from "@nous-research/ui/ui/components/label";
 import { useI18n } from "@/i18n";
 import { usePageHeader } from "@/contexts/usePageHeader";
 import { PluginSlot } from "@/plugins";
diff --git a/apps/dashboard/src/pages/DocsPage.tsx b/web/src/pages/DocsPage.tsx
similarity index 100%
rename from apps/dashboard/src/pages/DocsPage.tsx
rename to web/src/pages/DocsPage.tsx
diff --git a/apps/dashboard/src/pages/EnvPage.tsx b/web/src/pages/EnvPage.tsx
similarity index 98%
rename from apps/dashboard/src/pages/EnvPage.tsx
rename to web/src/pages/EnvPage.tsx
index 769dd279dfc..5be65d63836 100644
--- a/apps/dashboard/src/pages/EnvPage.tsx
+++ b/web/src/pages/EnvPage.tsx
@@ -17,9 +17,9 @@ import {
 import { api } from "@/lib/api";
 import type { EnvVarInfo } from "@/lib/api";
 import { DeleteConfirmDialog } from "@/components/DeleteConfirmDialog";
-import { Toast } from "@/components/Toast";
-import { useConfirmDelete } from "@/hooks/useConfirmDelete";
-import { useToast } from "@/hooks/useToast";
+import { Toast } from "@nous-research/ui/ui/components/toast";
+import { useConfirmDelete } from "@nous-research/ui/hooks/use-confirm-delete";
+import { useToast } from "@nous-research/ui/hooks/use-toast";
 import { OAuthProvidersCard } from "@/components/OAuthProvidersCard";
 import { Button } from "@nous-research/ui/ui/components/button";
 import { ListItem } from "@nous-research/ui/ui/components/list-item";
@@ -30,10 +30,10 @@ import {
   CardDescription,
   CardHeader,
   CardTitle,
-} from "@/components/ui/card";
+} from "@nous-research/ui/ui/components/card";
 import { Badge } from "@nous-research/ui/ui/components/badge";
-import { Input } from "@/components/ui/input";
-import { Label } from "@/components/ui/label";
+import { Input } from "@nous-research/ui/ui/components/input";
+import { Label } from "@nous-research/ui/ui/components/label";
 import { useI18n } from "@/i18n";
 import { usePageHeader } from "@/contexts/usePageHeader";
 import { PluginSlot } from "@/plugins";
diff --git a/apps/dashboard/src/pages/LogsPage.tsx b/web/src/pages/LogsPage.tsx
similarity index 98%
rename from apps/dashboard/src/pages/LogsPage.tsx
rename to web/src/pages/LogsPage.tsx
index e1865cdab95..94dd3957b0d 100644
--- a/apps/dashboard/src/pages/LogsPage.tsx
+++ b/web/src/pages/LogsPage.tsx
@@ -12,8 +12,8 @@ import { Button } from "@nous-research/ui/ui/components/button";
 import { FilterGroup, Segmented } from "@nous-research/ui/ui/components/segmented";
 import { Spinner } from "@nous-research/ui/ui/components/spinner";
 import { Switch } from "@nous-research/ui/ui/components/switch";
-import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
-import { Label } from "@/components/ui/label";
+import { Card, CardContent, CardHeader, CardTitle } from "@nous-research/ui/ui/components/card";
+import { Label } from "@nous-research/ui/ui/components/label";
 import { useI18n } from "@/i18n";
 import { usePageHeader } from "@/contexts/usePageHeader";
 import { PluginSlot } from "@/plugins";
diff --git a/apps/dashboard/src/pages/ModelsPage.tsx b/web/src/pages/ModelsPage.tsx
similarity index 99%
rename from apps/dashboard/src/pages/ModelsPage.tsx
rename to web/src/pages/ModelsPage.tsx
index e81dc6c8ebf..f0e81d0f084 100644
--- a/apps/dashboard/src/pages/ModelsPage.tsx
+++ b/web/src/pages/ModelsPage.tsx
@@ -24,9 +24,9 @@ import { formatTokenCount } from "@/lib/format";
 import { Button } from "@nous-research/ui/ui/components/button";
 import { Spinner } from "@nous-research/ui/ui/components/spinner";
 import { Stats } from "@nous-research/ui/ui/components/stats";
-import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
+import { Card, CardContent, CardHeader, CardTitle } from "@nous-research/ui/ui/components/card";
 import { Badge } from "@nous-research/ui/ui/components/badge";
-import { ConfirmDialog } from "@/components/ui/confirm-dialog";
+import { ConfirmDialog } from "@nous-research/ui/ui/components/confirm-dialog";
 import { useModalBehavior } from "@/hooks/useModalBehavior";
 import { usePageHeader } from "@/contexts/usePageHeader";
 import { useI18n } from "@/i18n";
@@ -933,7 +933,7 @@ export default function ModelsPage() {
                   …) and provider retries, so they diverge from your provider
                   bill. Enable{" "}
                   <span className="font-mono">dashboard.show_token_analytics</span>{" "}
-                  in <a href="/config" className="underline underline-offset-4 decoration-current/40">Config</a> to
+                  in <a href="/config" className="underline">Config</a> to
                   show the local debug estimate anyway.
                 </p>
               )}
diff --git a/apps/dashboard/src/pages/PluginsPage.tsx b/web/src/pages/PluginsPage.tsx
similarity index 97%
rename from apps/dashboard/src/pages/PluginsPage.tsx
rename to web/src/pages/PluginsPage.tsx
index 936f447aba1..3d3775969ed 100644
--- a/apps/dashboard/src/pages/PluginsPage.tsx
+++ b/web/src/pages/PluginsPage.tsx
@@ -10,12 +10,12 @@ import { Select, SelectOption } from "@nous-research/ui/ui/components/select";
 import { Switch } from "@nous-research/ui/ui/components/switch";
 import { Spinner } from "@nous-research/ui/ui/components/spinner";
 import { CommandBlock } from "@nous-research/ui/ui/components/command-block";
-import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
-import { ConfirmDialog } from "@/components/ui/confirm-dialog";
-import { Input } from "@/components/ui/input";
-import { Label } from "@/components/ui/label";
-import { useToast } from "@/hooks/useToast";
-import { Toast } from "@/components/Toast";
+import { Card, CardContent, CardHeader, CardTitle } from "@nous-research/ui/ui/components/card";
+import { ConfirmDialog } from "@nous-research/ui/ui/components/confirm-dialog";
+import { Input } from "@nous-research/ui/ui/components/input";
+import { Label } from "@nous-research/ui/ui/components/label";
+import { useToast } from "@nous-research/ui/hooks/use-toast";
+import { Toast } from "@nous-research/ui/ui/components/toast";
 import { useI18n } from "@/i18n";
 import { PluginSlot } from "@/plugins";
 import { cn } from "@/lib/utils";
@@ -346,7 +346,7 @@ export default function PluginsPage() {
                   {!m.tab?.hidden ? (
 
 
-                    <Link className="ml-3 inline-flex items-center gap-1 underline underline-offset-4 decoration-current/40" to={m.tab.path}>
+                    <Link className="ml-3 inline-flex items-center gap-1 underline" to={m.tab.path}>
 
 
                       <ExternalLink className="h-3 w-3 opacity-65" />
diff --git a/apps/dashboard/src/pages/ProfilesPage.tsx b/web/src/pages/ProfilesPage.tsx
similarity index 97%
rename from apps/dashboard/src/pages/ProfilesPage.tsx
rename to web/src/pages/ProfilesPage.tsx
index e61ee717e76..210a5da96d6 100644
--- a/apps/dashboard/src/pages/ProfilesPage.tsx
+++ b/web/src/pages/ProfilesPage.tsx
@@ -14,19 +14,19 @@ import {
   X,
 } from "lucide-react";
 import spinners from "unicode-animations";
-import { H2 } from "@/components/NouiTypography";
+import { H2 } from "@nous-research/ui/ui/components/typography/h2";
 import { api } from "@/lib/api";
 import type { ProfileInfo } from "@/lib/api";
 import { DeleteConfirmDialog } from "@/components/DeleteConfirmDialog";
-import { useToast } from "@/hooks/useToast";
-import { useConfirmDelete } from "@/hooks/useConfirmDelete";
+import { useToast } from "@nous-research/ui/hooks/use-toast";
+import { useConfirmDelete } from "@nous-research/ui/hooks/use-confirm-delete";
 import { useModalBehavior } from "@/hooks/useModalBehavior";
-import { Toast } from "@/components/Toast";
-import { Card, CardContent } from "@/components/ui/card";
+import { Toast } from "@nous-research/ui/ui/components/toast";
+import { Card, CardContent } from "@nous-research/ui/ui/components/card";
 import { Badge } from "@nous-research/ui/ui/components/badge";
 import { Button } from "@nous-research/ui/ui/components/button";
-import { Input } from "@/components/ui/input";
-import { Label } from "@/components/ui/label";
+import { Input } from "@nous-research/ui/ui/components/input";
+import { Label } from "@nous-research/ui/ui/components/label";
 import { Checkbox } from "@nous-research/ui/ui/components/checkbox";
 import { useI18n } from "@/i18n";
 import { usePageHeader } from "@/contexts/usePageHeader";
diff --git a/apps/dashboard/src/pages/SessionsPage.tsx b/web/src/pages/SessionsPage.tsx
similarity index 98%
rename from apps/dashboard/src/pages/SessionsPage.tsx
rename to web/src/pages/SessionsPage.tsx
index 5e8f65f35f6..5535d1fe9f8 100644
--- a/apps/dashboard/src/pages/SessionsPage.tsx
+++ b/web/src/pages/SessionsPage.tsx
@@ -34,18 +34,18 @@ import type {
 import { timeAgo } from "@/lib/utils";
 import { Markdown } from "@/components/Markdown";
 import { PlatformsCard } from "@/components/PlatformsCard";
-import { Toast } from "@/components/Toast";
+import { Toast } from "@nous-research/ui/ui/components/toast";
 import { Button } from "@nous-research/ui/ui/components/button";
 import { ListItem } from "@nous-research/ui/ui/components/list-item";
 import { Segmented } from "@nous-research/ui/ui/components/segmented";
 import { Spinner } from "@nous-research/ui/ui/components/spinner";
 import { Badge } from "@nous-research/ui/ui/components/badge";
-import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
+import { Card, CardContent, CardHeader, CardTitle } from "@nous-research/ui/ui/components/card";
 import { DeleteConfirmDialog } from "@/components/DeleteConfirmDialog";
-import { useConfirmDelete } from "@/hooks/useConfirmDelete";
-import { Input } from "@/components/ui/input";
+import { useConfirmDelete } from "@nous-research/ui/hooks/use-confirm-delete";
+import { Input } from "@nous-research/ui/ui/components/input";
 import { useSystemActions } from "@/contexts/useSystemActions";
-import { useToast } from "@/hooks/useToast";
+import { useToast } from "@nous-research/ui/hooks/use-toast";
 import { useI18n } from "@/i18n";
 import { usePageHeader } from "@/contexts/usePageHeader";
 import { PluginSlot } from "@/plugins";
@@ -778,7 +778,7 @@ export default function SessionsPage() {
                 onChange={setView}
                 options={[
                   { value: "overview", label: t.sessions.overview },
-                  { value: "list", label: t.sessions.title },
+                  { value: "list", label: t.sessions.history },
                 ]}
               />
             )}
diff --git a/apps/dashboard/src/pages/SkillsPage.tsx b/web/src/pages/SkillsPage.tsx
similarity index 98%
rename from apps/dashboard/src/pages/SkillsPage.tsx
rename to web/src/pages/SkillsPage.tsx
index 37a1f27ef27..fbc38c0f956 100644
--- a/apps/dashboard/src/pages/SkillsPage.tsx
+++ b/web/src/pages/SkillsPage.tsx
@@ -17,16 +17,16 @@ import {
 } from "lucide-react";
 import { api } from "@/lib/api";
 import type { SkillInfo, ToolsetInfo } from "@/lib/api";
-import { useToast } from "@/hooks/useToast";
-import { Toast } from "@/components/Toast";
-import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
+import { useToast } from "@nous-research/ui/hooks/use-toast";
+import { Toast } from "@nous-research/ui/ui/components/toast";
+import { Card, CardContent, CardHeader, CardTitle } from "@nous-research/ui/ui/components/card";
 import { Badge } from "@nous-research/ui/ui/components/badge";
 import { Button } from "@nous-research/ui/ui/components/button";
 import { ListItem } from "@nous-research/ui/ui/components/list-item";
 import { Spinner } from "@nous-research/ui/ui/components/spinner";
 import { Switch } from "@nous-research/ui/ui/components/switch";
 import { cn } from "@/lib/utils";
-import { Input } from "@/components/ui/input";
+import { Input } from "@nous-research/ui/ui/components/input";
 import { useI18n } from "@/i18n";
 import { usePageHeader } from "@/contexts/usePageHeader";
 import { PluginSlot } from "@/plugins";
diff --git a/apps/dashboard/src/plugins/PluginPage.tsx b/web/src/plugins/PluginPage.tsx
similarity index 100%
rename from apps/dashboard/src/plugins/PluginPage.tsx
rename to web/src/plugins/PluginPage.tsx
diff --git a/apps/dashboard/src/plugins/index.ts b/web/src/plugins/index.ts
similarity index 100%
rename from apps/dashboard/src/plugins/index.ts
rename to web/src/plugins/index.ts
diff --git a/apps/dashboard/src/plugins/registry.ts b/web/src/plugins/registry.ts
similarity index 92%
rename from apps/dashboard/src/plugins/registry.ts
rename to web/src/plugins/registry.ts
index 93d07102aba..e62dc8db7f1 100644
--- a/apps/dashboard/src/plugins/registry.ts
+++ b/web/src/plugins/registry.ts
@@ -23,10 +23,10 @@ import { Badge } from "@nous-research/ui/ui/components/badge";
 import { Button } from "@nous-research/ui/ui/components/button";
 import { Checkbox } from "@nous-research/ui/ui/components/checkbox";
 import { Select, SelectOption } from "@nous-research/ui/ui/components/select";
-import { Card, CardHeader, CardTitle, CardContent } from "@/components/ui/card";
-import { Input } from "@/components/ui/input";
-import { Label } from "@/components/ui/label";
-import { Separator } from "@/components/ui/separator";
+import { Card, CardHeader, CardTitle, CardContent } from "@nous-research/ui/ui/components/card";
+import { Input } from "@nous-research/ui/ui/components/input";
+import { Label } from "@nous-research/ui/ui/components/label";
+import { Separator } from "@nous-research/ui/ui/components/separator";
 import { Tabs, TabsList, TabsTrigger } from "@nous-research/ui/ui/components/tabs";
 import { useI18n } from "@/i18n";
 import { registerSlot, PluginSlot } from "./slots";
diff --git a/apps/dashboard/src/plugins/slots.ts b/web/src/plugins/slots.ts
similarity index 100%
rename from apps/dashboard/src/plugins/slots.ts
rename to web/src/plugins/slots.ts
diff --git a/apps/dashboard/src/plugins/types.ts b/web/src/plugins/types.ts
similarity index 100%
rename from apps/dashboard/src/plugins/types.ts
rename to web/src/plugins/types.ts
diff --git a/apps/dashboard/src/plugins/usePlugins.ts b/web/src/plugins/usePlugins.ts
similarity index 100%
rename from apps/dashboard/src/plugins/usePlugins.ts
rename to web/src/plugins/usePlugins.ts
diff --git a/apps/dashboard/src/themes/context.tsx b/web/src/themes/context.tsx
similarity index 100%
rename from apps/dashboard/src/themes/context.tsx
rename to web/src/themes/context.tsx
diff --git a/apps/dashboard/src/themes/index.ts b/web/src/themes/index.ts
similarity index 100%
rename from apps/dashboard/src/themes/index.ts
rename to web/src/themes/index.ts
diff --git a/apps/dashboard/src/themes/presets.ts b/web/src/themes/presets.ts
similarity index 100%
rename from apps/dashboard/src/themes/presets.ts
rename to web/src/themes/presets.ts
diff --git a/apps/dashboard/src/themes/types.ts b/web/src/themes/types.ts
similarity index 100%
rename from apps/dashboard/src/themes/types.ts
rename to web/src/themes/types.ts
diff --git a/apps/dashboard/tsconfig.app.json b/web/tsconfig.app.json
similarity index 91%
rename from apps/dashboard/tsconfig.app.json
rename to web/tsconfig.app.json
index 5479418f705..dfd66951da2 100644
--- a/apps/dashboard/tsconfig.app.json
+++ b/web/tsconfig.app.json
@@ -19,8 +19,7 @@
     /* Path aliases */
     "baseUrl": ".",
     "paths": {
-      "@/*": ["./src/*"],
-      "@hermes/shared": ["../shared/src/index.ts"]
+      "@/*": ["./src/*"]
     },
 
     /* Linting */
diff --git a/apps/dashboard/tsconfig.json b/web/tsconfig.json
similarity index 100%
rename from apps/dashboard/tsconfig.json
rename to web/tsconfig.json
diff --git a/apps/dashboard/tsconfig.node.json b/web/tsconfig.node.json
similarity index 100%
rename from apps/dashboard/tsconfig.node.json
rename to web/tsconfig.node.json
diff --git a/apps/dashboard/vite.config.ts b/web/vite.config.ts
similarity index 97%
rename from apps/dashboard/vite.config.ts
rename to web/vite.config.ts
index cc190da4f3b..24654173f80 100644
--- a/apps/dashboard/vite.config.ts
+++ b/web/vite.config.ts
@@ -69,7 +69,6 @@ export default defineConfig({
   resolve: {
     alias: {
       "@": path.resolve(__dirname, "./src"),
-      "@hermes/shared": path.resolve(__dirname, "../shared/src"),
     },
     // When @nous-research/ui is symlinked via `file:../../design-language`,
     // Node's module resolution would pick up shared deps from
@@ -91,7 +90,7 @@ export default defineConfig({
     ],
   },
   build: {
-    outDir: "../../hermes_cli/web_dist",
+    outDir: "../hermes_cli/web_dist",
     emptyOutDir: true,
   },
   server: {
diff --git a/website/docs/developer-guide/adding-platform-adapters.md b/website/docs/developer-guide/adding-platform-adapters.md
index a8433fcacdd..a695c1544d2 100644
--- a/website/docs/developer-guide/adding-platform-adapters.md
+++ b/website/docs/developer-guide/adding-platform-adapters.md
@@ -9,7 +9,7 @@ This guide covers adding a new messaging platform to the Hermes gateway. A platf
 :::tip
 There are two ways to add a platform:
 - **Plugin** (recommended for community/third-party): Drop a plugin directory into `~/.hermes/plugins/` — zero core code changes needed. See [Plugin Path](#plugin-path-recommended) below.
-- **Built-in**: Modify 20+ files across code, config, and docs. Use the [Built-in Checklist](#step-by-step-checklist) below.
+- **Built-in**: Modify 20+ files across code, config, and docs. Use the [Built-in Checklist](#step-by-step-checklist-built-in-path) below.
 :::
 
 ## Architecture Overview
diff --git a/website/docs/developer-guide/adding-providers.md b/website/docs/developer-guide/adding-providers.md
index 387c9e5b6e8..f21b6341cf6 100644
--- a/website/docs/developer-guide/adding-providers.md
+++ b/website/docs/developer-guide/adding-providers.md
@@ -321,12 +321,12 @@ At minimum, touch the tests that guard provider wiring.
 
 Common places:
 
-- `tests/test_runtime_provider_resolution.py`
-- `tests/test_cli_provider_resolution.py`
-- `tests/test_cli_model_command.py`
-- `tests/test_setup_model_selection.py`
-- `tests/test_provider_parity.py`
-- `tests/test_run_agent.py`
+- `tests/hermes_cli/test_runtime_provider_resolution.py`
+- `tests/cli/test_cli_provider_resolution.py`
+- `tests/hermes_cli/test_model_switch_custom_providers.py` (and adjacent `tests/hermes_cli/test_model_switch_*.py`)
+- `tests/hermes_cli/test_setup_model_provider.py`
+- `tests/run_agent/test_provider_parity.py`
+- `tests/run_agent/test_run_agent.py`
 - `tests/test_<provider>_adapter.py` for a native provider
 
 For docs-only examples, the exact file set may differ. The point is to cover:
@@ -342,7 +342,7 @@ Run tests with xdist disabled:
 
 ```bash
 source venv/bin/activate
-python -m pytest tests/test_runtime_provider_resolution.py tests/test_cli_provider_resolution.py tests/test_cli_model_command.py tests/test_setup_model_selection.py -n0 -q
+python -m pytest tests/hermes_cli/test_runtime_provider_resolution.py tests/cli/test_cli_provider_resolution.py tests/hermes_cli/test_setup_model_provider.py tests/run_agent/test_provider_parity.py -n0 -q
 ```
 
 For deeper changes, run the full suite before pushing:
diff --git a/website/docs/developer-guide/agent-loop.md b/website/docs/developer-guide/agent-loop.md
index fdc0cc3c8f9..46a100c4766 100644
--- a/website/docs/developer-guide/agent-loop.md
+++ b/website/docs/developer-guide/agent-loop.md
@@ -6,7 +6,7 @@ description: "Detailed walkthrough of AIAgent execution, API modes, tools, callb
 
 # Agent Loop Internals
 
-The core orchestration engine is `run_agent.py`'s `AIAgent` class — a large file (15k+ lines) that handles everything from prompt assembly to tool dispatch to provider failover.
+The core orchestration engine is `run_agent.py`'s `AIAgent` class — a large file (~4,400 lines) that handles everything from prompt assembly to tool dispatch to provider failover.
 
 ## Core Responsibilities
 
diff --git a/website/docs/developer-guide/architecture.md b/website/docs/developer-guide/architecture.md
index 4c83f17aa3f..f3698f8e52a 100644
--- a/website/docs/developer-guide/architecture.md
+++ b/website/docs/developer-guide/architecture.md
@@ -40,7 +40,7 @@ This page is the top-level map of Hermes Agent internals. Use it to orient yours
            ▼                                    ▼
 ┌───────────────────┐              ┌──────────────────────┐
 │ Session Storage   │              │ Tool Backends         │
-│ (SQLite + FTS5)   │              │ Terminal (7 backends) │
+│ (SQLite + FTS5)   │              │ Terminal (6 backends) │
 │ hermes_state.py   │              │ Browser (5 backends)  │
 │ gateway/session.py│              │ Web (4 backends)      │
 └───────────────────┘              │ MCP (dynamic)         │
@@ -130,7 +130,7 @@ hermes-agent/
 ├── skills/                   # Bundled skills (always available)
 ├── optional-skills/          # Official optional skills (install explicitly)
 ├── website/                  # Docusaurus documentation site
-└── tests/                    # Pytest suite (~3,000+ tests)
+└── tests/                    # Pytest suite (~25,000 tests across ~1,250 files)
 ```
 
 ## Data Flow
@@ -197,7 +197,7 @@ The synchronous orchestration engine (`AIAgent` in `run_agent.py`). Handles prov
 
 Prompt construction and maintenance across the conversation lifecycle:
 
-- **`prompt_builder.py`** — Assembles the system prompt from: personality (SOUL.md), memory (MEMORY.md, USER.md), skills, context files (AGENTS.md, .hermes.md), tool-use guidance, and model-specific instructions
+- **`system_prompt.py` + `prompt_builder.py`** — assembles the ordered system-prompt tiers (`stable` → `context` → `volatile`): identity/tool guidance/skills, context files, then memory/profile/timestamp blocks
 - **`prompt_caching.py`** — Applies Anthropic cache breakpoints for prefix caching
 - **`context_compressor.py`** — Summarizes middle conversation turns when context exceeds thresholds
 
diff --git a/website/docs/developer-guide/browser-supervisor.md b/website/docs/developer-guide/browser-supervisor.md
index 8b56cf6bda8..a30abdbdaca 100644
--- a/website/docs/developer-guide/browser-supervisor.md
+++ b/website/docs/developer-guide/browser-supervisor.md
@@ -1,57 +1,49 @@
-# Browser CDP Supervisor — Design
+---
+sidebar_position: 18
+title: "Browser CDP Supervisor"
+description: "How Hermes detects and responds to native JS dialogs and interacts with cross-origin iframes via a persistent CDP connection."
+---
 
-**Status:** Shipped (PR 14540)
-**Last updated:** 2026-04-23
-**Author:** @teknium1
+# Browser CDP Supervisor
 
-## Problem
+The CDP supervisor closes two long-standing gaps in Hermes' browser tooling:
 
-Native JS dialogs (`alert`/`confirm`/`prompt`/`beforeunload`) and iframes are
-the two biggest gaps in our browser tooling:
+1. **Native JS dialogs** (`alert`/`confirm`/`prompt`/`beforeunload`) block the
+   page's JS thread. Without supervision, the agent has no way to know a
+   dialog is open — subsequent tool calls hang or throw opaque errors.
+2. **Cross-origin iframes (OOPIFs)** are invisible to top-level
+   `Runtime.evaluate`. The agent can see iframe nodes in the DOM snapshot but
+   can't click, type, or eval inside them without a CDP session attached to
+   the child target.
 
-1. **Dialogs block the JS thread.** Any operation on the page stalls until the
-   dialog is handled. Before this work, the agent had no way to know a dialog
-   was open — subsequent tool calls would hang or throw opaque errors.
-2. **Iframes are invisible.** The agent could see iframe nodes in the DOM
-   snapshot but could not click, type, or eval inside them — especially
-   cross-origin (OOPIF) iframes that live in separate Chromium processes.
+The supervisor solves both by holding a persistent WebSocket to the backend's
+CDP endpoint per browser task, surfacing pending dialogs and frame structure
+into `browser_snapshot`, and exposing a `browser_dialog` tool for explicit
+responses.
 
-[PR #12550](https://github.com/NousResearch/hermes-agent/pull/12550) proposed a
-stateless `browser_dialog` wrapper. That doesn't solve detection — it's a
-cleaner CDP call for when the agent already knows (via symptoms) that a dialog
-is open. Closed as superseded.
-
-## Backend capability matrix (verified live 2026-04-23)
-
-Using throwaway probe scripts against a data-URL page that fires alerts in the
-main frame and in a same-origin srcdoc iframe, plus a cross-origin
-`https://example.com` iframe:
+## Backend support
 
 | Backend | Dialog detect | Dialog respond | Frame tree | OOPIF `Runtime.evaluate` via `browser_cdp(frame_id=...)` |
 |---|---|---|---|---|
 | Local Chrome (`--remote-debugging-port`) / `/browser connect` | ✓ | ✓ full workflow | ✓ | ✓ |
-| Browserbase | ✓ (via bridge) | ✓ full workflow (via bridge) | ✓ | ✓ (`document.title = "Example Domain"` verified on real cross-origin iframe) |
+| Browserbase | ✓ (via bridge) | ✓ full workflow (via bridge) | ✓ | ✓ |
 | Camofox | ✗ no CDP (REST-only) | ✗ | partial via DOM snapshot | ✗ |
 
-**How Browserbase respond works.** Browserbase's CDP proxy uses Playwright
-internally and auto-dismisses native dialogs within ~10ms, so
-`Page.handleJavaScriptDialog` can't keep up. To work around this, the
-supervisor injects a bridge script via
+**Browserbase quirk.** Browserbase's CDP proxy uses Playwright internally and
+auto-dismisses native dialogs within ~10ms, so `Page.handleJavaScriptDialog`
+can't keep up. The supervisor injects a bridge script via
 `Page.addScriptToEvaluateOnNewDocument` that overrides
 `window.alert`/`confirm`/`prompt` with a synchronous XHR to a magic host
-(`hermes-dialog-bridge.invalid`). `Fetch.enable` intercepts those XHRs
-before they touch the network — the dialog becomes a `Fetch.requestPaused`
-event the supervisor captures, and `respond_to_dialog` fulfills via
+(`hermes-dialog-bridge.invalid`). `Fetch.enable` intercepts those XHRs before
+they touch the network — the dialog becomes a `Fetch.requestPaused` event the
+supervisor captures, and `respond_to_dialog` fulfills via
 `Fetch.fulfillRequest` with a JSON body the injected script decodes.
 
-Net result: from the page's perspective, `prompt()` still returns the
-agent-supplied string. From the agent's perspective, it's the same
-`browser_dialog(action=...)` API either way. Tested end-to-end against
-real Browserbase sessions — 4/4 (alert/prompt/confirm-accept/confirm-dismiss)
-pass including value round-tripping back into page JS.
+From the page's perspective, `prompt()` still returns the agent-supplied
+string. From the agent's perspective, it's the same `browser_dialog(action=...)`
+API either way.
 
-Camofox stays unsupported for this PR; follow-up upstream issue planned at
-`jo-inc/camofox-browser` requesting a dialog polling endpoint.
+Camofox is unsupported — no CDP surface, REST-only.
 
 ## Architecture
 
@@ -63,9 +55,10 @@ Holds a persistent WebSocket to the backend's CDP endpoint. Maintains:
 - **Dialog queue** — `List[PendingDialog]` with `{id, type, message, default_prompt, session_id, opened_at}`
 - **Frame tree** — `Dict[frame_id, FrameInfo]` with parent relationships, URL, origin, whether cross-origin child session
 - **Session map** — `Dict[session_id, SessionInfo]` so interaction tools can route to the right attached session for OOPIF operations
-- **Recent console errors** — ring buffer of the last 50 (for PR 2 diagnostics)
+- **Recent console errors** — ring buffer of the last 50 for diagnostics
 
 Subscribes on attach:
+
 - `Page.enable` — `javascriptDialogOpening`, `frameAttached`, `frameNavigated`, `frameDetached`
 - `Runtime.enable` — `executionContextCreated`, `consoleAPICalled`, `exceptionThrown`
 - `Target.setAutoAttach {autoAttach: true, flatten: true}` — surfaces child OOPIF targets; supervisor enables `Page`+`Runtime` on each
@@ -76,11 +69,13 @@ frozen snapshot without awaiting.
 ### Lifecycle
 
 - **Start:** `SupervisorRegistry.get_or_start(task_id, cdp_url)` — called by
-  `browser_navigate`, Browserbase session create, `/browser connect`. Idempotent.
+  `browser_navigate`, Browserbase session create, `/browser connect`.
+  Idempotent.
 - **Stop:** session teardown or `/browser disconnect`. Cancels the asyncio
   task, closes the WebSocket, discards state.
-- **Rebind:** if the CDP URL changes (user reconnects to a new Chrome), stop
-  the old supervisor and start fresh — never reuse state across endpoints.
+- **Rebind:** if the CDP URL changes (user reconnects to a new Chrome), the
+  old supervisor is stopped and a fresh one started — state is never reused
+  across endpoints.
 
 ### Dialog policy
 
@@ -92,14 +87,14 @@ Configurable via `config.yaml` under `browser.dialog_policy`:
   forever.
 - `auto_dismiss` — record and dismiss immediately; agent sees it after the
   fact via `browser_state` inside `browser_snapshot`.
-- `auto_accept` — record and accept (useful for `beforeunload` where the user
-  wants to navigate away cleanly).
+- `auto_accept` — record and accept (useful for `beforeunload` where the
+  workflow wants to navigate away cleanly).
 
-Policy is per-task; no per-dialog overrides in v1.
+Policy is per-task; no per-dialog overrides.
 
-## Agent surface (PR 1)
+## Agent surface
 
-### One new tool
+### `browser_dialog` tool
 
 ```
 browser_dialog(action, prompt_text=None, dialog_id=None)
@@ -107,9 +102,9 @@ browser_dialog(action, prompt_text=None, dialog_id=None)
 
 - `action="accept"` / `"dismiss"` → responds to the specified or sole pending dialog (required)
 - `prompt_text=...` → text to supply to a `prompt()` dialog
-- `dialog_id=...` → disambiguate when multiple dialogs queued (rare)
+- `dialog_id=...` → disambiguate when multiple dialogs are queued (rare)
 
-Tool is response-only. Agent reads pending dialogs from `browser_snapshot`
+Tool is response-only. The agent reads pending dialogs from `browser_snapshot`
 output before calling.
 
 ### `browser_snapshot` extension
@@ -137,72 +132,52 @@ is attached:
 }
 ```
 
-- **`pending_dialogs`**: dialogs currently blocking the page's JS thread.
+- **`pending_dialogs`** — dialogs currently blocking the page's JS thread.
   The agent must call `browser_dialog(action=...)` to respond. Empty on
   Browserbase because their CDP proxy auto-dismisses within ~10ms.
 
-- **`recent_dialogs`**: ring buffer of up to 20 recently-closed dialogs with
-  a `closed_by` tag — `"agent"` (we responded), `"auto_policy"` (local
+- **`recent_dialogs`** — ring buffer of up to 20 recently-closed dialogs with
+  a `closed_by` tag: `"agent"` (we responded), `"auto_policy"` (local
   auto_dismiss/auto_accept), `"watchdog"` (must_respond timeout hit), or
   `"remote"` (browser/backend closed it on us, e.g. Browserbase). This is
   how agents on Browserbase still get visibility into what happened.
 
-- **`frame_tree`**: frame structure including cross-origin (OOPIF) children.
+- **`frame_tree`** — frame structure including cross-origin (OOPIF) children.
   Capped at 30 entries + OOPIF depth 2 to bound snapshot size on ad-heavy
   pages. `truncated: true` surfaces when limits were hit; agents needing
   the full tree can use `browser_cdp` with `Page.getFrameTree`.
 
-No new tool schema surface for any of these — the agent reads the snapshot
-it already requests.
+No new tool schema surface for any of these — the agent reads the snapshot it
+already requests.
 
 ### Availability gating
 
 Both surfaces gate on `_browser_cdp_check` (supervisor can only run when a CDP
 endpoint is reachable). On Camofox / no-backend sessions, the dialog tool is
-hidden and snapshot omits the new fields — no schema bloat.
+hidden and the snapshot omits the new fields — no schema bloat.
 
 ## Cross-origin iframe interaction
 
-Extending the dialog-detect work, `browser_cdp(frame_id=...)` routes CDP
-calls (notably `Runtime.evaluate`) through the supervisor's already-connected
-WebSocket using the OOPIF's child `sessionId`. Agents pick frame_ids out of
+`browser_cdp(frame_id=...)` routes CDP calls (notably `Runtime.evaluate`)
+through the supervisor's already-connected WebSocket using the OOPIF's child
+`sessionId`. Agents pick frame_ids out of
 `browser_snapshot.frame_tree.children[]` where `is_oopif=true` and pass them
 to `browser_cdp`. For same-origin iframes (no dedicated CDP session), the
 agent uses `contentWindow`/`contentDocument` from a top-level
-`Runtime.evaluate` instead — supervisor surfaces an error pointing at that
+`Runtime.evaluate` instead — the supervisor surfaces an error pointing at that
 fallback when `frame_id` belongs to a non-OOPIF.
 
-On Browserbase, this is the ONLY reliable path for iframe interaction —
+On Browserbase, this is the only reliable path for iframe interaction —
 stateless CDP connections (opened per `browser_cdp` call) hit signed-URL
 expiry, while the supervisor's long-lived connection keeps a valid session.
 
-## Camofox (follow-up)
-
-Issue planned against `jo-inc/camofox-browser` adding:
-- Playwright `page.on('dialog', handler)` per session
-- `GET /tabs/:tabId/dialogs` polling endpoint
-- `POST /tabs/:tabId/dialogs/:id` to accept/dismiss
-- Frame-tree introspection endpoint
-
-## Files touched (PR 1)
-
-### New
+## File layout
 
 - `tools/browser_supervisor.py` — `CDPSupervisor`, `SupervisorRegistry`, `PendingDialog`, `FrameInfo`
 - `tools/browser_dialog_tool.py` — `browser_dialog` tool handler
-- `tests/tools/test_browser_supervisor.py` — mock CDP WebSocket server + lifecycle/state tests
-- `website/docs/developer-guide/browser-supervisor.md` — this file
-
-### Modified
-
-- `toolsets.py` — register `browser_dialog` in `browser`, `hermes-acp`, `hermes-api-server`, core toolsets (gated on CDP reachability)
-- `tools/browser_tool.py`
-  - `browser_navigate` start-hook: if CDP URL resolvable, `SupervisorRegistry.get_or_start(task_id, cdp_url)`
-  - `browser_snapshot` (at ~line 1536): merge supervisor state into return payload
-  - `/browser connect` handler: restart supervisor with new endpoint
-  - Session teardown hooks in `_cleanup_browser_session`
-- `hermes_cli/config.py` — add `browser.dialog_policy` and `browser.dialog_timeout_s` to `DEFAULT_CONFIG`
-- Docs: `website/docs/user-guide/features/browser.md`, `website/docs/reference/tools-reference.md`, `website/docs/reference/toolsets-reference.md`
+- `tools/browser_tool.py` — `browser_navigate` start-hook, `browser_snapshot` merge, `/browser connect` reattach, `_cleanup_browser_session` teardown
+- `toolsets.py` — registers `browser_dialog` in `browser`, `hermes-acp`, `hermes-api-server`, and core toolsets (gated on CDP reachability)
+- `hermes_cli/config.py` — `browser.dialog_policy` and `browser.dialog_timeout_s` defaults
 
 ## Non-goals
 
@@ -214,9 +189,10 @@ Issue planned against `jo-inc/camofox-browser` adding:
 
 ## Testing
 
-Unit tests use an asyncio mock CDP server that speaks enough of the protocol
-to exercise all state transitions: attach, enable, navigate, dialog fire,
-dialog dismiss, frame attach/detach, child target attach, session teardown.
-Real-backend E2E (Browserbase + local Chromium-family browser) is manual — exercise via
-`/browser connect` to a live Chromium-family browser and run the dialog/frame
-test cases described above.
+Unit tests (`tests/tools/test_browser_supervisor.py`) use an asyncio mock CDP
+server that speaks enough of the protocol to exercise all state transitions:
+attach, enable, navigate, dialog fire, dialog dismiss, frame attach/detach,
+child target attach, session teardown. Real-backend E2E (Browserbase + local
+Chromium-family browser) is manual — exercise via `/browser connect` to a
+live Chromium-family browser and run the dialog/frame test cases described
+above.
diff --git a/website/docs/developer-guide/creating-skills.md b/website/docs/developer-guide/creating-skills.md
index df55cc14d65..50335901752 100644
--- a/website/docs/developer-guide/creating-skills.md
+++ b/website/docs/developer-guide/creating-skills.md
@@ -330,7 +330,7 @@ Bundled skills (in `skills/`) ship with every Hermes install. They should be **b
 - Document handling, web research, common dev workflows, system administration
 - Used regularly by a wide range of people
 
-If your skill is official and useful but not universally needed (e.g., a paid service integration, a heavyweight dependency), put it in **`optional-skills/`** — it ships with the repo, is discoverable via `hermes skills browse` (labeled "official"), and installs with builtin trust.
+If your skill is official and useful but not universally needed (e.g., a paid service integration, a heavyweight dependency), put it in **`optional-skills/`** — it ships with the repo, is discoverable via `hermes skills browse` (labeled "official"), and installs with built-in trust.
 
 If your skill is specialized, community-contributed, or niche, it's better suited for a **Skills Hub** — upload it to a registry and share it via `hermes skills install`.
 
@@ -363,7 +363,7 @@ All hub-installed skills go through a security scanner that checks for:
 
 Trust levels:
 - `builtin` — ships with Hermes (always trusted)
-- `official` — from `optional-skills/` in the repo (builtin trust, no third-party warning)
+- `official` — from `optional-skills/` in the repo (built-in trust, no third-party warning)
 - `trusted` — from openai/skills, anthropics/skills, huggingface/skills
 - `community` — non-dangerous findings can be overridden with `--force`; `dangerous` verdicts remain blocked
 
diff --git a/website/docs/developer-guide/memory-provider-plugin.md b/website/docs/developer-guide/memory-provider-plugin.md
index 14112bb1eb8..c490fb2153f 100644
--- a/website/docs/developer-guide/memory-provider-plugin.md
+++ b/website/docs/developer-guide/memory-provider-plugin.md
@@ -61,7 +61,7 @@ class MyMemoryProvider(MemoryProvider):
 | `is_available()` | Agent init, before activation | **Yes** — no network calls |
 | `initialize(session_id, **kwargs)` | Agent startup | **Yes** |
 | `get_tool_schemas()` | After init, for tool injection | **Yes** |
-| `handle_tool_call(name, args)` | When agent uses your tools | **Yes** (if you have tools) |
+| `handle_tool_call(tool_name, args, **kwargs)` | When agent uses your tools | **Yes** (if you have tools) |
 
 ### Config
 
@@ -75,9 +75,9 @@ class MyMemoryProvider(MemoryProvider):
 | Method | When Called | Use Case |
 |--------|-----------|----------|
 | `system_prompt_block()` | System prompt assembly | Static provider info |
-| `prefetch(query)` | Before each API call | Return recalled context |
+| `prefetch(query, *, session_id="")` | Before each API call | Return recalled context |
 | `queue_prefetch(query)` | After each turn | Pre-warm for next turn |
-| `sync_turn(user, assistant)` | After each completed turn | Persist conversation |
+| `sync_turn(user, assistant, *, session_id="")` | After each completed turn | Persist conversation |
 | `on_session_end(messages)` | Conversation ends | Final extraction/flush |
 | `on_pre_compress(messages)` | Before context compression | Save insights before discard |
 | `on_memory_write(action, target, content)` | Built-in memory writes | Mirror to your backend |
@@ -154,10 +154,10 @@ hooks:
 **`sync_turn()` MUST be non-blocking.** If your backend has latency (API calls, LLM processing), run the work in a daemon thread:
 
 ```python
-def sync_turn(self, user_content, assistant_content):
+def sync_turn(self, user_content, assistant_content, *, session_id="", messages=None):
     def _sync():
         try:
-            self._api.ingest(user_content, assistant_content)
+            self._api.ingest(user_content, assistant_content, session_id=session_id, messages=messages)
         except Exception as e:
             logger.warning("Sync failed: %s", e)
 
@@ -167,6 +167,16 @@ def sync_turn(self, user_content, assistant_content):
     self._sync_thread.start()
 ```
 
+`messages` is optional OpenAI-style conversation context as of the completed
+turn. When present, it includes user/assistant messages, assistant tool calls,
+and tool result messages. Providers that do not need raw turn context can omit
+the `messages` parameter; Hermes will continue calling them with the legacy
+signature.
+
+Cloud providers should document what parts of `messages` are sent off-device.
+Tool calls and tool results may contain file paths, command output, or other
+workspace data.
+
 ## Profile Isolation
 
 All storage paths **must** use the `hermes_home` kwarg from `initialize()`, not hardcoded `~/.hermes`:
@@ -182,7 +192,7 @@ data_dir = Path("~/.hermes/my-provider").expanduser()
 
 ## Testing
 
-See `tests/agent/test_memory_plugin_e2e.py` for the complete E2E testing pattern using a real SQLite provider.
+See `tests/agent/test_memory_provider.py` and adjacent memory tests (`tests/agent/test_memory_session_switch.py`, `tests/agent/test_memory_user_id.py`, `tests/run_agent/test_memory_provider_init.py`) for end-to-end patterns.
 
 ```python
 from agent.memory_manager import MemoryManager
diff --git a/website/docs/developer-guide/prompt-assembly.md b/website/docs/developer-guide/prompt-assembly.md
index f23705870ee..d4b31027e2f 100644
--- a/website/docs/developer-guide/prompt-assembly.md
+++ b/website/docs/developer-guide/prompt-assembly.md
@@ -26,18 +26,18 @@ Primary files:
 
 ## Cached system prompt layers
 
-The cached system prompt is assembled in roughly this order:
+The cached system prompt is assembled as three ordered tiers (see `agent/system_prompt.py`):
 
-1. agent identity — `SOUL.md` from `HERMES_HOME` when available, otherwise falls back to `DEFAULT_AGENT_IDENTITY` in `prompt_builder.py`
-2. tool-aware behavior guidance
-3. Honcho static block (when active)
-4. optional system message
-5. frozen MEMORY snapshot
-6. frozen USER profile snapshot
-7. skills index
-8. context files (`AGENTS.md`, `.cursorrules`, `.cursor/rules/*.mdc`) — SOUL.md is **not** included here when it was already loaded as the identity in step 1
-9. timestamp / optional session ID
-10. platform hint
+1. **stable** — identity (`SOUL.md` or fallback), tool/model guidance, skills prompt, environment hints, platform hints
+2. **context** — caller-supplied `system_message` plus project context files (`.hermes.md` / `AGENTS.md` / `CLAUDE.md` / `.cursorrules`)
+3. **volatile** — built-in memory snapshot (`MEMORY.md`), user profile snapshot (`USER.md`), external memory-provider block, timestamp/session/model/provider line
+
+The final system prompt is then joined as: `stable` → `context` → `volatile`.
+
+This ordering matters for precedence discussions:
+- skills are part of the **stable** tier
+- memory/profile snapshots are part of the **volatile** tier
+- both are still in the cached system prompt (they are not injected as ad-hoc mid-turn overlays)
 
 When `skip_context_files` is set (e.g., subagent delegation), SOUL.md is not loaded and the hardcoded `DEFAULT_AGENT_IDENTITY` is used instead.
 
@@ -205,13 +205,15 @@ These are intentionally *not* persisted as part of the cached system prompt:
 - `ephemeral_system_prompt`
 - prefill messages
 - gateway-derived session context overlays
-- later-turn Honcho recall injected into the current-turn user message
+- later-turn Honcho/external recall injected into the current-turn user message
+
+`pre_llm_call` plugin context also lands in this API-call-time path: it is appended to the current turn's **user message**, not written into the cached system prompt. When multiple plugins return context, Hermes concatenates those context blocks (see [Hooks → `pre_llm_call`](../user-guide/features/hooks.md#pre_llm_call)).
 
 This separation keeps the stable prefix stable for caching.
 
 ## Memory snapshots
 
-Local memory and user profile data are injected as frozen snapshots at session start. Mid-session writes update disk state but do not mutate the already-built system prompt until a new session or forced rebuild occurs.
+Local memory and user profile data are captured in the system prompt's **volatile tier**. Mid-session writes update disk state but do not mutate the already-built cached system prompt until a rebuild path runs (new session, or explicit invalidation/rebuild flow such as compression-triggered rebuild).
 
 ## Context files
 
diff --git a/website/docs/developer-guide/provider-runtime.md b/website/docs/developer-guide/provider-runtime.md
index 9f87077191c..b412ff479a3 100644
--- a/website/docs/developer-guide/provider-runtime.md
+++ b/website/docs/developer-guide/provider-runtime.md
@@ -193,7 +193,11 @@ Cron jobs **do** support fallback: `run_job()` reads `fallback_providers` (or le
 
 ### Test coverage
 
-See `tests/test_fallback_model.py` for comprehensive tests covering all supported providers, one-shot semantics, and edge cases.
+Fallback behavior is exercised across several suites:
+
+- `tests/run_agent/test_fallback_credential_isolation.py` — credential isolation between primary and fallback
+- `tests/hermes_cli/test_fallback_cmd.py` — the `/fallback` CLI command
+- `tests/gateway/test_fallback_eviction.py` — gateway eviction of failed providers
 
 ## Related docs
 
diff --git a/website/docs/developer-guide/web-search-provider-plugin.md b/website/docs/developer-guide/web-search-provider-plugin.md
index a89ee9b4b7b..ba44af8f5f8 100644
--- a/website/docs/developer-guide/web-search-provider-plugin.md
+++ b/website/docs/developer-guide/web-search-provider-plugin.md
@@ -6,7 +6,7 @@ description: "How to build a web-search/extract/crawl backend plugin for Hermes
 
 # Building a Web Search Provider Plugin
 
-Web-search provider plugins register a backend that services `web_search`, `web_extract`, and (optionally) deep-crawl tool calls. Built-in providers — Firecrawl, SearXNG, Tavily, Exa, Parallel, Brave Search (free tier), and DDGS — all ship as plugins under `plugins/web/<name>/`. You can add a new one, or override a bundled one, by dropping a directory next to them.
+Web-search provider plugins register a backend that services `web_search`, `web_extract`, and (optionally) deep-crawl tool calls. Built-in providers — Firecrawl, SearXNG, Tavily, Exa, Parallel, Brave Search (free tier), xAI, and DDGS — all ship as plugins under `plugins/web/<name>/`. You can add a new one, or override a bundled one, by dropping a directory next to them.
 
 :::tip
 Web search is one of several **backend plugins** Hermes supports. The others (with their own ABCs) are [Image Generation Provider Plugins](/developer-guide/image-gen-provider-plugin), [Video Generation Provider Plugins](/developer-guide/video-gen-provider-plugin), [Memory Provider Plugins](/developer-guide/memory-provider-plugin), [Context Engine Plugins](/developer-guide/context-engine-plugin), and [Model Provider Plugins](/developer-guide/model-provider-plugin). General tool/hook/CLI plugins live in [Build a Hermes Plugin](/guides/build-a-hermes-plugin).
@@ -80,9 +80,6 @@ class MyBackendWebSearchProvider(WebSearchProvider):
     def supports_extract(self) -> bool:
         return False
 
-    def supports_crawl(self) -> bool:
-        return False
-
     def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
         import httpx
 
@@ -157,12 +154,10 @@ Full contract in `agent/web_search_provider.py`. Methods you may override:
 | `is_available()` | ✅ | — | Cheap availability gate — env vars, optional deps |
 | `supports_search()` | — | `True` | Capability flag for `web_search` routing |
 | `supports_extract()` | — | `False` | Capability flag for `web_extract` routing |
-| `supports_crawl()` | — | `False` | Capability flag for deep-crawl modes |
 | `search(query, limit)` | conditional | raises | Required when `supports_search()` returns `True` |
 | `extract(urls, **kwargs)` | conditional | raises | Required when `supports_extract()` returns `True` |
-| `crawl(url, **kwargs)` | conditional | raises | Required when `supports_crawl()` returns `True` |
 
-Providers can advertise multiple capabilities from a single class — Firecrawl, Tavily, Exa, and Parallel all implement all three of search/extract/crawl. Brave Search and DDGS are search-only; SearXNG is search-only with a documented "pair me with an extract provider" workflow.
+Providers can advertise multiple capabilities from a single class — Firecrawl, Tavily, Exa, and Parallel all implement both search and extract. Brave Search and DDGS are search-only; SearXNG is search-only with a documented "pair me with an extract provider" workflow.
 
 ## Response shape
 
diff --git a/website/docs/getting-started/learning-path.md b/website/docs/getting-started/learning-path.md
index 59d7775d259..619e2010394 100644
--- a/website/docs/getting-started/learning-path.md
+++ b/website/docs/getting-started/learning-path.md
@@ -12,6 +12,10 @@ Hermes Agent can do a lot — CLI assistant, Telegram/Discord bot, task automati
 If you haven't installed Hermes Agent yet, begin with the [Installation guide](/getting-started/installation) and then run through the [Quickstart](/getting-started/quickstart). Everything below assumes you have a working installation.
 :::
 
+:::tip First-time provider setup
+First-time users almost always want `hermes setup --portal` — one OAuth covers a model plus the four Tool Gateway tools (search/image/TTS/browser). See [Nous Portal](/integrations/nous-portal).
+:::
+
 ## How to Use This Page
 
 - **Know your level?** Jump to the [experience-level table](#by-experience-level) and follow the reading order for your tier.
@@ -24,7 +28,7 @@ If you haven't installed Hermes Agent yet, begin with the [Installation guide](/
 |---|---|---|---|
 | **Beginner** | Get up and running, have basic conversations, use built-in tools | [Installation](/getting-started/installation) → [Quickstart](/getting-started/quickstart) → [CLI Usage](/user-guide/cli) → [Configuration](/user-guide/configuration) | ~1 hour |
 | **Intermediate** | Set up messaging bots, use advanced features like memory, cron jobs, and skills | [Sessions](/user-guide/sessions) → [Messaging](/user-guide/messaging) → [Tools](/user-guide/features/tools) → [Skills](/user-guide/features/skills) → [Memory](/user-guide/features/memory) → [Cron](/user-guide/features/cron) | ~2–3 hours |
-| **Advanced** | Build custom tools, create skills, train models with RL, contribute to the project | [Architecture](/developer-guide/architecture) → [Adding Tools](/developer-guide/adding-tools) → [Creating Skills](/developer-guide/creating-skills) → [RL Training](/user-guide/features/rl-training) → [Contributing](/developer-guide/contributing) | ~4–6 hours |
+| **Advanced** | Build custom tools, create skills, train models with RL, contribute to the project | [Architecture](/developer-guide/architecture) → [Adding Tools](/developer-guide/adding-tools) → [Creating Skills](/developer-guide/creating-skills) → [Contributing](/developer-guide/contributing) | ~4–6 hours |
 
 ## By Use Case
 
@@ -96,11 +100,11 @@ page is for built-in Hermes core development, not the usual user/custom-tool pat
 
 ### "I want to train models"
 
-Use reinforcement learning to fine-tune model behavior with Hermes Agent's built-in RL training pipeline.
+Use reinforcement learning to fine-tune model behavior with Hermes Agent's RL training pipeline (powered by [Atropos](https://github.com/NousResearch/atropos)).
 
 1. [Quickstart](/getting-started/quickstart)
 2. [Configuration](/user-guide/configuration)
-3. [RL Training](/user-guide/features/rl-training)
+3. [Atropos RL Environments](https://github.com/NousResearch/atropos) (external)
 4. [Provider Routing](/user-guide/features/provider-routing)
 5. [Architecture](/developer-guide/architecture)
 
@@ -136,7 +140,6 @@ Not sure what's available? Here's a quick directory of major features:
 | **Browser** | Web browsing and scraping | [Browser](/user-guide/features/browser) |
 | **Hooks** | Event-driven callbacks and middleware | [Hooks](/user-guide/features/hooks) |
 | **Batch Processing** | Process multiple inputs in bulk | [Batch Processing](/user-guide/features/batch-processing) |
-| **RL Training** | Fine-tune models with reinforcement learning | [RL Training](/user-guide/features/rl-training) |
 | **Provider Routing** | Route requests across multiple LLM providers | [Provider Routing](/user-guide/features/provider-routing) |
 
 ## What to Read Next
diff --git a/website/docs/getting-started/nix-setup.md b/website/docs/getting-started/nix-setup.md
index ea2beb1fb7a..3cfdd86fd29 100644
--- a/website/docs/getting-started/nix-setup.md
+++ b/website/docs/getting-started/nix-setup.md
@@ -583,7 +583,7 @@ Host                                    Container
   │   ├── state.db, sessions/, memories/   (runtime state)
   │   └── mcp-tokens/                      (OAuth tokens for MCP servers)
   ├── home/                                ──►  /home/hermes    (rw)
-  └── workspace/                           (MESSAGING_CWD)
+  └── workspace/                           (agent working directory)
       ├── SOUL.md                          (from documents option)
       └── (agent-created files)
 
@@ -831,7 +831,7 @@ nix build .#checks.x86_64-linux.config-roundtrip    # merge script preserves use
 | `group` | `str` | `"hermes"` | System group |
 | `createUser` | `bool` | `true` | Auto-create user/group |
 | `stateDir` | `str` | `"/var/lib/hermes"` | State directory (`HERMES_HOME` parent) |
-| `workingDirectory` | `str` | `"${stateDir}/workspace"` | Agent working directory (`MESSAGING_CWD`) |
+| `workingDirectory` | `str` | `"${stateDir}/workspace"` | Agent working directory |
 | `addToSystemPackages` | `bool` | `false` | Add `hermes` CLI to system PATH and set `HERMES_HOME` system-wide |
 
 ### Configuration
@@ -918,7 +918,7 @@ nix build .#checks.x86_64-linux.config-roundtrip    # merge script preserves use
 │   ├── cron/
 │   └── logs/
 ├── home/                            # Agent HOME
-└── workspace/                       # MESSAGING_CWD
+└── workspace/                       # Agent working directory
     ├── SOUL.md                      # From documents option
     └── (agent-created files)
 ```
diff --git a/website/docs/getting-started/quickstart.md b/website/docs/getting-started/quickstart.md
index c398474a2e8..79277e64ace 100644
--- a/website/docs/getting-started/quickstart.md
+++ b/website/docs/getting-started/quickstart.md
@@ -10,7 +10,7 @@ This guide gets you from zero to a working Hermes setup that survives real use.
 
 ## Prefer to watch?
 
-**Onchain AI Garage** put together a Masterclass walkthrough of installation, setup, and basic commands — a good companion to this page if you'd rather follow along on video. For more, see the full [Hermes Agent Tutorials & Use Cases](https://www.youtube.com/channel/UCqB1bhMwGsW-yefBxYwFCCg) playlist.
+**Onchain AI Garage** put together a Masterclass walkthrough of installation, setup, and basic commands — a good companion to this page if you'd rather follow along on video. For more, see the full [Hermes Agent Tutorials & Use Cases](https://www.youtube.com/playlist?list=PLmpUb_PWAkDxewld5ZYyKifuHxgIbiq2d) playlist.
 
 <div style={{position: 'relative', paddingBottom: '56.25%', height: 0, overflow: 'hidden', maxWidth: '100%', marginBottom: '1.5rem'}}>
   <iframe
@@ -111,17 +111,29 @@ Good defaults:
 | **OpenAI Codex** | ChatGPT OAuth, uses Codex models | Device code auth via `hermes model` |
 | **Anthropic** | Claude models directly — Max plan + extra usage credits (OAuth), or API key for pay-per-token | `hermes model` → OAuth login (requires Max + extra credits), or an Anthropic API key |
 | **OpenRouter** | Multi-provider routing across many models | Enter your API key |
-| **Z.AI** | GLM / Zhipu-hosted models | Set `GLM_API_KEY` / `ZAI_API_KEY` |
+| **Z.AI** | GLM / Zhipu-hosted models | Set `GLM_API_KEY` / `ZAI_API_KEY` (also accepts `Z_AI_API_KEY`) |
 | **Kimi / Moonshot** | Moonshot-hosted coding and chat models | Set `KIMI_API_KEY` (or the Kimi-Coding-specific `KIMI_CODING_API_KEY`) |
 | **Kimi / Moonshot China** | China-region Moonshot endpoint | Set `KIMI_CN_API_KEY` |
 | **Arcee AI** | Trinity models | Set `ARCEEAI_API_KEY` |
 | **GMI Cloud** | Multi-model direct API | Set `GMI_API_KEY` |
-| **MiniMax (OAuth)** | MiniMax-M2.7 via browser OAuth — no API key needed | `hermes model` → MiniMax (OAuth) |
+| **MiniMax (OAuth)** | MiniMax frontier model via browser OAuth — no API key needed (model name in `hermes_cli/models.py` may change between releases) | `hermes model` → MiniMax (OAuth) |
 | **MiniMax** | International MiniMax endpoint | Set `MINIMAX_API_KEY` |
 | **MiniMax China** | China-region MiniMax endpoint | Set `MINIMAX_CN_API_KEY` |
-| **Alibaba Cloud** | Qwen models via DashScope | Set `DASHSCOPE_API_KEY` |
+| **Alibaba Cloud** | Qwen models via DashScope | Set `DASHSCOPE_API_KEY` (Qwen Coding Plan also accepts `ALIBABA_CODING_PLAN_API_KEY`) |
 | **Hugging Face** | 20+ open models via unified router (Qwen, DeepSeek, Kimi, etc.) | Set `HF_TOKEN` |
 | **AWS Bedrock** | Claude, Nova, Llama, DeepSeek via native Converse API | IAM role or `aws configure` ([guide](../guides/aws-bedrock.md)) |
+| **Azure Foundry** | Azure AI Foundry-hosted models | Set `AZURE_FOUNDRY_API_KEY` + `AZURE_FOUNDRY_BASE_URL` |
+| **Google AI Studio** | Gemini models via direct API | Set `GOOGLE_API_KEY` / `GEMINI_API_KEY` |
+| **Google Gemini (OAuth)** | Gemini via the `google-gemini-cli` OAuth flow — no key needed | `hermes model` → Google Gemini (OAuth) |
+| **xAI** | Grok models via direct API | Set `XAI_API_KEY` |
+| **xAI Grok OAuth** | SuperGrok / Premium+ subscription, no API key needed | `hermes model` → xAI Grok OAuth |
+| **NovitaAI** | Multi-model API gateway | Set `NOVITA_API_KEY` |
+| **StepFun** | Step Plan models | Set `STEPFUN_API_KEY` |
+| **Xiaomi MiMo** | Xiaomi-hosted models | Set `XIAOMI_API_KEY` |
+| **Tencent TokenHub** | Tencent-hosted models | Set `TOKENHUB_API_KEY` |
+| **Ollama Cloud** | Managed Ollama-hosted models | Set `OLLAMA_API_KEY` |
+| **LM Studio** | Local desktop app exposing an OpenAI-compatible API | Set `LM_API_KEY` (and `LM_BASE_URL` if non-default) |
+| **Qwen OAuth** | Qwen Portal browser OAuth — no API key needed | `hermes model` → Qwen OAuth |
 | **Kilo Code** | KiloCode-hosted models | Set `KILOCODE_API_KEY` |
 | **OpenCode Zen** | Pay-as-you-go access to curated models | Set `OPENCODE_ZEN_API_KEY` |
 | **OpenCode Go** | $10/month subscription for open models | Set `OPENCODE_GO_API_KEY` |
diff --git a/website/docs/getting-started/updating.md b/website/docs/getting-started/updating.md
index 4a6c9b4ba92..64774242c61 100644
--- a/website/docs/getting-started/updating.md
+++ b/website/docs/getting-started/updating.md
@@ -43,9 +43,21 @@ When you run `hermes update`, the following steps occur:
 
 1. **Pairing-data snapshot** — a lightweight pre-update state snapshot is saved (covers `~/.hermes/pairing/`, Feishu comment rules, and other state files that get modified at runtime). Recoverable via the snapshot restore flow described under [Snapshots and rollback](../user-guide/checkpoints-and-rollback.md), or by extracting the most recent quick-snapshot zip Hermes wrote next to your `~/.hermes/` directory.
 2. **Git pull** — pulls the latest code from the `main` branch and updates submodules
-3. **Dependency install** — runs `uv pip install -e ".[all]"` to pick up new or changed dependencies
-4. **Config migration** — detects new config options added since your version and prompts you to set them
-5. **Gateway auto-restart** — running gateways are refreshed after the update completes so the new code takes effect immediately. Service-managed gateways (systemd on Linux, launchd on macOS) are restarted through the service manager. Manual gateways are relaunched automatically when Hermes can map the running PID back to a profile.
+3. **Post-pull syntax validation + auto-rollback** — after the pull, Hermes compiles the eight critical files every `hermes` invocation imports at startup. If any fails to parse (e.g. an orphan merge-conflict marker, an accidentally truncated file), Hermes runs `git reset --hard <pre-pull-sha>` to roll the install back so your shell stays bootable. Re-run `hermes update` once the upstream fix lands.
+4. **Dependency install** — runs `uv pip install -e ".[all]"` to pick up new or changed dependencies
+5. **Config migration** — detects new config options added since your version and prompts you to set them
+6. **Gateway auto-restart** — running gateways are refreshed after the update completes so the new code takes effect immediately. Service-managed gateways (systemd on Linux, launchd on macOS) are restarted through the service manager. Manual gateways are relaunched automatically when Hermes can map the running PID back to a profile.
+
+### Updating against a non-default branch: `--branch`
+
+By default `hermes update` tracks `origin/main`. Pass `--branch <name>` to update against a different branch — useful for QA channels, feature branches, or release-candidate testing:
+
+```bash
+hermes update --branch release-candidate
+hermes update --check --branch experimental   # preview behindness only
+```
+
+If your local checkout is on a different branch, Hermes auto-stashes any uncommitted work, switches HEAD to the target branch, and then pulls. Branches that don't exist locally are auto-tracked from `origin/<name>` (`git checkout -B <name> origin/<name>`). Branches that don't exist anywhere fail cleanly — your stashed changes are restored before exit so you're never stranded in a weird state. The `main`-only fork-upstream sync logic is automatically skipped on non-`main` branches.
 
 ### Preview-only: `hermes update --check`
 
@@ -190,10 +202,10 @@ uv pip install -e ".[all]"
 hermes gateway restart
 ```
 
-To roll back to a specific release tag:
+To roll back to a specific release tag (substitute your previous tag — e.g. a recent release like `v2026.5.16`, or any earlier tag from `git tag --sort=-version:refname`):
 
 ```bash
-git checkout v0.6.0
+git checkout vX.Y.Z
 git submodule update --init --recursive
 uv pip install -e ".[all]"
 ```
diff --git a/website/docs/guides/build-a-hermes-plugin.md b/website/docs/guides/build-a-hermes-plugin.md
index 3341b4a97bc..2e144c7e97a 100644
--- a/website/docs/guides/build-a-hermes-plugin.md
+++ b/website/docs/guides/build-a-hermes-plugin.md
@@ -770,7 +770,7 @@ def register(ctx):
 **Runtime behavior:**
 
 - **CLI mode:** `parent_agent` is resolved from the active CLI agent so workspace hints, spinner, and model selection inherit as expected.
-- **Gateway mode:** There is no CLI agent, so tools degrade gracefully — workspace is read from `TERMINAL_CWD` and no spinner is shown.
+- **Gateway mode:** There is no CLI agent, so tools degrade gracefully — workspace is read from the configured terminal working directory and no spinner is shown.
 - **Explicit override:** If the caller passes `parent_agent=` explicitly, it is respected and not overwritten.
 
 This is the public, stable interface for tool dispatch from plugin commands. Plugins should not reach into `ctx._cli_ref.agent` or similar private state.
diff --git a/website/docs/guides/daily-briefing-bot.md b/website/docs/guides/daily-briefing-bot.md
index 6bb23a283c2..a4fda461be8 100644
--- a/website/docs/guides/daily-briefing-bot.md
+++ b/website/docs/guides/daily-briefing-bot.md
@@ -10,6 +10,10 @@ In this tutorial, you'll build a personal briefing bot that wakes up every morni
 
 By the end, you'll have a fully automated workflow combining **web search**, **cron scheduling**, **delegation**, and **messaging delivery** — no code required.
 
+:::tip
+This recipe hits web search, summarization, and optional TTS — all bundled in a Portal subscription. The fastest setup is `hermes setup --portal`. See [Nous Portal](/integrations/nous-portal).
+:::
+
 ## What We're Building
 
 Here's the flow:
diff --git a/website/docs/guides/migrate-from-openclaw.md b/website/docs/guides/migrate-from-openclaw.md
index b3892bd0a00..38a27e62268 100644
--- a/website/docs/guides/migrate-from-openclaw.md
+++ b/website/docs/guides/migrate-from-openclaw.md
@@ -8,6 +8,10 @@ description: "Complete guide to migrating your OpenClaw / Clawdbot setup to Herm
 
 `hermes claw migrate` imports your OpenClaw (or legacy Clawdbot/Moldbot) setup into Hermes. This guide covers exactly what gets migrated, the config key mappings, and what to verify after migration.
 
+:::tip
+If your OpenClaw setup was multi-provider, `hermes setup --portal` collapses it to one OAuth — 300+ models plus the Tool Gateway in a single login. See [Nous Portal](/integrations/nous-portal).
+:::
+
 ## Quick start
 
 ```bash
@@ -156,7 +160,7 @@ TTS settings are read from **two** OpenClaw config locations with this priority:
 | Browser headless | `browser.headless` | `config.yaml` → `browser.headless` | |
 | Brave search key | `tools.web.search.brave.apiKey` | `.env` → `BRAVE_API_KEY` | Requires `--migrate-secrets` |
 | Gateway auth token | `gateway.auth.token` | `.env` → `HERMES_GATEWAY_TOKEN` | Requires `--migrate-secrets` |
-| Working directory | `agents.defaults.workspace` | `.env` → `MESSAGING_CWD` | |
+| Working directory | `agents.defaults.workspace` | `config.yaml` → `terminal.cwd` | Legacy migrations may still emit `MESSAGING_CWD` as a compatibility fallback |
 
 ### Archived (no direct Hermes equivalent)
 
@@ -225,7 +229,7 @@ The migration resolves all three formats. For env templates and SecretRef object
 
 5. **Test messaging** — if you migrated platform tokens, restart the gateway: `systemctl --user restart hermes-gateway`
 
-6. **Check session policies** — verify `hermes config get session_reset` matches your expectations.
+6. **Check session policies** — run `hermes config show` and verify the `session_reset` value matches your expectations.
 
 7. **Re-pair WhatsApp** — WhatsApp uses QR code pairing (Baileys), not token migration. Run `hermes whatsapp` to pair.
 
diff --git a/website/docs/guides/minimax-oauth.md b/website/docs/guides/minimax-oauth.md
index 1f5667f1621..2d81106c3a7 100644
--- a/website/docs/guides/minimax-oauth.md
+++ b/website/docs/guides/minimax-oauth.md
@@ -16,7 +16,7 @@ The transport reuses the `anthropic_messages` adapter (MiniMax exposes an Anthro
 |------|-------|
 | Provider ID | `minimax-oauth` |
 | Display name | MiniMax (OAuth) |
-| Auth type | Browser OAuth (PKCE device-code flow) |
+| Auth type | Browser OAuth (PKCE redirect flow) |
 | Transport | Anthropic Messages-compatible (`anthropic_messages`) |
 | Models | `MiniMax-M2.7`, `MiniMax-M2.7-highspeed` |
 | Global endpoint | `https://api.minimax.io/anthropic` |
@@ -56,11 +56,9 @@ hermes auth add minimax-oauth
 
 ### China region
 
-If your account is on the China platform (`minimaxi.com`), use the China-region OAuth provider id `minimax-cn` instead, or skip OAuth and configure `MINIMAX_CN_API_KEY` / `MINIMAX_CN_BASE_URL` directly. The `--region cn` flag described in older docs is **not** wired through the CLI's argument parser; use the `minimax-cn` provider instead:
+If your account is on the China platform (`minimaxi.com`), use the API-key-based `minimax-cn` provider instead — `minimax-cn` is registered with `auth_type="api_key"` only (no OAuth flow). Configure `MINIMAX_CN_API_KEY` (and optionally `MINIMAX_CN_BASE_URL`) directly:
 
 ```bash
-hermes auth add minimax-cn --type oauth   # if OAuth is supported on your CN account
-# or simpler:
 echo 'MINIMAX_CN_API_KEY=your-key' >> ~/.hermes/.env
 ```
 
@@ -76,7 +74,7 @@ Hermes will print the verification URL and user code — open the URL on any dev
 
 ## The OAuth Flow
 
-Hermes implements a PKCE device-code flow against the MiniMax OAuth endpoints:
+Hermes implements a PKCE browser OAuth flow against the MiniMax OAuth endpoints:
 
 1. Hermes generates a PKCE verifier / challenge pair and a random state value.
 2. It POSTs to `{base_url}/oauth/code` with the challenge and receives a `user_code` and `verification_uri`.
@@ -115,8 +113,8 @@ hermes model
 Or set the model directly:
 
 ```bash
-hermes config set model MiniMax-M2.7
-hermes config set provider minimax-oauth
+hermes config set model.default MiniMax-M2.7
+hermes config set model.provider minimax-oauth
 ```
 
 ## Configuration Reference
diff --git a/website/docs/guides/oauth-over-ssh.md b/website/docs/guides/oauth-over-ssh.md
index 15ac3668f6f..22ee2f5f6d4 100644
--- a/website/docs/guides/oauth-over-ssh.md
+++ b/website/docs/guides/oauth-over-ssh.md
@@ -40,7 +40,7 @@ hermes auth add xai-oauth --manual-paste
 # → Paste it back into the terminal at the "Callback URL:" prompt.
 ```
 
-The same flag works on `hermes model --manual-paste` for the integrated model picker. A bare `?code=...&state=...` query fragment is accepted too if you don't want to paste the whole URL.
+The same flag works on `hermes model --manual-paste` for the integrated model picker. Hermes accepts three callback paste forms interchangeably: the full URL, a bare `?code=...&state=...` query fragment, or — when the upstream consent page renders the authorization code in-page instead of redirecting (xAI's current behavior on browser-based consoles) — just the bare code value on its own.
 
 Hermes uses the **same PKCE verifier, state and nonce** for both paths, so the upstream OAuth flow is byte-identical — `--manual-paste` is purely a transport change for the callback hop and is not a security downgrade.
 
diff --git a/website/docs/guides/python-library.md b/website/docs/guides/python-library.md
index 3bb08645ac9..89fa122759b 100644
--- a/website/docs/guides/python-library.md
+++ b/website/docs/guides/python-library.md
@@ -44,7 +44,7 @@ The simplest way to use Hermes is the `chat()` method — pass a message, get a
 from run_agent import AIAgent
 
 agent = AIAgent(
-    model="anthropic/claude-sonnet-4",
+    model="anthropic/claude-sonnet-4.6",
     quiet_mode=True,
 )
 response = agent.chat("What is the capital of France?")
@@ -65,7 +65,7 @@ For more control over the conversation, use `run_conversation()` directly. It re
 
 ```python
 agent = AIAgent(
-    model="anthropic/claude-sonnet-4",
+    model="anthropic/claude-sonnet-4.6",
     quiet_mode=True,
 )
 
@@ -102,14 +102,14 @@ Control which toolsets the agent has access to using `enabled_toolsets` or `disa
 ```python
 # Only enable web tools (browsing, search)
 agent = AIAgent(
-    model="anthropic/claude-sonnet-4",
+    model="anthropic/claude-sonnet-4.6",
     enabled_toolsets=["web"],
     quiet_mode=True,
 )
 
 # Enable everything except terminal access
 agent = AIAgent(
-    model="anthropic/claude-sonnet-4",
+    model="anthropic/claude-sonnet-4.6",
     disabled_toolsets=["terminal"],
     quiet_mode=True,
 )
@@ -127,7 +127,7 @@ Maintain conversation state across multiple turns by passing the message history
 
 ```python
 agent = AIAgent(
-    model="anthropic/claude-sonnet-4",
+    model="anthropic/claude-sonnet-4.6",
     quiet_mode=True,
 )
 
@@ -153,7 +153,7 @@ Enable trajectory saving to capture conversations in ShareGPT format — useful
 
 ```python
 agent = AIAgent(
-    model="anthropic/claude-sonnet-4",
+    model="anthropic/claude-sonnet-4.6",
     save_trajectories=True,
     quiet_mode=True,
 )
@@ -311,7 +311,7 @@ print(review)
 
 | Parameter | Type | Default | Description |
 |-----------|------|---------|-------------|
-| `model` | `str` | `"anthropic/claude-opus-4.6"` | Model in OpenRouter format |
+| `model` | `str` | `""` | Model in OpenRouter format (defaults to empty; resolved from your hermes config at runtime) |
 | `quiet_mode` | `bool` | `False` | Suppress CLI output |
 | `enabled_toolsets` | `List[str]` | `None` | Whitelist specific toolsets |
 | `disabled_toolsets` | `List[str]` | `None` | Blacklist specific toolsets |
diff --git a/website/docs/guides/run-hermes-with-nous-portal.md b/website/docs/guides/run-hermes-with-nous-portal.md
index b1d5b8aece2..a8ac20478e5 100644
--- a/website/docs/guides/run-hermes-with-nous-portal.md
+++ b/website/docs/guides/run-hermes-with-nous-portal.md
@@ -161,8 +161,9 @@ Then in any messaging-platform session (Telegram, Discord, Signal, etc.), send a
 The Portal subscription works for [cron jobs](/user-guide/features/cron) and [batch processing](/user-guide/features/batch-processing) the same way it works for interactive chat — the OAuth refresh token is reused automatically. No additional setup; just schedule cron jobs and they'll bill against your subscription.
 
 ```bash
-hermes cron add "Daily AI news summary" "every day at 9am" \
-  "Search the web for top AI news and summarize the 5 most important stories"
+hermes cron create "every day at 9am" \
+  "Search the web for top AI news and summarize the 5 most important stories" \
+  --name "Daily AI news"
 ```
 
 The cron job runs unattended, calls the model + web search + summarization all through your Portal subscription.
diff --git a/website/docs/guides/tips.md b/website/docs/guides/tips.md
index 643c576a4b0..ea7670ace50 100644
--- a/website/docs/guides/tips.md
+++ b/website/docs/guides/tips.md
@@ -8,6 +8,10 @@ description: "Practical advice to get the most out of Hermes Agent — prompt ti
 
 A quick-wins collection of practical tips that make you immediately more effective with Hermes Agent. Each section targets a different aspect — scan the headers and jump to what's relevant.
 
+:::tip Confused which model to pick?
+Run `hermes setup --portal` — you get 300+ models including Claude, GPT-5, and Gemini under one subscription. See [Nous Portal](/integrations/nous-portal).
+:::
+
 ---
 
 ## Getting the Best Results
diff --git a/website/docs/guides/use-voice-mode-with-hermes.md b/website/docs/guides/use-voice-mode-with-hermes.md
index f8685670e8f..90ca25bdb94 100644
--- a/website/docs/guides/use-voice-mode-with-hermes.md
+++ b/website/docs/guides/use-voice-mode-with-hermes.md
@@ -10,6 +10,10 @@ This guide is the practical companion to the [Voice Mode feature reference](/use
 
 If the feature page explains what voice mode can do, this guide shows how to actually use it well.
 
+:::tip
+[Nous Portal](/integrations/nous-portal) bundles both the LLM and TTS through one OAuth — voice mode works end-to-end with no extra credentials.
+:::
+
 ## What voice mode is good for
 
 Voice mode is especially useful when:
diff --git a/website/docs/guides/work-with-skills.md b/website/docs/guides/work-with-skills.md
index 7e61312333f..331558924e0 100644
--- a/website/docs/guides/work-with-skills.md
+++ b/website/docs/guides/work-with-skills.md
@@ -162,7 +162,7 @@ Manage skill config from the CLI:
 hermes skills config gif-search
 
 # View all skill config
-hermes config get skills.config
+hermes config show | grep '^skills\.config'
 ```
 
 ---
diff --git a/website/docs/guides/xai-grok-oauth.md b/website/docs/guides/xai-grok-oauth.md
index db5f87171a3..2a984168723 100644
--- a/website/docs/guides/xai-grok-oauth.md
+++ b/website/docs/guides/xai-grok-oauth.md
@@ -94,6 +94,8 @@ hermes model --manual-paste
 
 See [OAuth over SSH / Remote Hosts](./oauth-over-ssh.md#browser-only-remote-cloud-shell--codespaces--ec2-instance-connect) for the full walkthrough. Regression fix for [#26923](https://github.com/NousResearch/hermes-agent/issues/26923).
 
+If the consent page renders the authorization code directly on the page (xAI's current behavior on browser-based consoles) instead of redirecting to your `127.0.0.1:56121/callback`, paste **just the bare code value** at the `Callback URL:` prompt — Hermes accepts the full URL, a bare `?code=...&state=...` query fragment, or a bare code interchangeably.
+
 ## How the Login Works
 
 1. Hermes opens your browser to `accounts.x.ai`.
diff --git a/website/docs/index.mdx b/website/docs/index.mdx
index a9faf5811a4..7addd371527 100644
--- a/website/docs/index.mdx
+++ b/website/docs/index.mdx
@@ -37,6 +37,10 @@ iex (irm https://raw.githubusercontent.com/NousResearch/hermes-agent/main/script
 
 See the full **[Installation Guide](/getting-started/installation)** for what the installer does, the per-user vs root layout, and Windows-specific notes.
 
+:::tip Fastest path to a working agent
+After installing, run `hermes setup --portal` — one OAuth covers a model plus all four Tool Gateway tools (web search, image generation, TTS, browser). See [Nous Portal](/integrations/nous-portal).
+:::
+
 ## What is Hermes Agent?
 
 It's not a coding copilot tethered to an IDE or a chatbot wrapper around a single API. It's an **autonomous agent** that gets more capable the longer it runs. It lives wherever you put it — a $5 VPS, a GPU cluster, or serverless infrastructure (Daytona, Modal) that costs nearly nothing when idle. Talk to it from Telegram while it works on a cloud VM you never SSH into yourself. It's not tied to your laptop.
@@ -50,7 +54,7 @@ It's not a coding copilot tethered to an IDE or a chatbot wrapper around a singl
 | 🗺️ **[Learning Path](/getting-started/learning-path)** | Find the right docs for your experience level |
 | ⚙️ **[Configuration](/user-guide/configuration)** | Config file, providers, models, and options |
 | 💬 **[Messaging Gateway](/user-guide/messaging)** | Set up Telegram, Discord, Slack, WhatsApp, Teams, or more |
-| 🔧 **[Tools & Toolsets](/user-guide/features/tools)** | 70+ built-in tools and how to configure them |
+| 🔧 **[Tools & Toolsets](/user-guide/features/tools)** | 60+ built-in tools and how to configure them |
 | 🧠 **[Memory System](/user-guide/features/memory)** | Persistent memory that grows across sessions |
 | 📚 **[Skills System](/user-guide/features/skills)** | Procedural memory the agent creates and reuses |
 | 🔌 **[MCP Integration](/user-guide/features/mcp)** | Connect to MCP servers, filter their tools, and extend Hermes safely |
@@ -73,7 +77,7 @@ It's not a coding copilot tethered to an IDE or a chatbot wrapper around a singl
 - **Scheduled automations** — Built-in cron with delivery to any platform
 - **Delegates & parallelizes** — Spawn isolated subagents for parallel workstreams. Programmatic Tool Calling via `execute_code` collapses multi-step pipelines into single inference calls
 - **Open standard skills** — Compatible with [agentskills.io](https://agentskills.io). Skills are portable, shareable, and community-contributed via the Skills Hub
-- **Full web control** — Search, extract, browse, vision, image generation, TTS
+- **Full web control** — Search, extract, browse, vision, image generation, TTS — one subscription via [Nous Portal](/integrations/nous-portal) bundles all of them
 - **MCP support** — Connect to any MCP server for extended tool capabilities
 - **Research-ready** — Batch processing, trajectory export, RL training with Atropos. Built by [Nous Research](https://nousresearch.com) — the lab behind Hermes, Nomos, and Psyche models
 
diff --git a/website/docs/integrations/index.md b/website/docs/integrations/index.md
index 6c7839a6cff..4e00a5600c7 100644
--- a/website/docs/integrations/index.md
+++ b/website/docs/integrations/index.md
@@ -8,6 +8,10 @@ sidebar_position: 0
 
 Hermes Agent connects to external systems for AI inference, tool servers, IDE workflows, programmatic access, and more. These integrations extend what Hermes can do and where it can run.
 
+:::tip Start here
+If you only have time to set up one integration, set up [Nous Portal](/integrations/nous-portal) — a single OAuth login covers 300+ models plus the four Tool Gateway tools (web search, image generation, TTS, and browser automation).
+:::
+
 ## AI Providers & Routing
 
 Hermes supports multiple AI inference providers out of the box. Use `hermes model` to configure interactively, or set them in `config.yaml`.
@@ -61,6 +65,7 @@ Text-to-speech and speech-to-text across all messaging platforms:
 | **ElevenLabs** | Excellent | Paid | `ELEVENLABS_API_KEY` |
 | **OpenAI TTS** | Good | Paid | `VOICE_TOOLS_OPENAI_KEY` |
 | **MiniMax** | Good | Paid | `MINIMAX_API_KEY` |
+| **xAI TTS** | Good | Paid | `XAI_API_KEY` |
 | **NeuTTS** | Good | Free | None needed |
 
 Speech-to-text supports six providers: local faster-whisper (free, runs on-device), a local command wrapper, Groq, OpenAI Whisper API, Mistral, and xAI. Voice message transcription works across Telegram, Discord, WhatsApp, and other messaging platforms. See [Voice & TTS](/user-guide/features/tts) and [Voice Mode](/user-guide/features/voice-mode) for details.
@@ -80,9 +85,9 @@ Speech-to-text supports six providers: local faster-whisper (free, runs on-devic
 
 ## Messaging Platforms
 
-Hermes runs as a gateway bot on 19+ messaging platforms, all configured through the same `gateway` subsystem:
+Hermes runs as a gateway bot on 27+ messaging platforms, all configured through the same `gateway` subsystem:
 
-- **[Telegram](/user-guide/messaging/telegram)**, **[Discord](/user-guide/messaging/discord)**, **[Slack](/user-guide/messaging/slack)**, **[WhatsApp](/user-guide/messaging/whatsapp)**, **[Signal](/user-guide/messaging/signal)**, **[Matrix](/user-guide/messaging/matrix)**, **[Mattermost](/user-guide/messaging/mattermost)**, **[Email](/user-guide/messaging/email)**, **[SMS](/user-guide/messaging/sms)**, **[DingTalk](/user-guide/messaging/dingtalk)**, **[Feishu/Lark](/user-guide/messaging/feishu)**, **[WeCom](/user-guide/messaging/wecom)**, **[WeCom Callback](/user-guide/messaging/wecom-callback)**, **[Weixin](/user-guide/messaging/weixin)**, **[BlueBubbles](/user-guide/messaging/bluebubbles)**, **[QQ Bot](/user-guide/messaging/qqbot)**, **[Yuanbao](/user-guide/messaging/yuanbao)**, **[Home Assistant](/user-guide/messaging/homeassistant)**, **[Microsoft Teams](/user-guide/messaging/teams)**, **[Webhooks](/user-guide/messaging/webhooks)**
+- **[Telegram](/user-guide/messaging/telegram)**, **[Discord](/user-guide/messaging/discord)**, **[Slack](/user-guide/messaging/slack)**, **[WhatsApp](/user-guide/messaging/whatsapp)**, **[Signal](/user-guide/messaging/signal)**, **[Matrix](/user-guide/messaging/matrix)**, **[Mattermost](/user-guide/messaging/mattermost)**, **[Email](/user-guide/messaging/email)**, **[SMS](/user-guide/messaging/sms)**, **[DingTalk](/user-guide/messaging/dingtalk)**, **[Feishu/Lark](/user-guide/messaging/feishu)**, **[WeCom](/user-guide/messaging/wecom)**, **[WeCom Callback](/user-guide/messaging/wecom-callback)**, **[Weixin](/user-guide/messaging/weixin)**, **[BlueBubbles](/user-guide/messaging/bluebubbles)**, **[QQ Bot](/user-guide/messaging/qqbot)**, **[Yuanbao](/user-guide/messaging/yuanbao)**, **[Home Assistant](/user-guide/messaging/homeassistant)**, **[Microsoft Teams](/user-guide/messaging/teams)**, **[Microsoft Teams Meetings](/user-guide/messaging/teams-meetings)**, **[Microsoft Graph Webhook](/user-guide/messaging/msgraph-webhook)**, **[Google Chat](/user-guide/messaging/google_chat)**, **[LINE](/user-guide/messaging/line)**, **[ntfy](/user-guide/messaging/ntfy)**, **[SimpleX](/user-guide/messaging/simplex)**, **[Open WebUI](/user-guide/messaging/open-webui)**, **[Webhooks](/user-guide/messaging/webhooks)**
 
 See the [Messaging Gateway overview](/user-guide/messaging) for the platform comparison table and setup guide.
 
diff --git a/website/docs/integrations/nous-portal.md b/website/docs/integrations/nous-portal.md
index b89756877bd..ddf688d8752 100644
--- a/website/docs/integrations/nous-portal.md
+++ b/website/docs/integrations/nous-portal.md
@@ -26,19 +26,23 @@ The Portal proxies a curated catalog of agentic models from across the ecosystem
 
 | Family | Models |
 |--------|--------|
-| **Anthropic Claude** | Opus, Sonnet, Haiku (4.x series) |
-| **OpenAI** | GPT-5.4, o-series reasoning models |
-| **Google Gemini** | 2.5 Pro, 2.5 Flash |
-| **DeepSeek** | DeepSeek V3.2, DeepSeek-R1 |
-| **Qwen** | Qwen3 family, Qwen Coder |
-| **Kimi / Moonshot** | Kimi-K2, Kimi-Latest |
-| **GLM / Zhipu** | GLM-4.6, GLM-4-Plus |
-| **MiniMax** | M2.7, M1 |
-| **xAI** | Grok-4, Grok-3 |
+| **Anthropic Claude** | Opus 4.7, Opus 4.6, Sonnet 4.6, Haiku 4.5 |
+| **OpenAI** | GPT-5.5, GPT-5.5 Pro, GPT-5.4 Mini, GPT-5.4 Nano, GPT-5.3 Codex |
+| **Google Gemini** | Gemini 3 Pro Preview, Gemini 3 Flash Preview, Gemini 3.1 Pro Preview, Gemini 3.1 Flash Lite Preview |
+| **DeepSeek** | DeepSeek V4 Pro |
+| **Qwen** | Qwen3.7-Max, Qwen3.6-35B-A3B |
+| **Kimi / Moonshot** | Kimi K2.6 |
+| **GLM / Zhipu** | GLM-5.1 |
+| **MiniMax** | MiniMax M2.7 |
+| **xAI** | Grok 4.3 |
+| **NVIDIA** | Nemotron-3 Super 120B-A12B |
+| **Tencent** | Hunyuan 3 Preview |
+| **Xiaomi** | MiMo V2.5 Pro |
+| **StepFun** | Step 3.5 Flash |
 | **Hermes** | Hermes-4-70B, Hermes-4-405B (chat, see [note below](#a-note-on-hermes-4)) |
-| **+ everything else** | 240+ additional models — the full agentic frontier |
+| **+ everything else** | 280+ additional models — the full agentic frontier |
 
-Routing happens through OpenRouter under the hood, so model availability and failover behavior matches what you'd get with an OpenRouter key — just billed against your Nous subscription instead. Switch between Claude Sonnet 4.6 for code and Gemini 2.5 Pro for long context with `/model` mid-session — no new credentials, no top-ups, no surprise zero-balance errors.
+Routing happens through OpenRouter under the hood, so model availability and failover behavior matches what you'd get with an OpenRouter key — just billed against your Nous subscription instead. Switch between Claude Sonnet 4.6 for code and Gemini 3 Pro for long context with `/model` mid-session — no new credentials, no top-ups, no surprise zero-balance errors.
 
 ### The Nous Tool Gateway
 
@@ -76,9 +80,9 @@ They are **not recommended for use inside Hermes Agent**, however. Hermes 4 is t
 
 ```bash
 /model anthropic/claude-sonnet-4.6     # best general-purpose agentic model
-/model openai/gpt-5.4                  # strong reasoning + tool calling
-/model google/gemini-2.5-pro           # huge context window
-/model deepseek/deepseek-v3.2          # cost-effective coder
+/model openai/gpt-5.5-pro              # strong reasoning + tool calling
+/model google/gemini-3-pro-preview     # huge context window
+/model deepseek/deepseek-v4-pro        # cost-effective coder
 ```
 
 The Portal's own [model info page](https://portal.nousresearch.com/info) carries the same warning, so this isn't a Hermes-side opinion — it's the official guidance from Nous Research.
@@ -155,8 +159,8 @@ Inside a session:
 
 ```bash
 /model anthropic/claude-sonnet-4.6
-/model openai/gpt-5.4
-/model google/gemini-2.5-pro
+/model openai/gpt-5.5-pro
+/model google/gemini-3-pro-preview
 ```
 
 Or open the picker:
@@ -201,7 +205,7 @@ After `hermes setup --portal`, `~/.hermes/config.yaml` will look like:
 model:
   provider: nous
   default: anthropic/claude-sonnet-4.6     # or whatever model you picked
-  base_url: https://inference.nousresearch.com/v1
+  base_url: https://inference-api.nousresearch.com/v1
 ```
 
 The Tool Gateway settings live under their respective tool sections:
diff --git a/website/docs/integrations/providers.md b/website/docs/integrations/providers.md
index 127effda6f0..0168a74a6c6 100644
--- a/website/docs/integrations/providers.md
+++ b/website/docs/integrations/providers.md
@@ -41,6 +41,14 @@ You need at least one way to connect to an LLM. Use `hermes model` to switch pro
 | **Hugging Face** | `HF_TOKEN` in `~/.hermes/.env` (provider: `huggingface`, aliases: `hf`) |
 | **Google / Gemini** | `GOOGLE_API_KEY` (or `GEMINI_API_KEY`) in `~/.hermes/.env` (provider: `gemini`) |
 | **Google Gemini (OAuth)** | `hermes model` → "Google Gemini (OAuth)" (provider: `google-gemini-cli`, free tier supported, browser PKCE login) |
+| **OpenAI API (direct)** | `OPENAI_API_KEY` in `~/.hermes/.env` (provider: `openai-api`, optional `OPENAI_BASE_URL`) |
+| **Azure AI Foundry** | `hermes model` → "Azure AI Foundry" (provider: `azure-foundry`; uses Azure OpenAI / Foundry endpoint and key) |
+| **AWS Bedrock** | `hermes model` → "AWS Bedrock" (provider: `bedrock`; standard AWS credentials chain via boto3) |
+| **NVIDIA Build** | `NVIDIA_API_KEY` in `~/.hermes/.env` (provider: `nvidia`; NIM-hosted models on build.nvidia.com) |
+| **Ollama Cloud** | `hermes model` → "Ollama Cloud" (provider: `ollama-cloud`; cloud-hosted Ollama API) |
+| **Qwen OAuth** | `hermes model` → "Qwen OAuth" (provider: `qwen-oauth`; browser PKCE login) |
+| **MiniMax OAuth** | `hermes model` → "MiniMax (OAuth)" (provider: `minimax-oauth`; browser PKCE login) |
+| **StepFun** | `STEPFUN_API_KEY` in `~/.hermes/.env` (provider: `stepfun`) |
 | **LM Studio** | `hermes model` → "LM Studio" (provider: `lmstudio`, optional `LM_API_KEY`) |
 | **Custom Endpoint** | `hermes model` → choose "Custom endpoint" (saved in `config.yaml`) |
 
@@ -65,6 +73,10 @@ Don't have a subscription yet? Get one at [portal.nousresearch.com/manage-subscr
 
 **For full details:** see the dedicated [Nous Portal integration page](/integrations/nous-portal) (what's in the subscription, model catalog, troubleshooting) and the step-by-step [Run Hermes Agent with Nous Portal guide](/guides/run-hermes-with-nous-portal).
 
+**Client identification.** Every Portal request from Hermes Agent carries a `client=hermes-client-v<version>` tag (e.g. `client=hermes-client-v0.13.0`) auto-aligned to your installed release. This is sent on all Portal pathways — main chat loop, auxiliary calls, compression summarizer, web extraction — and lets Portal-side telemetry distinguish Hermes traffic from other clients. No config required; the tag updates automatically when you `hermes update`.
+
+**JWT auth (automatic).** Hermes prefers scoped `inference:invoke` JWTs for Portal requests with the legacy opaque session-key path as a fallback. No configuration is required — credentials are managed by the OAuth flow and rotate transparently. Revoked refresh tokens are quarantined to avoid replay loops.
+
 
 :::info Codex Note
 The OpenAI Codex provider authenticates via device code (open a URL, enter a code). Hermes stores the resulting credentials in its own auth store under `~/.hermes/auth.json` and can import existing Codex CLI credentials from `~/.codex/auth.json` when present. No Codex CLI installation is required.
@@ -273,6 +285,15 @@ No configuration is needed — caching activates automatically when an xAI endpo
 
 xAI also ships a dedicated TTS endpoint (`/v1/tts`). Select **xAI TTS** in `hermes tools` → Voice & TTS, or see the [Voice & TTS](../user-guide/features/tts.md#text-to-speech) page for config.
 
+**Retired xAI model migration (May 15, 2026):** xAI is retiring `grok-4*`, `grok-3`, `grok-code-fast-1`, and `grok-imagine-image-pro` on 2026-05-15. `hermes doctor` and `hermes chat` startup both detect any config still pointing at a retired ref and print the recommended replacement. Use `hermes migrate xai` for a one-shot config rewrite — dry-run by default, add `--apply` to write changes (a timestamped `config.yaml.bak-pre-migrate-xai-*` backup is created automatically).
+
+```bash
+hermes migrate xai          # preview replacements
+hermes migrate xai --apply  # rewrite ~/.hermes/config.yaml in place
+```
+
+**xAI Web Search backend.** When the [Web Search](../user-guide/features/web-search.md) toolset is enabled, `web.backend: xai` routes search through xAI's hosted search endpoint using the same `XAI_API_KEY` / OAuth credentials. No additional setup required if xAI is already configured as a provider.
+
 ### NovitaAI
 
 [NovitaAI](https://novita.ai) is the AI-native cloud for builders and agents. Its three product lines are Model API for 200+ models, Agent Sandbox for building and running AI agents, and GPU Cloud for scalable compute, all available from one platform.
@@ -454,7 +475,7 @@ Open and reasoning models via [GMI Cloud](https://www.gmicloud.ai/) — OpenAI-c
 
 ```bash
 # GMI Cloud
-hermes chat --provider gmi --model deepseek-ai/DeepSeek-R1
+hermes chat --provider gmi --model deepseek-ai/DeepSeek-V3.2
 # Requires: GMI_API_KEY in ~/.hermes/.env
 ```
 
@@ -462,7 +483,7 @@ Or set it permanently in `config.yaml`:
 ```yaml
 model:
   provider: "gmi"
-  default: "deepseek-ai/DeepSeek-R1"
+  default: "deepseek-ai/DeepSeek-V3.2"
 ```
 
 The base URL can be overridden with `GMI_BASE_URL` (default: `https://api.gmi-serving.com/v1`).
@@ -492,7 +513,7 @@ The base URL can be overridden with `STEPFUN_BASE_URL` (default: `https://api.st
 
 ```bash
 # Use any available model
-hermes chat --provider huggingface --model Qwen/Qwen3-235B-A22B-Thinking-2507
+hermes chat --provider huggingface --model Qwen/Qwen3.5-397B-A17B
 # Requires: HF_TOKEN in ~/.hermes/.env
 
 # Short alias
@@ -503,7 +524,7 @@ Or set it permanently in `config.yaml`:
 ```yaml
 model:
   provider: "huggingface"
-  default: "Qwen/Qwen3-235B-A22B-Thinking-2507"
+  default: "Qwen/Qwen3.5-397B-A17B"
 ```
 
 Get your token at [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens) — make sure to enable the "Make calls to Inference Providers" permission. Free tier included ($0.10/month credit, no markup on provider rates).
@@ -623,7 +644,7 @@ model:
 ```
 
 :::warning Legacy env vars
-`OPENAI_BASE_URL` and `LLM_MODEL` in `.env` are **removed**. Neither is read by any part of Hermes — `config.yaml` is the single source of truth for model and endpoint configuration. If you have stale entries in your `.env`, they are automatically cleared on the next `hermes setup` or config migration. Use `hermes model` or edit `config.yaml` directly.
+`LLM_MODEL` in `.env` is **removed** — `config.yaml` is the single source of truth for model and endpoint configuration. `OPENAI_BASE_URL` is still honored, but **only** for the `openai-api` provider (it overrides the OpenAI endpoint for direct API-key access). For other providers and custom endpoints, use `hermes model` or set `model.base_url` in `config.yaml` directly. If you have stale entries in your `.env`, they are automatically cleared on the next `hermes setup` or config migration.
 :::
 
 Both approaches persist to `config.yaml`, which is the source of truth for model, provider, and base URL.
@@ -1253,6 +1274,18 @@ extra_body:
 
 The `hermes model` → Custom Endpoint wizard now prompts for `api_mode` explicitly and persists your answer to `config.yaml`. URL-based auto-detection (e.g. `/anthropic` paths → `anthropic_messages`) still happens as a fallback when the field is left blank.
 
+**Native vision for custom-provider models.** If your custom endpoint serves a vision-capable model that isn't in models.dev, set `model.supports_vision: true` so Hermes routes attached images natively (as `image_url` parts) instead of pre-processing them through `vision_analyze`. Single knob — no need to also set `agent.image_input_mode: native`.
+
+```yaml
+model:
+  provider: custom
+  base_url: http://localhost:8080/v1
+  default: qwen3.6-35b-a3b
+  supports_vision: true   # send images natively; otherwise vision_analyze pre-describes them
+```
+
+The same key is honored on per-named-provider models (`custom_providers[*].models[*].supports_vision`) and accepts standard YAML booleans (`true/false/yes/no/on/off/1/0`).
+
 Switch between them mid-session with the triple syntax:
 
 ```
@@ -1489,7 +1522,7 @@ fallback_model:
 
 When activated, the fallback swaps the model and provider mid-session without losing your conversation. The chain is tried entry-by-entry; activation is one-shot per session.
 
-Supported providers: `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `gemini`, `google-gemini-cli`, `qwen-oauth`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `minimax-oauth`, `deepseek`, `nvidia`, `xai`, `xai-oauth`, `ollama-cloud`, `bedrock`, `azure-foundry`, `opencode-zen`, `opencode-go`, `kilocode`, `xiaomi`, `arcee`, `gmi`, `stepfun`, `lmstudio`, `alibaba`, `alibaba-coding-plan`, `tencent-tokenhub`, `custom`.
+Supported providers: `openrouter`, `nous`, `novita`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `gemini`, `google-gemini-cli`, `qwen-oauth`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `minimax-oauth`, `deepseek`, `nvidia`, `xai`, `xai-oauth`, `ollama-cloud`, `bedrock`, `azure-foundry`, `opencode-zen`, `opencode-go`, `kilocode`, `xiaomi`, `arcee`, `gmi`, `stepfun`, `lmstudio`, `alibaba`, `alibaba-coding-plan`, `tencent-tokenhub`, `custom`.
 
 :::tip
 Fallback is configured exclusively through `config.yaml` — or interactively via `hermes fallback`. For full details on when it triggers, how the chain advances, and how it interacts with auxiliary tasks and delegation, see [Fallback Providers](/user-guide/features/fallback-providers).
diff --git a/website/docs/reference/cli-commands.md b/website/docs/reference/cli-commands.md
index aa8b3c1a5e6..5882d4aaac3 100644
--- a/website/docs/reference/cli-commands.md
+++ b/website/docs/reference/cli-commands.md
@@ -47,6 +47,9 @@ hermes [global-options] <command> [subcommand/options]
 | `hermes slack` | Slack helpers (currently: generate the app manifest with every command as a native slash). |
 | `hermes auth` | Manage credentials — add, list, remove, reset, set strategy. Handles OAuth flows for Codex/Nous/Anthropic. |
 | `hermes login` / `logout` | **Deprecated** — use `hermes auth` instead. |
+| `hermes send` | Send a one-shot message to a configured messaging platform (Telegram, Discord, Slack, Signal, SMS, …). Useful from shell scripts, cron jobs, CI hooks, and monitoring daemons — no agent loop, no LLM. |
+| `hermes secrets` | Manage external secret sources (currently Bitwarden Secrets Manager) for pulling API keys at process startup instead of from `~/.hermes/.env`. |
+| `hermes migrate` | Diagnose and (optionally) rewrite `config.yaml` to replace references to retired models or deprecated settings (e.g. `migrate xai`). |
 | `hermes status` | Show agent, auth, and platform status. |
 | `hermes cron` | Inspect and tick the cron scheduler. |
 | `hermes kanban` | Multi-profile collaboration board (tasks, links, dispatcher). |
@@ -224,6 +227,7 @@ Options:
 | Option | Description |
 |--------|-------------|
 | `--all` | On `start` / `restart` / `stop`: act on **every profile's** gateway, not just the active `HERMES_HOME`. Useful if you run multiple profiles side-by-side and want to restart them all after `hermes update`. |
+| `--no-supervise` | On `run`: inside the s6-overlay Docker image, opt out of auto-supervision and use pre-s6 foreground semantics — gateway runs as the container's main process with no auto-restart. No-op outside the s6 image. Equivalent to setting `HERMES_GATEWAY_NO_SUPERVISE=1`. |
 
 :::tip WSL users
 Use `hermes gateway run` instead of `hermes gateway start` — WSL's systemd support is unreliable. Wrap it in tmux for persistence: `tmux new -s hermes 'hermes gateway run'`. See [WSL FAQ](/reference/faq#wsl-gateway-keeps-disconnecting-or-hermes-gateway-start-fails) for details.
@@ -262,6 +266,8 @@ the full guide, supported languages, and configuration knobs.
 hermes setup [model|tts|terminal|gateway|tools|agent] [--non-interactive] [--reset] [--quick] [--reconfigure] [--portal]
 ```
 
+**Easiest path:** `hermes setup --portal` — OAuth into Nous Portal and opt into the [Tool Gateway](../user-guide/features/tool-gateway.md) in one shot.
+
 **First run:** launches the first-time wizard.
 
 **Returning user (already configured):** drops straight into the full reconfigure wizard — every prompt shows your current value as its default, press Enter to keep or type a new value. No menu.
@@ -337,6 +343,122 @@ Run `hermes slack manifest --write` again after `hermes update` to pick
 up any new commands.
 
 
+## `hermes send`
+
+```bash
+hermes send --to <target> "message text"
+hermes send --to <target> --file <path>
+echo "message" | hermes send --to <target>
+hermes send --list [platform]
+```
+
+Send a one-shot message to a configured messaging platform without spinning up an agent or gateway loop. Reuses the gateway's already-configured credentials (`~/.hermes/.env` + `~/.hermes/config.yaml`) so ops scripts, cron jobs, CI hooks, and monitoring daemons can post status updates without reimplementing each platform's REST client.
+
+For bot-token platforms (Telegram, Discord, Slack, Signal, SMS, WhatsApp-CloudAPI) no running gateway is required — `hermes send` talks directly to the platform's REST endpoint. Plugin platforms that need a persistent adapter still require a live gateway.
+
+| Option | Description |
+|--------|-------------|
+| `-t`, `--to <TARGET>` | Delivery target. Formats: `platform` (uses home channel), `platform:chat_id`, `platform:chat_id:thread_id`, or `platform:#channel-name`. Examples: `telegram`, `telegram:-1001234567890`, `discord:#ops`, `slack:C0123ABCD`, `signal:+15551234567`. |
+| `-f`, `--file <PATH>` | Read the message body from `PATH`. Pass `-` to force reading from stdin. |
+| `-s`, `--subject <LINE>` | Prepend a subject/header line before the message body. |
+| `-l`, `--list [platform]` | List configured targets across all platforms (or only the given platform). |
+| `-q`, `--quiet` | Suppress stdout on success — useful in scripts (rely on exit code only). |
+| `--json` | Emit raw JSON result instead of human-readable output. |
+
+If neither a positional `message` argument nor `--file` is provided, `hermes send` reads from stdin when it is not a TTY. Exit codes: `0` on success, `1` on delivery/backend failure, `2` on usage errors.
+
+Examples:
+
+```bash
+hermes send --to telegram "deploy finished"
+echo "RAM 92%" | hermes send --to telegram:-1001234567890
+hermes send --to discord:#ops --file /tmp/report.md
+hermes send --to slack:#eng --subject "[CI]" --file build.log
+hermes send --list                  # all platforms
+hermes send --list telegram         # filter by platform
+```
+
+
+## `hermes secrets`
+
+```bash
+hermes secrets bitwarden <subcommand>
+hermes secrets bw <subcommand>          # short alias
+```
+
+Pull API keys from an external secret manager at process startup instead of storing them in `~/.hermes/.env`. Currently supports **Bitwarden Secrets Manager**. See the full guide: [Bitwarden integration](../user-guide/secrets/bitwarden.md).
+
+`bitwarden` (alias `bw`) subcommands:
+
+| Subcommand | Description |
+|------------|-------------|
+| `setup` | Interactive wizard: install the pinned `bws` binary, store an access token, and pick a project. Accepts `--project-id`, `--access-token`, and `--server-url` for non-interactive use. |
+| `status` | Show current config, binary path/version, and last fetch info. |
+| `sync` | Fetch secrets now and report what changed. Add `--apply` to actually export the secrets into the current shell's environment (default is dry-run). |
+| `install` | Download and verify the pinned `bws` binary. `--force` re-downloads even if a managed copy already exists. |
+| `disable` | Turn off the Bitwarden integration. |
+
+
+## `hermes migrate`
+
+```bash
+hermes migrate <type>
+```
+
+Diagnose and (optionally) rewrite the active `config.yaml` to replace references to retired models or deprecated settings. A timestamped backup of the original `config.yaml` is taken before any rewrite (skip with `--no-backup`).
+
+| Subcommand | Description |
+|------------|-------------|
+| `xai` | Scan `config.yaml` for references to xAI models scheduled for retirement on May 15, 2026 and (with `--apply`) rewrite them in-place to the official replacements per the xAI migration guide. Defaults to dry-run. |
+
+Common flags for migration subcommands:
+
+| Flag | Description |
+|------|-------------|
+| `--apply` | Rewrite `config.yaml` in-place (default: dry-run, no writes). |
+| `--no-backup` | Skip the timestamped backup of `config.yaml` when applying. |
+
+> Not to be confused with `hermes claw migrate` (one-shot import of OpenClaw configuration into Hermes) — `hermes migrate` is the top-level config-rewrite command.
+
+
+## `hermes proxy`
+
+```bash
+hermes proxy <subcommand>
+```
+
+Run a local OpenAI-compatible HTTP server that forwards requests to an OAuth-authenticated upstream provider (e.g. Nous Portal, xAI). External apps can point at the proxy with any bearer token; the proxy attaches your real OAuth credentials on the way out. See [Subscription Proxy](../user-guide/features/subscription-proxy.md) for the full guide.
+
+| Subcommand | Description |
+|------------|-------------|
+| `start` | Run the proxy in the foreground. Flags: `--provider <nous\|xai>` (default `nous`), `--host <addr>` (default `127.0.0.1`; use `0.0.0.0` to expose on LAN), `--port <int>` (default `8645`). |
+| `status` | Show which proxy upstreams are ready (credentials present, OAuth valid). |
+| `providers` | List available proxy upstream providers. |
+
+
+## `hermes security`
+
+```bash
+hermes security <subcommand>
+```
+
+On-demand vulnerability scan against [OSV.dev](https://osv.dev). Covers the Hermes venv (installed PyPI distributions), Python dependencies declared by plugins under `~/.hermes/plugins/`, and pinned `npx`/`uvx` MCP servers in `config.yaml`. Does NOT scan globally-installed packages or editor/browser extensions.
+
+| Subcommand | Description |
+|------------|-------------|
+| `audit` | Run a one-shot supply-chain audit. |
+
+`audit` flags:
+
+| Flag | Default | Description |
+|------|---------|-------------|
+| `--json` | off | Emit machine-readable JSON instead of human-readable text. |
+| `--fail-on <level>` | `critical` | Exit non-zero when any finding meets this severity (`low`, `moderate`, `high`, `critical`). |
+| `--skip-venv` | off | Skip scanning the Hermes Python venv. |
+| `--skip-plugins` | off | Skip scanning plugin requirements files. |
+| `--skip-mcp` | off | Skip scanning pinned MCP servers in `config.yaml`. |
+
+
 ## `hermes login` / `hermes logout` *(Deprecated)*
 
 :::caution
@@ -1251,7 +1373,7 @@ hermes completion fish > ~/.config/fish/completions/hermes.fish
 ## `hermes update`
 
 ```bash
-hermes update [--check] [--backup] [--restart-gateway]
+hermes update [--gateway] [--check] [--no-backup] [--backup] [--yes]
 ```
 
 Pulls the latest `hermes-agent` code and reinstalls dependencies in your venv, then re-runs the post-install hooks (MCP servers, skills sync, completion install). Safe to run on a live install.
@@ -1260,12 +1382,15 @@ Pulls the latest `hermes-agent` code and reinstalls dependencies in your venv, t
 
 | Option | Description |
 |--------|-------------|
-| `--check` | Print the current commit and the latest `origin/main` commit side by side, and exit 0 if in sync or 1 if behind. Does not pull, install, or restart anything. |
-| `--backup` | Create a labeled pre-update snapshot of `HERMES_HOME` (config, auth, sessions, skills, pairing data) before pulling. Default is **off** — the previous always-backup behavior was adding minutes to every update on large homes. Flip it on permanently via `update.backup: true` in `config.yaml`. |
-| `--restart-gateway` | After a successful update, restart the running gateway service. Implies `--all` semantics if multiple profiles are installed. |
+| `--gateway` | Internal mode used by the messaging `/update` command. Uses file-based IPC for prompts and progress streaming instead of reading from terminal stdin. Not a gateway restart flag. |
+| `--check` | Check whether an update is available without pulling, installing dependencies, or restarting anything. |
+| `--no-backup` | Skip the pre-update backup for this run, even if `updates.pre_update_backup` is enabled in `config.yaml`. |
+| `--backup` | Create a labeled pre-update snapshot of `HERMES_HOME` (config, auth, sessions, skills, pairing data) before pulling. Default is **off** — the previous always-backup behavior was adding minutes to every update on large homes. Flip it on permanently via `updates.pre_update_backup: true` in `config.yaml`. |
+| `--yes`, `-y` | Assume yes for interactive prompts such as config migration and stash restore. API-key entry is skipped; run `hermes config migrate` separately for those. |
 
 Additional behavior:
 
+- **Gateway restart.** After a successful update, Hermes attempts to restart all running gateway profiles automatically so they pick up the new code. Use `hermes gateway restart` when you want to restart a gateway without applying an update.
 - **Pairing data snapshot.** Even when `--backup` is off, `hermes update` takes a lightweight snapshot of `~/.hermes/pairing/` and the Feishu comment rules before `git pull`. You can roll it back with `hermes backup restore --state pre-update` if a pull rewrites a file you were editing.
 - **Legacy `hermes.service` warning.** If Hermes detects a pre-rename `hermes.service` systemd unit (instead of the current `hermes-gateway.service`), it prints a one-time migration hint so you can avoid flap-loop issues.
 - **Exit codes.** `0` on success, `1` on pull/install/post-install errors, `2` on unexpected working-tree changes that block `git pull`.
@@ -1276,6 +1401,7 @@ Additional behavior:
 |---------|-------------|
 | `hermes version` | Print version information. |
 | `hermes update` | Pull latest changes and reinstall dependencies. |
+| `hermes postinstall` | Internal bootstrap. Runs once after `pip install hermes-agent` (or `hermes update` on pip installs) to install non-Python dependencies that pip cannot provide — Node.js runtime, headless browser, ripgrep, ffmpeg — and then trigger `hermes setup` if the profile has not been configured yet. Safe to re-run idempotently. |
 | `hermes uninstall [--full] [--yes]` | Remove Hermes, optionally deleting all config/data. |
 
 ## See also
diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md
index 93b617b0666..664a4f06805 100644
--- a/website/docs/reference/environment-variables.md
+++ b/website/docs/reference/environment-variables.md
@@ -197,7 +197,7 @@ These variables configure the [Tool Gateway](/user-guide/features/tool-gateway)
 | `TERMINAL_DAYTONA_IMAGE` | Daytona sandbox image |
 | `TERMINAL_TIMEOUT` | Command timeout in seconds |
 | `TERMINAL_LIFETIME_SECONDS` | Max lifetime for terminal sessions in seconds |
-| `TERMINAL_CWD` | Working directory for terminal sessions (gateway/cron only; CLI uses launch dir) |
+| `TERMINAL_CWD` | Deprecated direct override for gateway/cron terminal sessions. Prefer `terminal.cwd` in `config.yaml`; CLI still uses the launch directory. |
 | `SUDO_PASSWORD` | Enable sudo without interactive prompt |
 
 For cloud sandbox backends, persistence is filesystem-oriented. `TERMINAL_LIFETIME_SECONDS` controls when Hermes cleans up an idle terminal session, and later resumes may recreate the sandbox rather than keep the same live processes running.
@@ -405,14 +405,14 @@ For cloud sandbox backends, persistence is filesystem-oriented. `TERMINAL_LIFETI
 | `WEBHOOK_PORT` | HTTP server port for receiving webhooks (default: `8644`) |
 | `WEBHOOK_SECRET` | Global HMAC secret for webhook signature validation (used as fallback when routes don't specify their own) |
 | `API_SERVER_ENABLED` | Enable the OpenAI-compatible API server (`true`/`false`). Runs alongside other platforms. |
-| `API_SERVER_KEY` | Bearer token for API server authentication. Enforced for non-loopback binding. |
+| `API_SERVER_KEY` | Bearer token for API server authentication. Required whenever the API server is enabled. |
 | `API_SERVER_CORS_ORIGINS` | Comma-separated browser origins allowed to call the API server directly (for example `http://localhost:3000,http://127.0.0.1:3000`). Default: disabled. |
 | `API_SERVER_PORT` | Port for the API server (default: `8642`) |
-| `API_SERVER_HOST` | Host/bind address for the API server (default: `127.0.0.1`). Use `0.0.0.0` for network access — requires `API_SERVER_KEY` and a narrow `API_SERVER_CORS_ORIGINS` allowlist. |
+| `API_SERVER_HOST` | Host/bind address for the API server (default: `127.0.0.1`). `API_SERVER_KEY` is still required on loopback; use a narrow `API_SERVER_CORS_ORIGINS` allowlist for browser access. |
 | `API_SERVER_MODEL_NAME` | Model name advertised on `/v1/models`. Defaults to the profile name (or `hermes-agent` for the default profile). Useful for multi-user setups where frontends like Open WebUI need distinct model names per connection. |
 | `GATEWAY_PROXY_URL` | URL of a remote Hermes API server to forward messages to ([proxy mode](/user-guide/messaging/matrix#proxy-mode-e2ee-on-macos)). When set, the gateway handles platform I/O only — all agent work is delegated to the remote server. Also configurable via `gateway.proxy_url` in `config.yaml`. |
 | `GATEWAY_PROXY_KEY` | Bearer token for authenticating with the remote API server in proxy mode. Must match `API_SERVER_KEY` on the remote host. |
-| `MESSAGING_CWD` | Working directory for terminal commands in messaging mode (default: `~`) |
+| `MESSAGING_CWD` | Deprecated compatibility fallback for gateway working directory. Prefer `terminal.cwd` in `config.yaml`. |
 | `GATEWAY_ALLOWED_USERS` | Comma-separated user IDs allowed across all platforms |
 | `GATEWAY_ALLOW_ALL_USERS` | Allow all users without allowlists (`true`/`false`, default: `false`) |
 
@@ -518,6 +518,7 @@ Advanced per-platform knobs for throttling the outbound message batcher. Most us
 | `HERMES_GATEWAY_PLATFORM_CONNECT_TIMEOUT` | Per-platform connect timeout during gateway startup (seconds). |
 | `HERMES_GATEWAY_BUSY_INPUT_MODE` | Default gateway busy-input behavior: `queue`, `steer`, or `interrupt`. Can be overridden per chat with `/busy`. |
 | `HERMES_GATEWAY_BUSY_ACK_ENABLED` | Whether the gateway sends an acknowledgment message (⚡/⏳/⏩) when a user sends input while the agent is busy (default: `true`). Set to `false` to suppress these messages entirely — the input is still queued/steered/interrupts as normal, only the chat reply is silenced. Bridged from `display.busy_ack_enabled` in `config.yaml`. |
+| `HERMES_GATEWAY_NO_SUPERVISE` | Inside the s6-overlay Docker image, opt out of auto-supervision when running `hermes gateway run` and use pre-s6 foreground semantics (no auto-restart, gateway is the container's main process). Truthy values: `1`, `true`, `yes`. Equivalent to the `--no-supervise` CLI flag. No-op outside the s6 image. |
 | `HERMES_FILE_MUTATION_VERIFIER` | Enable the per-turn file-mutation verifier footer (default: `true`). When enabled, Hermes appends an advisory listing any `write_file` / `patch` calls that failed during the turn and were not superseded by a successful write. Set to `0`, `false`, `no`, or `off` to suppress. Mirrors `display.file_mutation_verifier` in `config.yaml`; the env var wins when set. |
 | `HERMES_CRON_TIMEOUT` | Inactivity timeout for cron job agent runs in seconds (default: `600`). The agent can run indefinitely while actively calling tools or receiving stream tokens — this only triggers when idle. Set to `0` for unlimited. |
 | `HERMES_CRON_SCRIPT_TIMEOUT` | Timeout for pre-run scripts attached to cron jobs in seconds (default: `120`). Override for scripts that need longer execution (e.g., randomized delays for anti-bot timing). Also configurable via `cron.script_timeout_seconds` in `config.yaml`. |
diff --git a/website/docs/reference/faq.md b/website/docs/reference/faq.md
index 7c70662c319..59968f1c8cd 100644
--- a/website/docs/reference/faq.md
+++ b/website/docs/reference/faq.md
@@ -17,7 +17,7 @@ Quick answers and fixes for the most common questions and issues.
 Hermes Agent works with any OpenAI-compatible API. Supported providers include:
 
 - **[OpenRouter](https://openrouter.ai/)** — access hundreds of models through one API key (recommended for flexibility)
-- **Nous Portal** — Nous Research's own inference endpoint
+- **[Nous Portal](/integrations/nous-portal)** — Nous Research's subscription gateway — 300+ models plus web/image/TTS/browser through one OAuth login (recommended for newcomers)
 - **OpenAI** — GPT-5.4, GPT-5-codex, GPT-4.1, GPT-4o, etc.
 - **Anthropic** — Claude models (direct API, OAuth via `hermes auth add anthropic`, OpenRouter, or any compatible proxy)
 - **Google** — Gemini models (direct API via `gemini` provider, the `google-gemini-cli` OAuth provider, OpenRouter, or compatible proxy)
diff --git a/website/docs/reference/mcp-config-reference.md b/website/docs/reference/mcp-config-reference.md
index 86bbf78c61c..44d0d4512a9 100644
--- a/website/docs/reference/mcp-config-reference.md
+++ b/website/docs/reference/mcp-config-reference.md
@@ -25,6 +25,11 @@ mcp_servers:
     url: "..."          # HTTP servers
     headers: {}
 
+    # Optional HTTP/SSE TLS settings:
+    ssl_verify: true                # bool or path to a CA bundle (PEM)
+    client_cert: "/path/to/cert.pem"  # mTLS client certificate (see below)
+    # client_key: "/path/to/key.pem"  # optional, when key lives in a separate file
+
     enabled: true
     timeout: 120
     connect_timeout: 60
@@ -45,6 +50,9 @@ mcp_servers:
 | `env` | mapping | stdio | Environment passed to the subprocess |
 | `url` | string | HTTP | Remote MCP endpoint |
 | `headers` | mapping | HTTP | Headers for remote server requests |
+| `ssl_verify` | bool or string | HTTP | TLS verification. `true` (default) uses system CAs, `false` disables verification (insecure), or a string path to a custom CA bundle (PEM) |
+| `client_cert` | string or list | HTTP | mTLS client certificate. String = path to a PEM file containing cert + key. List `[cert, key]` = separate files. List `[cert, key, password]` = encrypted key |
+| `client_key` | string | HTTP | Path to the client private key, when `client_cert` is a string and the key is in a separate file |
 | `enabled` | bool | both | Skip the server entirely when false |
 | `timeout` | number | both | Tool call timeout |
 | `connect_timeout` | number | both | Initial connection timeout |
@@ -191,6 +199,40 @@ mcp_servers:
       prompts: false
 ```
 
+### TLS client certificate (mTLS)
+
+For HTTP/SSE servers that require a client certificate, set `client_cert` (and optionally `client_key`):
+
+```yaml
+mcp_servers:
+  # Combined cert + key in a single PEM file
+  internal_api:
+    url: "https://mcp.internal.example.com/mcp"
+    client_cert: "~/secrets/mcp-client.pem"
+
+  # Separate cert and key files
+  partner_api:
+    url: "https://mcp.partner.example.com/mcp"
+    client_cert: "~/secrets/client.crt"
+    client_key: "~/secrets/client.key"
+
+  # Encrypted key with a passphrase (3-element list form)
+  bank_api:
+    url: "https://mcp.bank.example.com/mcp"
+    client_cert: ["~/secrets/client.crt", "~/secrets/client.key", "my-passphrase"]
+
+  # Custom CA bundle (private CA / self-signed server)
+  lab_api:
+    url: "https://mcp.lab.local/mcp"
+    ssl_verify: "~/secrets/lab-ca.pem"
+    client_cert: "~/secrets/lab-client.pem"
+```
+
+Notes:
+- Paths support `~` expansion. Missing files fail fast at connect time with a server-scoped error message.
+- `ssl_verify: false` disables server certificate verification entirely. Don't use this with real services.
+- Works on both Streamable HTTP and SSE transports.
+
 ## Reloading config
 
 After changing MCP config, reload servers with:
diff --git a/website/docs/reference/optional-skills-catalog.md b/website/docs/reference/optional-skills-catalog.md
index e70f52fe32f..6d5b7c8e644 100644
--- a/website/docs/reference/optional-skills-catalog.md
+++ b/website/docs/reference/optional-skills-catalog.md
@@ -31,169 +31,172 @@ hermes skills uninstall <skill-name>
 
 | Skill | Description |
 |-------|-------------|
-| [**blackbox**](/user-guide/skills/optional/autonomous-ai-agents/autonomous-ai-agents-blackbox) | Delegate coding tasks to Blackbox AI CLI agent. Multi-model agent with built-in judge that runs tasks through multiple LLMs and picks the best result. Requires the blackbox CLI and a Blackbox AI API key. |
-| [**honcho**](/user-guide/skills/optional/autonomous-ai-agents/autonomous-ai-agents-honcho) | Configure and use Honcho memory with Hermes -- cross-session user modeling, multi-profile peer isolation, observation config, dialectic reasoning, session summaries, and context budget enforcement. Use when setting up Honcho, troubleshoo... |
-| [**openhands**](/user-guide/skills/optional/autonomous-ai-agents/autonomous-ai-agents-openhands) | Delegate coding to OpenHands CLI (model-agnostic, LiteLLM). |
+| [**antigravity-cli**](/docs/user-guide/skills/optional/autonomous-ai-agents/autonomous-ai-agents-antigravity-cli) | Operate the Antigravity CLI (agy): plugins, auth, sandbox. |
+| [**blackbox**](/docs/user-guide/skills/optional/autonomous-ai-agents/autonomous-ai-agents-blackbox) | Delegate coding tasks to Blackbox AI CLI agent. Multi-model agent with built-in judge that runs tasks through multiple LLMs and picks the best result. Requires the blackbox CLI and a Blackbox AI API key. |
+| [**grok**](/docs/user-guide/skills/optional/autonomous-ai-agents/autonomous-ai-agents-grok) | Delegate coding to xAI Grok Build CLI (features, PRs). |
+| [**honcho**](/docs/user-guide/skills/optional/autonomous-ai-agents/autonomous-ai-agents-honcho) | Configure and use Honcho memory with Hermes -- cross-session user modeling, multi-profile peer isolation, observation config, dialectic reasoning, session summaries, and context budget enforcement. Use when setting up Honcho, troubleshoo... |
+| [**openhands**](/docs/user-guide/skills/optional/autonomous-ai-agents/autonomous-ai-agents-openhands) | Delegate coding to OpenHands CLI (model-agnostic, LiteLLM). |
 
 ## blockchain
 
 | Skill | Description |
 |-------|-------------|
-| [**evm**](/user-guide/skills/optional/blockchain/blockchain-evm) | Read-only EVM client: wallets, tokens, gas across 8 chains. |
-| [**hyperliquid**](/user-guide/skills/optional/blockchain/blockchain-hyperliquid) | Hyperliquid market data, account history, trade review. |
-| [**solana**](/user-guide/skills/optional/blockchain/blockchain-solana) | Query Solana blockchain data with USD pricing — wallet balances, token portfolios with values, transaction details, NFTs, whale detection, and live network stats. Uses Solana RPC + CoinGecko. No API key required. |
+| [**evm**](/docs/user-guide/skills/optional/blockchain/blockchain-evm) | Read-only EVM client: wallets, tokens, gas across 8 chains. |
+| [**hyperliquid**](/docs/user-guide/skills/optional/blockchain/blockchain-hyperliquid) | Hyperliquid market data, account history, trade review. |
+| [**solana**](/docs/user-guide/skills/optional/blockchain/blockchain-solana) | Query Solana blockchain data with USD pricing — wallet balances, token portfolios with values, transaction details, NFTs, whale detection, and live network stats. Uses Solana RPC + CoinGecko. No API key required. |
 
 ## communication
 
 | Skill | Description |
 |-------|-------------|
-| [**one-three-one-rule**](/user-guide/skills/optional/communication/communication-one-three-one-rule) | Structured decision-making framework for technical proposals and trade-off analysis. When the user faces a choice between multiple approaches (architecture decisions, tool selection, refactoring strategies, migration paths), this skill p... |
+| [**one-three-one-rule**](/docs/user-guide/skills/optional/communication/communication-one-three-one-rule) | Structured decision-making framework for technical proposals and trade-off analysis. When the user faces a choice between multiple approaches (architecture decisions, tool selection, refactoring strategies, migration paths), this skill p... |
 
 ## creative
 
 | Skill | Description |
 |-------|-------------|
-| [**blender-mcp**](/user-guide/skills/optional/creative/creative-blender-mcp) | Control Blender directly from Hermes via socket connection to the blender-mcp addon. Create 3D objects, materials, animations, and run arbitrary Blender Python (bpy) code. Use when user wants to create or modify anything in Blender. |
-| [**concept-diagrams**](/user-guide/skills/optional/creative/creative-concept-diagrams) | Generate flat, minimal light/dark-aware SVG diagrams as standalone HTML files, using a unified educational visual language with 9 semantic color ramps, sentence-case typography, and automatic dark mode. Best suited for educational and no... |
-| [**hyperframes**](/user-guide/skills/optional/creative/creative-hyperframes) | Create HTML-based video compositions, animated title cards, social overlays, captioned talking-head videos, audio-reactive visuals, and shader transitions using HyperFrames. HTML is the source of truth for video. Use when the user wants... |
-| [**kanban-video-orchestrator**](/user-guide/skills/optional/creative/creative-kanban-video-orchestrator) | Plan, set up, and monitor a multi-agent video production pipeline backed by Hermes Kanban. Use when the user wants to make ANY video — narrative film, product/marketing, music video, explainer, ASCII/terminal art, abstract/generative loo... |
-| [**meme-generation**](/user-guide/skills/optional/creative/creative-meme-generation) | Generate real meme images by picking a template and overlaying text with Pillow. Produces actual .png meme files. |
+| [**blender-mcp**](/docs/user-guide/skills/optional/creative/creative-blender-mcp) | Control Blender directly from Hermes via socket connection to the blender-mcp addon. Create 3D objects, materials, animations, and run arbitrary Blender Python (bpy) code. Use when user wants to create or modify anything in Blender. |
+| [**concept-diagrams**](/docs/user-guide/skills/optional/creative/creative-concept-diagrams) | Generate flat, minimal light/dark-aware SVG diagrams as standalone HTML files, using a unified educational visual language with 9 semantic color ramps, sentence-case typography, and automatic dark mode. Best suited for educational and no... |
+| [**hyperframes**](/docs/user-guide/skills/optional/creative/creative-hyperframes) | Create HTML-based video compositions, animated title cards, social overlays, captioned talking-head videos, audio-reactive visuals, and shader transitions using HyperFrames. HTML is the source of truth for video. Use when the user wants... |
+| [**kanban-video-orchestrator**](/docs/user-guide/skills/optional/creative/creative-kanban-video-orchestrator) | Plan, set up, and monitor a multi-agent video production pipeline backed by Hermes Kanban. Use when the user wants to make ANY video — narrative film, product/marketing, music video, explainer, ASCII/terminal art, abstract/generative loo... |
+| [**meme-generation**](/docs/user-guide/skills/optional/creative/creative-meme-generation) | Generate real meme images by picking a template and overlaying text with Pillow. Produces actual .png meme files. |
 
 ## devops
 
 | Skill | Description |
 |-------|-------------|
-| [**inference-sh-cli**](/user-guide/skills/optional/devops/devops-cli) | Run 150+ AI apps via inference.sh CLI (infsh) — image generation, video creation, LLMs, search, 3D, social automation. Uses the terminal tool. Triggers: inference.sh, infsh, ai apps, flux, veo, image generation, video generation, seedrea... |
-| [**docker-management**](/user-guide/skills/optional/devops/devops-docker-management) | Manage Docker containers, images, volumes, networks, and Compose stacks — lifecycle ops, debugging, cleanup, and Dockerfile optimization. |
-| [**pinggy-tunnel**](/user-guide/skills/optional/devops/devops-pinggy-tunnel) | Zero-install localhost tunnels over SSH via Pinggy. |
-| [**watchers**](/user-guide/skills/optional/devops/devops-watchers) | Poll RSS, JSON APIs, and GitHub with watermark dedup. |
+| [**inference-sh-cli**](/docs/user-guide/skills/optional/devops/devops-cli) | Run 150+ AI apps via inference.sh CLI (infsh) — image generation, video creation, LLMs, search, 3D, social automation. Uses the terminal tool. Triggers: inference.sh, infsh, ai apps, flux, veo, image generation, video generation, seedrea... |
+| [**docker-management**](/docs/user-guide/skills/optional/devops/devops-docker-management) | Manage Docker containers, images, volumes, networks, and Compose stacks — lifecycle ops, debugging, cleanup, and Dockerfile optimization. |
+| [**pinggy-tunnel**](/docs/user-guide/skills/optional/devops/devops-pinggy-tunnel) | Zero-install localhost tunnels over SSH via Pinggy. |
+| [**watchers**](/docs/user-guide/skills/optional/devops/devops-watchers) | Poll RSS, JSON APIs, and GitHub with watermark dedup. |
 
 ## dogfood
 
 | Skill | Description |
 |-------|-------------|
-| [**adversarial-ux-test**](/user-guide/skills/optional/dogfood/dogfood-adversarial-ux-test) | Roleplay the most difficult, tech-resistant user for your product. Browse the app as that persona, find every UX pain point, then filter complaints through a pragmatism layer to separate real problems from noise. Creates actionable ticke... |
+| [**adversarial-ux-test**](/docs/user-guide/skills/optional/dogfood/dogfood-adversarial-ux-test) | Roleplay the most difficult, tech-resistant user for your product. Browse the app as that persona, find every UX pain point, then filter complaints through a pragmatism layer to separate real problems from noise. Creates actionable ticke... |
 
 ## email
 
 | Skill | Description |
 |-------|-------------|
-| [**agentmail**](/user-guide/skills/optional/email/email-agentmail) | Give the agent its own dedicated email inbox via AgentMail. Send, receive, and manage email autonomously using agent-owned email addresses (e.g. hermes-agent@agentmail.to). |
+| [**agentmail**](/docs/user-guide/skills/optional/email/email-agentmail) | Give the agent its own dedicated email inbox via AgentMail. Send, receive, and manage email autonomously using agent-owned email addresses (e.g. hermes-agent@agentmail.to). |
 
 ## finance
 
 | Skill | Description |
 |-------|-------------|
-| [**3-statement-model**](/user-guide/skills/optional/finance/finance-3-statement-model) | Build fully-integrated 3-statement models (IS, BS, CF) in Excel with working capital schedules, D&A roll-forwards, debt schedule, and the plugs that make cash and retained earnings tie. Pairs with excel-author. |
-| [**comps-analysis**](/user-guide/skills/optional/finance/finance-comps-analysis) | Build comparable company analysis in Excel — operating metrics, valuation multiples, statistical benchmarking vs peer sets. Pairs with excel-author. Use for public-company valuation, IPO pricing, sector benchmarking, or outlier detection. |
-| [**dcf-model**](/user-guide/skills/optional/finance/finance-dcf-model) | Build institutional-quality DCF valuation models in Excel — revenue projections, FCF build, WACC, terminal value, Bear/Base/Bull scenarios, 5x5 sensitivity tables. Pairs with excel-author. Use for intrinsic-value equity analysis. |
-| [**excel-author**](/user-guide/skills/optional/finance/finance-excel-author) | Build auditable Excel workbooks headless with openpyxl — blue/black/green cell conventions, formulas over hardcodes, named ranges, balance checks, sensitivity tables. Use for financial models, audit outputs, reconciliations. |
-| [**lbo-model**](/user-guide/skills/optional/finance/finance-lbo-model) | Build leveraged buyout models in Excel — sources & uses, debt schedule, cash sweep, exit multiple, IRR/MOIC sensitivity. Pairs with excel-author. Use for PE screening, sponsor-case valuation, or illustrative LBO in a pitch. |
-| [**merger-model**](/user-guide/skills/optional/finance/finance-merger-model) | Build accretion/dilution (merger) models in Excel — pro-forma P&L, synergies, financing mix, EPS impact. Pairs with excel-author. Use for M&A pitches, board materials, or deal evaluation. |
-| [**pptx-author**](/user-guide/skills/optional/finance/finance-pptx-author) | Build PowerPoint decks headless with python-pptx. Pairs with excel-author for model-backed decks where every number traces to a workbook cell. Use for pitch decks, IC memos, earnings notes. |
-| [**stocks**](/user-guide/skills/optional/finance/finance-stocks) | Stock quotes, history, search, compare, crypto via Yahoo. |
+| [**3-statement-model**](/docs/user-guide/skills/optional/finance/finance-3-statement-model) | Build fully-integrated 3-statement models (IS, BS, CF) in Excel with working capital schedules, D&A roll-forwards, debt schedule, and the plugs that make cash and retained earnings tie. Pairs with excel-author. |
+| [**comps-analysis**](/docs/user-guide/skills/optional/finance/finance-comps-analysis) | Build comparable company analysis in Excel — operating metrics, valuation multiples, statistical benchmarking vs peer sets. Pairs with excel-author. Use for public-company valuation, IPO pricing, sector benchmarking, or outlier detection. |
+| [**dcf-model**](/docs/user-guide/skills/optional/finance/finance-dcf-model) | Build institutional-quality DCF valuation models in Excel — revenue projections, FCF build, WACC, terminal value, Bear/Base/Bull scenarios, 5x5 sensitivity tables. Pairs with excel-author. Use for intrinsic-value equity analysis. |
+| [**excel-author**](/docs/user-guide/skills/optional/finance/finance-excel-author) | Build auditable Excel workbooks headless with openpyxl — blue/black/green cell conventions, formulas over hardcodes, named ranges, balance checks, sensitivity tables. Use for financial models, audit outputs, reconciliations. |
+| [**lbo-model**](/docs/user-guide/skills/optional/finance/finance-lbo-model) | Build leveraged buyout models in Excel — sources & uses, debt schedule, cash sweep, exit multiple, IRR/MOIC sensitivity. Pairs with excel-author. Use for PE screening, sponsor-case valuation, or illustrative LBO in a pitch. |
+| [**merger-model**](/docs/user-guide/skills/optional/finance/finance-merger-model) | Build accretion/dilution (merger) models in Excel — pro-forma P&L, synergies, financing mix, EPS impact. Pairs with excel-author. Use for M&A pitches, board materials, or deal evaluation. |
+| [**pptx-author**](/docs/user-guide/skills/optional/finance/finance-pptx-author) | Build PowerPoint decks headless with python-pptx. Pairs with excel-author for model-backed decks where every number traces to a workbook cell. Use for pitch decks, IC memos, earnings notes. |
+| [**stocks**](/docs/user-guide/skills/optional/finance/finance-stocks) | Stock quotes, history, search, compare, crypto via Yahoo. |
 
 ## health
 
 | Skill | Description |
 |-------|-------------|
-| [**fitness-nutrition**](/user-guide/skills/optional/health/health-fitness-nutrition) | Gym workout planner and nutrition tracker. Search 690+ exercises by muscle, equipment, or category via wger. Look up macros and calories for 380,000+ foods via USDA FoodData Central. Compute BMI, TDEE, one-rep max, macro splits, and body... |
-| [**neuroskill-bci**](/user-guide/skills/optional/health/health-neuroskill-bci) | Connect to a running NeuroSkill instance and incorporate the user's real-time cognitive and emotional state (focus, relaxation, mood, cognitive load, drowsiness, heart rate, HRV, sleep staging, and 40+ derived EXG scores) into responses.... |
+| [**fitness-nutrition**](/docs/user-guide/skills/optional/health/health-fitness-nutrition) | Gym workout planner and nutrition tracker. Search 690+ exercises by muscle, equipment, or category via wger. Look up macros and calories for 380,000+ foods via USDA FoodData Central. Compute BMI, TDEE, one-rep max, macro splits, and body... |
+| [**neuroskill-bci**](/docs/user-guide/skills/optional/health/health-neuroskill-bci) | Connect to a running NeuroSkill instance and incorporate the user's real-time cognitive and emotional state (focus, relaxation, mood, cognitive load, drowsiness, heart rate, HRV, sleep staging, and 40+ derived EXG scores) into responses.... |
 
 ## mcp
 
 | Skill | Description |
 |-------|-------------|
-| [**fastmcp**](/user-guide/skills/optional/mcp/mcp-fastmcp) | Build, test, inspect, install, and deploy MCP servers with FastMCP in Python. Use when creating a new MCP server, wrapping an API or database as MCP tools, exposing resources or prompts, or preparing a FastMCP server for Claude Code, Cur... |
-| [**mcporter**](/user-guide/skills/optional/mcp/mcp-mcporter) | Use the mcporter CLI to list, configure, auth, and call MCP servers/tools directly (HTTP or stdio), including ad-hoc servers, config edits, and CLI/type generation. |
+| [**fastmcp**](/docs/user-guide/skills/optional/mcp/mcp-fastmcp) | Build, test, inspect, install, and deploy MCP servers with FastMCP in Python. Use when creating a new MCP server, wrapping an API or database as MCP tools, exposing resources or prompts, or preparing a FastMCP server for Claude Code, Cur... |
+| [**mcporter**](/docs/user-guide/skills/optional/mcp/mcp-mcporter) | Use the mcporter CLI to list, configure, auth, and call MCP servers/tools directly (HTTP or stdio), including ad-hoc servers, config edits, and CLI/type generation. |
 
 ## migration
 
 | Skill | Description |
 |-------|-------------|
-| [**openclaw-migration**](/user-guide/skills/optional/migration/migration-openclaw-migration) | Migrate a user's OpenClaw customization footprint into Hermes Agent. Imports Hermes-compatible memories, SOUL.md, command allowlists, user skills, and selected workspace assets from ~/.openclaw, then reports exactly what could not be mig... |
+| [**openclaw-migration**](/docs/user-guide/skills/optional/migration/migration-openclaw-migration) | Migrate a user's OpenClaw customization footprint into Hermes Agent. Imports Hermes-compatible memories, SOUL.md, command allowlists, user skills, and selected workspace assets from ~/.openclaw, then reports exactly what could not be mig... |
 
 ## mlops
 
 | Skill | Description |
 |-------|-------------|
-| [**huggingface-accelerate**](/user-guide/skills/optional/mlops/mlops-accelerate) | Simplest distributed training API. 4 lines to add distributed support to any PyTorch script. Unified API for DeepSpeed/FSDP/Megatron/DDP. Automatic device placement, mixed precision (FP16/BF16/FP8). Interactive config, single launch comm... |
-| [**axolotl**](/user-guide/skills/optional/mlops/mlops-training-axolotl) | Axolotl: YAML LLM fine-tuning (LoRA, DPO, GRPO). |
-| [**chroma**](/user-guide/skills/optional/mlops/mlops-chroma) | Open-source embedding database for AI applications. Store embeddings and metadata, perform vector and full-text search, filter by metadata. Simple 4-function API. Scales from notebooks to production clusters. Use for semantic search, RAG... |
-| [**clip**](/user-guide/skills/optional/mlops/mlops-clip) | OpenAI's model connecting vision and language. Enables zero-shot image classification, image-text matching, and cross-modal retrieval. Trained on 400M image-text pairs. Use for image search, content moderation, or vision-language tasks w... |
-| [**faiss**](/user-guide/skills/optional/mlops/mlops-faiss) | Facebook's library for efficient similarity search and clustering of dense vectors. Supports billions of vectors, GPU acceleration, and various index types (Flat, IVF, HNSW). Use for fast k-NN search, large-scale vector retrieval, or whe... |
-| [**optimizing-attention-flash**](/user-guide/skills/optional/mlops/mlops-flash-attention) | Optimizes transformer attention with Flash Attention for 2-4x speedup and 10-20x memory reduction. Use when training/running transformers with long sequences (>512 tokens), encountering GPU memory issues with attention, or need faster in... |
-| [**guidance**](/user-guide/skills/optional/mlops/mlops-guidance) | Control LLM output with regex and grammars, guarantee valid JSON/XML/code generation, enforce structured formats, and build multi-step workflows with Guidance - Microsoft Research's constrained generation framework |
-| [**huggingface-tokenizers**](/user-guide/skills/optional/mlops/mlops-huggingface-tokenizers) | Fast tokenizers optimized for research and production. Rust-based implementation tokenizes 1GB in &lt;20 seconds. Supports BPE, WordPiece, and Unigram algorithms. Train custom vocabularies, track alignments, handle padding/truncation. Integ... |
-| [**instructor**](/user-guide/skills/optional/mlops/mlops-instructor) | Extract structured data from LLM responses with Pydantic validation, retry failed extractions automatically, parse complex JSON with type safety, and stream partial results with Instructor - battle-tested structured output library |
-| [**lambda-labs-gpu-cloud**](/user-guide/skills/optional/mlops/mlops-lambda-labs) | Reserved and on-demand GPU cloud instances for ML training and inference. Use when you need dedicated GPU instances with simple SSH access, persistent filesystems, or high-performance multi-node clusters for large-scale training. |
-| [**llava**](/user-guide/skills/optional/mlops/mlops-llava) | Large Language and Vision Assistant. Enables visual instruction tuning and image-based conversations. Combines CLIP vision encoder with Vicuna/LLaMA language models. Supports multi-turn image chat, visual question answering, and instruct... |
-| [**modal-serverless-gpu**](/user-guide/skills/optional/mlops/mlops-modal) | Serverless GPU cloud platform for running ML workloads. Use when you need on-demand GPU access without infrastructure management, deploying ML models as APIs, or running batch jobs with automatic scaling. |
-| [**nemo-curator**](/user-guide/skills/optional/mlops/mlops-nemo-curator) | GPU-accelerated data curation for LLM training. Supports text/image/video/audio. Features fuzzy deduplication (16× faster), quality filtering (30+ heuristics), semantic deduplication, PII redaction, NSFW detection. Scales across GPUs wit... |
-| [**outlines**](/user-guide/skills/optional/mlops/mlops-inference-outlines) | Outlines: structured JSON/regex/Pydantic LLM generation. |
-| [**peft-fine-tuning**](/user-guide/skills/optional/mlops/mlops-peft) | Parameter-efficient fine-tuning for LLMs using LoRA, QLoRA, and 25+ methods. Use when fine-tuning large models (7B-70B) with limited GPU memory, when you need to train &lt;1% of parameters with minimal accuracy loss, or for multi-adapter se... |
-| [**pinecone**](/user-guide/skills/optional/mlops/mlops-pinecone) | Managed vector database for production AI applications. Fully managed, auto-scaling, with hybrid search (dense + sparse), metadata filtering, and namespaces. Low latency (&lt;100ms p95). Use for production RAG, recommendation systems, or se... |
-| [**pytorch-fsdp**](/user-guide/skills/optional/mlops/mlops-pytorch-fsdp) | Expert guidance for Fully Sharded Data Parallel training with PyTorch FSDP - parameter sharding, mixed precision, CPU offloading, FSDP2 |
-| [**pytorch-lightning**](/user-guide/skills/optional/mlops/mlops-pytorch-lightning) | High-level PyTorch framework with Trainer class, automatic distributed training (DDP/FSDP/DeepSpeed), callbacks system, and minimal boilerplate. Scales from laptop to supercomputer with same code. Use when you want clean training loops w... |
-| [**qdrant-vector-search**](/user-guide/skills/optional/mlops/mlops-qdrant) | High-performance vector similarity search engine for RAG and semantic search. Use when building production RAG systems requiring fast nearest neighbor search, hybrid search with filtering, or scalable vector storage with Rust-powered per... |
-| [**sparse-autoencoder-training**](/user-guide/skills/optional/mlops/mlops-saelens) | Provides guidance for training and analyzing Sparse Autoencoders (SAEs) using SAELens to decompose neural network activations into interpretable features. Use when discovering interpretable features, analyzing superposition, or studying... |
-| [**simpo-training**](/user-guide/skills/optional/mlops/mlops-simpo) | Simple Preference Optimization for LLM alignment. Reference-free alternative to DPO with better performance (+6.4 points on AlpacaEval 2.0). No reference model needed, more efficient than DPO. Use for preference alignment when want simpl... |
-| [**slime-rl-training**](/user-guide/skills/optional/mlops/mlops-slime) | Provides guidance for LLM post-training with RL using slime, a Megatron+SGLang framework. Use when training GLM models, implementing custom data generation workflows, or needing tight Megatron-LM integration for RL scaling. |
-| [**stable-diffusion-image-generation**](/user-guide/skills/optional/mlops/mlops-stable-diffusion) | State-of-the-art text-to-image generation with Stable Diffusion models via HuggingFace Diffusers. Use when generating images from text prompts, performing image-to-image translation, inpainting, or building custom diffusion pipelines. |
-| [**tensorrt-llm**](/user-guide/skills/optional/mlops/mlops-tensorrt-llm) | Optimizes LLM inference with NVIDIA TensorRT for maximum throughput and lowest latency. Use for production deployment on NVIDIA GPUs (A100/H100), when you need 10-100x faster inference than PyTorch, or for serving models with quantizatio... |
-| [**distributed-llm-pretraining-torchtitan**](/user-guide/skills/optional/mlops/mlops-torchtitan) | Provides PyTorch-native distributed LLM pretraining using torchtitan with 4D parallelism (FSDP2, TP, PP, CP). Use when pretraining Llama 3.1, DeepSeek V3, or custom models at scale from 8 to 512+ GPUs with Float8, torch.compile, and dist... |
-| [**fine-tuning-with-trl**](/user-guide/skills/optional/mlops/mlops-training-trl-fine-tuning) | TRL: SFT, DPO, PPO, GRPO, reward modeling for LLM RLHF. |
-| [**unsloth**](/user-guide/skills/optional/mlops/mlops-training-unsloth) | Unsloth: 2-5x faster LoRA/QLoRA fine-tuning, less VRAM. |
-| [**whisper**](/user-guide/skills/optional/mlops/mlops-whisper) | OpenAI's general-purpose speech recognition model. Supports 99 languages, transcription, translation to English, and language identification. Six model sizes from tiny (39M params) to large (1550M params). Use for speech-to-text, podcast... |
+| [**huggingface-accelerate**](/docs/user-guide/skills/optional/mlops/mlops-accelerate) | Simplest distributed training API. 4 lines to add distributed support to any PyTorch script. Unified API for DeepSpeed/FSDP/Megatron/DDP. Automatic device placement, mixed precision (FP16/BF16/FP8). Interactive config, single launch comm... |
+| [**axolotl**](/docs/user-guide/skills/optional/mlops/mlops-training-axolotl) | Axolotl: YAML LLM fine-tuning (LoRA, DPO, GRPO). |
+| [**chroma**](/docs/user-guide/skills/optional/mlops/mlops-chroma) | Open-source embedding database for AI applications. Store embeddings and metadata, perform vector and full-text search, filter by metadata. Simple 4-function API. Scales from notebooks to production clusters. Use for semantic search, RAG... |
+| [**clip**](/docs/user-guide/skills/optional/mlops/mlops-clip) | OpenAI's model connecting vision and language. Enables zero-shot image classification, image-text matching, and cross-modal retrieval. Trained on 400M image-text pairs. Use for image search, content moderation, or vision-language tasks w... |
+| [**faiss**](/docs/user-guide/skills/optional/mlops/mlops-faiss) | Facebook's library for efficient similarity search and clustering of dense vectors. Supports billions of vectors, GPU acceleration, and various index types (Flat, IVF, HNSW). Use for fast k-NN search, large-scale vector retrieval, or whe... |
+| [**optimizing-attention-flash**](/docs/user-guide/skills/optional/mlops/mlops-flash-attention) | Optimizes transformer attention with Flash Attention for 2-4x speedup and 10-20x memory reduction. Use when training/running transformers with long sequences (>512 tokens), encountering GPU memory issues with attention, or need faster in... |
+| [**guidance**](/docs/user-guide/skills/optional/mlops/mlops-guidance) | Control LLM output with regex and grammars, guarantee valid JSON/XML/code generation, enforce structured formats, and build multi-step workflows with Guidance - Microsoft Research's constrained generation framework |
+| [**huggingface-tokenizers**](/docs/user-guide/skills/optional/mlops/mlops-huggingface-tokenizers) | Fast tokenizers optimized for research and production. Rust-based implementation tokenizes 1GB in &lt;20 seconds. Supports BPE, WordPiece, and Unigram algorithms. Train custom vocabularies, track alignments, handle padding/truncation. Integ... |
+| [**instructor**](/docs/user-guide/skills/optional/mlops/mlops-instructor) | Extract structured data from LLM responses with Pydantic validation, retry failed extractions automatically, parse complex JSON with type safety, and stream partial results with Instructor - battle-tested structured output library |
+| [**lambda-labs-gpu-cloud**](/docs/user-guide/skills/optional/mlops/mlops-lambda-labs) | Reserved and on-demand GPU cloud instances for ML training and inference. Use when you need dedicated GPU instances with simple SSH access, persistent filesystems, or high-performance multi-node clusters for large-scale training. |
+| [**llava**](/docs/user-guide/skills/optional/mlops/mlops-llava) | Large Language and Vision Assistant. Enables visual instruction tuning and image-based conversations. Combines CLIP vision encoder with Vicuna/LLaMA language models. Supports multi-turn image chat, visual question answering, and instruct... |
+| [**modal-serverless-gpu**](/docs/user-guide/skills/optional/mlops/mlops-modal) | Serverless GPU cloud platform for running ML workloads. Use when you need on-demand GPU access without infrastructure management, deploying ML models as APIs, or running batch jobs with automatic scaling. |
+| [**nemo-curator**](/docs/user-guide/skills/optional/mlops/mlops-nemo-curator) | GPU-accelerated data curation for LLM training. Supports text/image/video/audio. Features fuzzy deduplication (16× faster), quality filtering (30+ heuristics), semantic deduplication, PII redaction, NSFW detection. Scales across GPUs wit... |
+| [**outlines**](/docs/user-guide/skills/optional/mlops/mlops-inference-outlines) | Outlines: structured JSON/regex/Pydantic LLM generation. |
+| [**peft-fine-tuning**](/docs/user-guide/skills/optional/mlops/mlops-peft) | Parameter-efficient fine-tuning for LLMs using LoRA, QLoRA, and 25+ methods. Use when fine-tuning large models (7B-70B) with limited GPU memory, when you need to train &lt;1% of parameters with minimal accuracy loss, or for multi-adapter se... |
+| [**pinecone**](/docs/user-guide/skills/optional/mlops/mlops-pinecone) | Managed vector database for production AI applications. Fully managed, auto-scaling, with hybrid search (dense + sparse), metadata filtering, and namespaces. Low latency (&lt;100ms p95). Use for production RAG, recommendation systems, or se... |
+| [**pytorch-fsdp**](/docs/user-guide/skills/optional/mlops/mlops-pytorch-fsdp) | Expert guidance for Fully Sharded Data Parallel training with PyTorch FSDP - parameter sharding, mixed precision, CPU offloading, FSDP2 |
+| [**pytorch-lightning**](/docs/user-guide/skills/optional/mlops/mlops-pytorch-lightning) | High-level PyTorch framework with Trainer class, automatic distributed training (DDP/FSDP/DeepSpeed), callbacks system, and minimal boilerplate. Scales from laptop to supercomputer with same code. Use when you want clean training loops w... |
+| [**qdrant-vector-search**](/docs/user-guide/skills/optional/mlops/mlops-qdrant) | High-performance vector similarity search engine for RAG and semantic search. Use when building production RAG systems requiring fast nearest neighbor search, hybrid search with filtering, or scalable vector storage with Rust-powered per... |
+| [**sparse-autoencoder-training**](/docs/user-guide/skills/optional/mlops/mlops-saelens) | Provides guidance for training and analyzing Sparse Autoencoders (SAEs) using SAELens to decompose neural network activations into interpretable features. Use when discovering interpretable features, analyzing superposition, or studying... |
+| [**simpo-training**](/docs/user-guide/skills/optional/mlops/mlops-simpo) | Simple Preference Optimization for LLM alignment. Reference-free alternative to DPO with better performance (+6.4 points on AlpacaEval 2.0). No reference model needed, more efficient than DPO. Use for preference alignment when want simpl... |
+| [**slime-rl-training**](/docs/user-guide/skills/optional/mlops/mlops-slime) | Provides guidance for LLM post-training with RL using slime, a Megatron+SGLang framework. Use when training GLM models, implementing custom data generation workflows, or needing tight Megatron-LM integration for RL scaling. |
+| [**stable-diffusion-image-generation**](/docs/user-guide/skills/optional/mlops/mlops-stable-diffusion) | State-of-the-art text-to-image generation with Stable Diffusion models via HuggingFace Diffusers. Use when generating images from text prompts, performing image-to-image translation, inpainting, or building custom diffusion pipelines. |
+| [**tensorrt-llm**](/docs/user-guide/skills/optional/mlops/mlops-tensorrt-llm) | Optimizes LLM inference with NVIDIA TensorRT for maximum throughput and lowest latency. Use for production deployment on NVIDIA GPUs (A100/H100), when you need 10-100x faster inference than PyTorch, or for serving models with quantizatio... |
+| [**distributed-llm-pretraining-torchtitan**](/docs/user-guide/skills/optional/mlops/mlops-torchtitan) | Provides PyTorch-native distributed LLM pretraining using torchtitan with 4D parallelism (FSDP2, TP, PP, CP). Use when pretraining Llama 3.1, DeepSeek V3, or custom models at scale from 8 to 512+ GPUs with Float8, torch.compile, and dist... |
+| [**fine-tuning-with-trl**](/docs/user-guide/skills/optional/mlops/mlops-training-trl-fine-tuning) | TRL: SFT, DPO, PPO, GRPO, reward modeling for LLM RLHF. |
+| [**unsloth**](/docs/user-guide/skills/optional/mlops/mlops-training-unsloth) | Unsloth: 2-5x faster LoRA/QLoRA fine-tuning, less VRAM. |
+| [**whisper**](/docs/user-guide/skills/optional/mlops/mlops-whisper) | OpenAI's general-purpose speech recognition model. Supports 99 languages, transcription, translation to English, and language identification. Six model sizes from tiny (39M params) to large (1550M params). Use for speech-to-text, podcast... |
 
 ## productivity
 
 | Skill | Description |
 |-------|-------------|
-| [**canvas**](/user-guide/skills/optional/productivity/productivity-canvas) | Canvas LMS integration — fetch enrolled courses and assignments using API token authentication. |
-| [**here.now**](/user-guide/skills/optional/productivity/productivity-here-now) | Publish static sites to &#123;slug&#125;.here.now and store private files in cloud Drives for agent-to-agent handoff. |
-| [**memento-flashcards**](/user-guide/skills/optional/productivity/productivity-memento-flashcards) | Spaced-repetition flashcard system. Create cards from facts or text, chat with flashcards using free-text answers graded by the agent, generate quizzes from YouTube transcripts, review due cards with adaptive scheduling, and export/impor... |
-| [**shop-app**](/user-guide/skills/optional/productivity/productivity-shop-app) | Shop.app: product search, order tracking, returns, reorder. |
-| [**shopify**](/user-guide/skills/optional/productivity/productivity-shopify) | Shopify Admin & Storefront GraphQL APIs via curl. Products, orders, customers, inventory, metafields. |
-| [**siyuan**](/user-guide/skills/optional/productivity/productivity-siyuan) | SiYuan Note API for searching, reading, creating, and managing blocks and documents in a self-hosted knowledge base via curl. |
-| [**telephony**](/user-guide/skills/optional/productivity/productivity-telephony) | Give Hermes phone capabilities without core tool changes. Provision and persist a Twilio number, send and receive SMS/MMS, make direct calls, and place AI-driven outbound calls through Bland.ai or Vapi. |
+| [**canvas**](/docs/user-guide/skills/optional/productivity/productivity-canvas) | Canvas LMS integration — fetch enrolled courses and assignments using API token authentication. |
+| [**here.now**](/docs/user-guide/skills/optional/productivity/productivity-here-now) | Publish static sites to &#123;slug&#125;.here.now and store private files in cloud Drives for agent-to-agent handoff. |
+| [**memento-flashcards**](/docs/user-guide/skills/optional/productivity/productivity-memento-flashcards) | Spaced-repetition flashcard system. Create cards from facts or text, chat with flashcards using free-text answers graded by the agent, generate quizzes from YouTube transcripts, review due cards with adaptive scheduling, and export/impor... |
+| [**shop-app**](/docs/user-guide/skills/optional/productivity/productivity-shop-app) | Shop.app: product search, order tracking, returns, reorder. |
+| [**shopify**](/docs/user-guide/skills/optional/productivity/productivity-shopify) | Shopify Admin & Storefront GraphQL APIs via curl. Products, orders, customers, inventory, metafields. |
+| [**siyuan**](/docs/user-guide/skills/optional/productivity/productivity-siyuan) | SiYuan Note API for searching, reading, creating, and managing blocks and documents in a self-hosted knowledge base via curl. |
+| [**telephony**](/docs/user-guide/skills/optional/productivity/productivity-telephony) | Give Hermes phone capabilities without core tool changes. Provision and persist a Twilio number, send and receive SMS/MMS, make direct calls, and place AI-driven outbound calls through Bland.ai or Vapi. |
 
 ## research
 
 | Skill | Description |
 |-------|-------------|
-| [**bioinformatics**](/user-guide/skills/optional/research/research-bioinformatics) | Gateway to 400+ bioinformatics skills from bioSkills and ClawBio. Covers genomics, transcriptomics, single-cell, variant calling, pharmacogenomics, metagenomics, structural biology, and more. Fetches domain-specific reference material on... |
-| [**darwinian-evolver**](/user-guide/skills/optional/research/research-darwinian-evolver) | Evolve prompts/regex/SQL/code with Imbue's evolution loop. |
-| [**domain-intel**](/user-guide/skills/optional/research/research-domain-intel) | Passive domain reconnaissance using Python stdlib. Subdomain discovery, SSL certificate inspection, WHOIS lookups, DNS records, domain availability checks, and bulk multi-domain analysis. No API keys required. |
-| [**drug-discovery**](/user-guide/skills/optional/research/research-drug-discovery) | Pharmaceutical research assistant for drug discovery workflows. Search bioactive compounds on ChEMBL, calculate drug-likeness (Lipinski Ro5, QED, TPSA, synthetic accessibility), look up drug-drug interactions via OpenFDA, interpret ADMET... |
-| [**duckduckgo-search**](/user-guide/skills/optional/research/research-duckduckgo-search) | Free web search via DuckDuckGo — text, news, images, videos. No API key needed. Prefer the `ddgs` CLI when installed; use the Python DDGS library only after verifying that `ddgs` is available in the current runtime. |
-| [**gitnexus-explorer**](/user-guide/skills/optional/research/research-gitnexus-explorer) | Index a codebase with GitNexus and serve an interactive knowledge graph via web UI + Cloudflare tunnel. |
-| [**osint-investigation**](/user-guide/skills/optional/research/research-osint-investigation) | Public-records OSINT investigation framework — SEC EDGAR filings, USAspending contracts, Senate lobbying, OFAC sanctions, ICIJ offshore leaks, NYC property records (ACRIS), OpenCorporates registries, CourtListener court records, Wayback... |
-| [**parallel-cli**](/user-guide/skills/optional/research/research-parallel-cli) | Optional vendor skill for Parallel CLI — agent-native web search, extraction, deep research, enrichment, FindAll, and monitoring. Prefer JSON output and non-interactive flows. |
-| [**qmd**](/user-guide/skills/optional/research/research-qmd) | Search personal knowledge bases, notes, docs, and meeting transcripts locally using qmd — a hybrid retrieval engine with BM25, vector search, and LLM reranking. Supports CLI and MCP integration. |
-| [**scrapling**](/user-guide/skills/optional/research/research-scrapling) | Web scraping with Scrapling - HTTP fetching, stealth browser automation, Cloudflare bypass, and spider crawling via CLI and Python. |
-| [**searxng-search**](/user-guide/skills/optional/research/research-searxng-search) | Free meta-search via SearXNG — aggregates results from 70+ search engines. Self-hosted or use a public instance. No API key needed. Falls back automatically when the web search toolset is unavailable. |
+| [**bioinformatics**](/docs/user-guide/skills/optional/research/research-bioinformatics) | Gateway to 400+ bioinformatics skills from bioSkills and ClawBio. Covers genomics, transcriptomics, single-cell, variant calling, pharmacogenomics, metagenomics, structural biology, and more. Fetches domain-specific reference material on... |
+| [**darwinian-evolver**](/docs/user-guide/skills/optional/research/research-darwinian-evolver) | Evolve prompts/regex/SQL/code with Imbue's evolution loop. |
+| [**domain-intel**](/docs/user-guide/skills/optional/research/research-domain-intel) | Passive domain reconnaissance using Python stdlib. Subdomain discovery, SSL certificate inspection, WHOIS lookups, DNS records, domain availability checks, and bulk multi-domain analysis. No API keys required. |
+| [**drug-discovery**](/docs/user-guide/skills/optional/research/research-drug-discovery) | Pharmaceutical research assistant for drug discovery workflows. Search bioactive compounds on ChEMBL, calculate drug-likeness (Lipinski Ro5, QED, TPSA, synthetic accessibility), look up drug-drug interactions via OpenFDA, interpret ADMET... |
+| [**duckduckgo-search**](/docs/user-guide/skills/optional/research/research-duckduckgo-search) | Free web search via DuckDuckGo — text, news, images, videos. No API key needed. Prefer the `ddgs` CLI when installed; use the Python DDGS library only after verifying that `ddgs` is available in the current runtime. |
+| [**gitnexus-explorer**](/docs/user-guide/skills/optional/research/research-gitnexus-explorer) | Index a codebase with GitNexus and serve an interactive knowledge graph via web UI + Cloudflare tunnel. |
+| [**osint-investigation**](/docs/user-guide/skills/optional/research/research-osint-investigation) | Public-records OSINT investigation framework — SEC EDGAR filings, USAspending contracts, Senate lobbying, OFAC sanctions, ICIJ offshore leaks, NYC property records (ACRIS), OpenCorporates registries, CourtListener court records, Wayback... |
+| [**parallel-cli**](/docs/user-guide/skills/optional/research/research-parallel-cli) | Optional vendor skill for Parallel CLI — agent-native web search, extraction, deep research, enrichment, FindAll, and monitoring. Prefer JSON output and non-interactive flows. |
+| [**qmd**](/docs/user-guide/skills/optional/research/research-qmd) | Search personal knowledge bases, notes, docs, and meeting transcripts locally using qmd — a hybrid retrieval engine with BM25, vector search, and LLM reranking. Supports CLI and MCP integration. |
+| [**scrapling**](/docs/user-guide/skills/optional/research/research-scrapling) | Web scraping with Scrapling - HTTP fetching, stealth browser automation, Cloudflare bypass, and spider crawling via CLI and Python. |
+| [**searxng-search**](/docs/user-guide/skills/optional/research/research-searxng-search) | Free meta-search via SearXNG — aggregates results from 70+ search engines. Self-hosted or use a public instance. No API key needed. Falls back automatically when the web search toolset is unavailable. |
 
 ## security
 
 | Skill | Description |
 |-------|-------------|
-| [**1password**](/user-guide/skills/optional/security/security-1password) | Set up and use 1Password CLI (op). Use when installing the CLI, enabling desktop app integration, signing in, and reading/injecting secrets for commands. |
-| [**oss-forensics**](/user-guide/skills/optional/security/security-oss-forensics) | Supply chain investigation, evidence recovery, and forensic analysis for GitHub repositories. Covers deleted commit recovery, force-push detection, IOC extraction, multi-source evidence collection, hypothesis formation/validation, and st... |
-| [**sherlock**](/user-guide/skills/optional/security/security-sherlock) | OSINT username search across 400+ social networks. Hunt down social media accounts by username. |
+| [**1password**](/docs/user-guide/skills/optional/security/security-1password) | Set up and use 1Password CLI (op). Use when installing the CLI, enabling desktop app integration, signing in, and reading/injecting secrets for commands. |
+| [**oss-forensics**](/docs/user-guide/skills/optional/security/security-oss-forensics) | Supply chain investigation, evidence recovery, and forensic analysis for GitHub repositories. Covers deleted commit recovery, force-push detection, IOC extraction, multi-source evidence collection, hypothesis formation/validation, and st... |
+| [**sherlock**](/docs/user-guide/skills/optional/security/security-sherlock) | OSINT username search across 400+ social networks. Hunt down social media accounts by username. |
+| [**web-pentest**](/docs/user-guide/skills/optional/security/security-web-pentest) | Authorized web application penetration testing — reconnaissance, vulnerability analysis, proof-based exploitation, and professional reporting. Adapts Shannon's "No Exploit, No Report" methodology with hard guardrails for scope, authoriza... |
 
 ## software-development
 
 | Skill | Description |
 |-------|-------------|
-| [**code-wiki**](/user-guide/skills/optional/software-development/software-development-code-wiki) | Generate wiki docs + Mermaid diagrams for any codebase. |
-| [**rest-graphql-debug**](/user-guide/skills/optional/software-development/software-development-rest-graphql-debug) | Debug REST/GraphQL APIs: status codes, auth, schemas, repro. |
+| [**code-wiki**](/docs/user-guide/skills/optional/software-development/software-development-code-wiki) | Generate wiki docs + Mermaid diagrams for any codebase. |
+| [**rest-graphql-debug**](/docs/user-guide/skills/optional/software-development/software-development-rest-graphql-debug) | Debug REST/GraphQL APIs: status codes, auth, schemas, repro. |
 
 ## web-development
 
 | Skill | Description |
 |-------|-------------|
-| [**page-agent**](/user-guide/skills/optional/web-development/web-development-page-agent) | Embed alibaba/page-agent into your own web application — a pure-JavaScript in-page GUI agent that ships as a single &lt;script> tag or npm package and lets end-users of your site drive the UI with natural language ("click login, fill userna... |
+| [**page-agent**](/docs/user-guide/skills/optional/web-development/web-development-page-agent) | Embed alibaba/page-agent into your own web application — a pure-JavaScript in-page GUI agent that ships as a single &lt;script> tag or npm package and lets end-users of your site drive the UI with natural language ("click login, fill userna... |
 
 ---
 
diff --git a/website/docs/reference/skills-catalog.md b/website/docs/reference/skills-catalog.md
index 26d2a3d3a4b..5382a4b3537 100644
--- a/website/docs/reference/skills-catalog.md
+++ b/website/docs/reference/skills-catalog.md
@@ -16,186 +16,188 @@ If a skill is missing from this list but present in the repo, the catalog is reg
 
 | Skill | Description | Path |
 |-------|-------------|------|
-| [`apple-notes`](/user-guide/skills/bundled/apple/apple-apple-notes) | Manage Apple Notes via memo CLI: create, search, edit. | `apple/apple-notes` |
-| [`apple-reminders`](/user-guide/skills/bundled/apple/apple-apple-reminders) | Apple Reminders via remindctl: add, list, complete. | `apple/apple-reminders` |
-| [`findmy`](/user-guide/skills/bundled/apple/apple-findmy) | Track Apple devices/AirTags via FindMy.app on macOS. | `apple/findmy` |
-| [`imessage`](/user-guide/skills/bundled/apple/apple-imessage) | Send and receive iMessages/SMS via the imsg CLI on macOS. | `apple/imessage` |
-| [`macos-computer-use`](/user-guide/skills/bundled/apple/apple-macos-computer-use) | Drive the macOS desktop in the background — screenshots, mouse, keyboard, scroll, drag — without stealing the user's cursor, keyboard focus, or Space. Works with any tool-capable model. Load this skill whenever the `computer_use` tool is... | `apple/macos-computer-use` |
+| [`apple-notes`](/docs/user-guide/skills/bundled/apple/apple-apple-notes) | Manage Apple Notes via memo CLI: create, search, edit. | `apple/apple-notes` |
+| [`apple-reminders`](/docs/user-guide/skills/bundled/apple/apple-apple-reminders) | Apple Reminders via remindctl: add, list, complete. | `apple/apple-reminders` |
+| [`findmy`](/docs/user-guide/skills/bundled/apple/apple-findmy) | Track Apple devices/AirTags via FindMy.app on macOS. | `apple/findmy` |
+| [`imessage`](/docs/user-guide/skills/bundled/apple/apple-imessage) | Send and receive iMessages/SMS via the imsg CLI on macOS. | `apple/imessage` |
+| [`macos-computer-use`](/docs/user-guide/skills/bundled/apple/apple-macos-computer-use) | Drive the macOS desktop in the background — screenshots, mouse, keyboard, scroll, drag — without stealing the user's cursor, keyboard focus, or Space. Works with any tool-capable model. Load this skill whenever the `computer_use` tool is... | `apple/macos-computer-use` |
 
 ## autonomous-ai-agents
 
 | Skill | Description | Path |
 |-------|-------------|------|
-| [`claude-code`](/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-claude-code) | Delegate coding to Claude Code CLI (features, PRs). | `autonomous-ai-agents/claude-code` |
-| [`codex`](/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-codex) | Delegate coding to OpenAI Codex CLI (features, PRs). | `autonomous-ai-agents/codex` |
-| [`hermes-agent`](/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-hermes-agent) | Configure, extend, or contribute to Hermes Agent. | `autonomous-ai-agents/hermes-agent` |
-| [`opencode`](/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-opencode) | Delegate coding to OpenCode CLI (features, PR review). | `autonomous-ai-agents/opencode` |
+| [`claude-code`](/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-claude-code) | Delegate coding to Claude Code CLI (features, PRs). | `autonomous-ai-agents/claude-code` |
+| [`codex`](/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-codex) | Delegate coding to OpenAI Codex CLI (features, PRs). | `autonomous-ai-agents/codex` |
+| [`hermes-agent`](/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-hermes-agent) | Configure, extend, or contribute to Hermes Agent. | `autonomous-ai-agents/hermes-agent` |
+| [`kanban-codex-lane`](/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-kanban-codex-lane) | Use when a Hermes Kanban worker wants to run Codex CLI as an isolated implementation lane while Hermes keeps ownership of task lifecycle, reconciliation, testing, and handoff. | `autonomous-ai-agents/kanban-codex-lane` |
+| [`opencode`](/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-opencode) | Delegate coding to OpenCode CLI (features, PR review). | `autonomous-ai-agents/opencode` |
 
 ## creative
 
 | Skill | Description | Path |
 |-------|-------------|------|
-| [`architecture-diagram`](/user-guide/skills/bundled/creative/creative-architecture-diagram) | Dark-themed SVG architecture/cloud/infra diagrams as HTML. | `creative/architecture-diagram` |
-| [`ascii-art`](/user-guide/skills/bundled/creative/creative-ascii-art) | ASCII art: pyfiglet, cowsay, boxes, image-to-ascii. | `creative/ascii-art` |
-| [`ascii-video`](/user-guide/skills/bundled/creative/creative-ascii-video) | ASCII video: convert video/audio to colored ASCII MP4/GIF. | `creative/ascii-video` |
-| [`baoyu-article-illustrator`](/user-guide/skills/bundled/creative/creative-baoyu-article-illustrator) | Article illustrations: type × style × palette consistency. | `creative/baoyu-article-illustrator` |
-| [`baoyu-comic`](/user-guide/skills/bundled/creative/creative-baoyu-comic) | Knowledge comics (知识漫画): educational, biography, tutorial. | `creative/baoyu-comic` |
-| [`baoyu-infographic`](/user-guide/skills/bundled/creative/creative-baoyu-infographic) | Infographics: 21 layouts x 21 styles (信息图, 可视化). | `creative/baoyu-infographic` |
-| [`claude-design`](/user-guide/skills/bundled/creative/creative-claude-design) | Design one-off HTML artifacts (landing, deck, prototype). | `creative/claude-design` |
-| [`comfyui`](/user-guide/skills/bundled/creative/creative-comfyui) | Generate images, video, and audio with ComfyUI — install, launch, manage nodes/models, run workflows with parameter injection. Uses the official comfy-cli for lifecycle and direct REST/WebSocket API for execution. | `creative/comfyui` |
-| [`ideation`](/user-guide/skills/bundled/creative/creative-creative-ideation) | Generate project ideas via creative constraints. | `creative/creative-ideation` |
-| [`design-md`](/user-guide/skills/bundled/creative/creative-design-md) | Author/validate/export Google's DESIGN.md token spec files. | `creative/design-md` |
-| [`excalidraw`](/user-guide/skills/bundled/creative/creative-excalidraw) | Hand-drawn Excalidraw JSON diagrams (arch, flow, seq). | `creative/excalidraw` |
-| [`humanizer`](/user-guide/skills/bundled/creative/creative-humanizer) | Humanize text: strip AI-isms and add real voice. | `creative/humanizer` |
-| [`manim-video`](/user-guide/skills/bundled/creative/creative-manim-video) | Manim CE animations: 3Blue1Brown math/algo videos. | `creative/manim-video` |
-| [`p5js`](/user-guide/skills/bundled/creative/creative-p5js) | p5.js sketches: gen art, shaders, interactive, 3D. | `creative/p5js` |
-| [`pixel-art`](/user-guide/skills/bundled/creative/creative-pixel-art) | Pixel art w/ era palettes (NES, Game Boy, PICO-8). | `creative/pixel-art` |
-| [`popular-web-designs`](/user-guide/skills/bundled/creative/creative-popular-web-designs) | 54 real design systems (Stripe, Linear, Vercel) as HTML/CSS. | `creative/popular-web-designs` |
-| [`pretext`](/user-guide/skills/bundled/creative/creative-pretext) | Use when building creative browser demos with @chenglou/pretext — DOM-free text layout for ASCII art, typographic flow around obstacles, text-as-geometry games, kinetic typography, and text-powered generative art. Produces single-file HT... | `creative/pretext` |
-| [`sketch`](/user-guide/skills/bundled/creative/creative-sketch) | Throwaway HTML mockups: 2-3 design variants to compare. | `creative/sketch` |
-| [`songwriting-and-ai-music`](/user-guide/skills/bundled/creative/creative-songwriting-and-ai-music) | Songwriting craft and Suno AI music prompts. | `creative/songwriting-and-ai-music` |
-| [`touchdesigner-mcp`](/user-guide/skills/bundled/creative/creative-touchdesigner-mcp) | Control a running TouchDesigner instance via twozero MCP — create operators, set parameters, wire connections, execute Python, build real-time visuals. 36 native tools. | `creative/touchdesigner-mcp` |
+| [`architecture-diagram`](/docs/user-guide/skills/bundled/creative/creative-architecture-diagram) | Dark-themed SVG architecture/cloud/infra diagrams as HTML. | `creative/architecture-diagram` |
+| [`ascii-art`](/docs/user-guide/skills/bundled/creative/creative-ascii-art) | ASCII art: pyfiglet, cowsay, boxes, image-to-ascii. | `creative/ascii-art` |
+| [`ascii-video`](/docs/user-guide/skills/bundled/creative/creative-ascii-video) | ASCII video: convert video/audio to colored ASCII MP4/GIF. | `creative/ascii-video` |
+| [`baoyu-article-illustrator`](/docs/user-guide/skills/bundled/creative/creative-baoyu-article-illustrator) | Article illustrations: type × style × palette consistency. | `creative/baoyu-article-illustrator` |
+| [`baoyu-comic`](/docs/user-guide/skills/bundled/creative/creative-baoyu-comic) | Knowledge comics (知识漫画): educational, biography, tutorial. | `creative/baoyu-comic` |
+| [`baoyu-infographic`](/docs/user-guide/skills/bundled/creative/creative-baoyu-infographic) | Infographics: 21 layouts x 21 styles (信息图, 可视化). | `creative/baoyu-infographic` |
+| [`claude-design`](/docs/user-guide/skills/bundled/creative/creative-claude-design) | Design one-off HTML artifacts (landing, deck, prototype). | `creative/claude-design` |
+| [`comfyui`](/docs/user-guide/skills/bundled/creative/creative-comfyui) | Generate images, video, and audio with ComfyUI — install, launch, manage nodes/models, run workflows with parameter injection. Uses the official comfy-cli for lifecycle and direct REST/WebSocket API for execution. | `creative/comfyui` |
+| [`ideation`](/docs/user-guide/skills/bundled/creative/creative-creative-ideation) | Generate project ideas via creative constraints. | `creative/creative-ideation` |
+| [`design-md`](/docs/user-guide/skills/bundled/creative/creative-design-md) | Author/validate/export Google's DESIGN.md token spec files. | `creative/design-md` |
+| [`excalidraw`](/docs/user-guide/skills/bundled/creative/creative-excalidraw) | Hand-drawn Excalidraw JSON diagrams (arch, flow, seq). | `creative/excalidraw` |
+| [`humanizer`](/docs/user-guide/skills/bundled/creative/creative-humanizer) | Humanize text: strip AI-isms and add real voice. | `creative/humanizer` |
+| [`manim-video`](/docs/user-guide/skills/bundled/creative/creative-manim-video) | Manim CE animations: 3Blue1Brown math/algo videos. | `creative/manim-video` |
+| [`p5js`](/docs/user-guide/skills/bundled/creative/creative-p5js) | p5.js sketches: gen art, shaders, interactive, 3D. | `creative/p5js` |
+| [`pixel-art`](/docs/user-guide/skills/bundled/creative/creative-pixel-art) | Pixel art w/ era palettes (NES, Game Boy, PICO-8). | `creative/pixel-art` |
+| [`popular-web-designs`](/docs/user-guide/skills/bundled/creative/creative-popular-web-designs) | 54 real design systems (Stripe, Linear, Vercel) as HTML/CSS. | `creative/popular-web-designs` |
+| [`pretext`](/docs/user-guide/skills/bundled/creative/creative-pretext) | Use when building creative browser demos with @chenglou/pretext — DOM-free text layout for ASCII art, typographic flow around obstacles, text-as-geometry games, kinetic typography, and text-powered generative art. Produces single-file HT... | `creative/pretext` |
+| [`sketch`](/docs/user-guide/skills/bundled/creative/creative-sketch) | Throwaway HTML mockups: 2-3 design variants to compare. | `creative/sketch` |
+| [`songwriting-and-ai-music`](/docs/user-guide/skills/bundled/creative/creative-songwriting-and-ai-music) | Songwriting craft and Suno AI music prompts. | `creative/songwriting-and-ai-music` |
+| [`touchdesigner-mcp`](/docs/user-guide/skills/bundled/creative/creative-touchdesigner-mcp) | Control a running TouchDesigner instance via twozero MCP — create operators, set parameters, wire connections, execute Python, build real-time visuals. 36 native tools. | `creative/touchdesigner-mcp` |
 
 ## data-science
 
 | Skill | Description | Path |
 |-------|-------------|------|
-| [`jupyter-live-kernel`](/user-guide/skills/bundled/data-science/data-science-jupyter-live-kernel) | Iterative Python via live Jupyter kernel (hamelnb). | `data-science/jupyter-live-kernel` |
+| [`jupyter-live-kernel`](/docs/user-guide/skills/bundled/data-science/data-science-jupyter-live-kernel) | Iterative Python via live Jupyter kernel (hamelnb). | `data-science/jupyter-live-kernel` |
 
 ## devops
 
 | Skill | Description | Path |
 |-------|-------------|------|
-| [`kanban-orchestrator`](/user-guide/skills/bundled/devops/devops-kanban-orchestrator) | Decomposition playbook + anti-temptation rules for an orchestrator profile routing work through Kanban. The "don't do the work yourself" rule and the basic lifecycle are auto-injected into every kanban worker's system prompt; this skill... | `devops/kanban-orchestrator` |
-| [`kanban-worker`](/user-guide/skills/bundled/devops/devops-kanban-worker) | Pitfalls, examples, and edge cases for Hermes Kanban workers. The lifecycle itself is auto-injected into every worker's system prompt as KANBAN_GUIDANCE (from agent/prompt_builder.py); this skill is what you load when you want deeper det... | `devops/kanban-worker` |
-| [`webhook-subscriptions`](/user-guide/skills/bundled/devops/devops-webhook-subscriptions) | Webhook subscriptions: event-driven agent runs. | `devops/webhook-subscriptions` |
+| [`kanban-orchestrator`](/docs/user-guide/skills/bundled/devops/devops-kanban-orchestrator) | Decomposition playbook + anti-temptation rules for an orchestrator profile routing work through Kanban. The "don't do the work yourself" rule and the basic lifecycle are auto-injected into every kanban worker's system prompt; this skill... | `devops/kanban-orchestrator` |
+| [`kanban-worker`](/docs/user-guide/skills/bundled/devops/devops-kanban-worker) | Pitfalls, examples, and edge cases for Hermes Kanban workers. The lifecycle itself is auto-injected into every worker's system prompt as KANBAN_GUIDANCE (from agent/prompt_builder.py); this skill is what you load when you want deeper det... | `devops/kanban-worker` |
+| [`webhook-subscriptions`](/docs/user-guide/skills/bundled/devops/devops-webhook-subscriptions) | Webhook subscriptions: event-driven agent runs. | `devops/webhook-subscriptions` |
 
 ## dogfood
 
 | Skill | Description | Path |
 |-------|-------------|------|
-| [`dogfood`](/user-guide/skills/bundled/dogfood/dogfood-dogfood) | Exploratory QA of web apps: find bugs, evidence, reports. | `dogfood` |
+| [`dogfood`](/docs/user-guide/skills/bundled/dogfood/dogfood-dogfood) | Exploratory QA of web apps: find bugs, evidence, reports. | `dogfood` |
 
 ## email
 
 | Skill | Description | Path |
 |-------|-------------|------|
-| [`himalaya`](/user-guide/skills/bundled/email/email-himalaya) | Himalaya CLI: IMAP/SMTP email from terminal. | `email/himalaya` |
+| [`himalaya`](/docs/user-guide/skills/bundled/email/email-himalaya) | Himalaya CLI: IMAP/SMTP email from terminal. | `email/himalaya` |
 
 ## gaming
 
 | Skill | Description | Path |
 |-------|-------------|------|
-| [`minecraft-modpack-server`](/user-guide/skills/bundled/gaming/gaming-minecraft-modpack-server) | Host modded Minecraft servers (CurseForge, Modrinth). | `gaming/minecraft-modpack-server` |
-| [`pokemon-player`](/user-guide/skills/bundled/gaming/gaming-pokemon-player) | Play Pokemon via headless emulator + RAM reads. | `gaming/pokemon-player` |
+| [`minecraft-modpack-server`](/docs/user-guide/skills/bundled/gaming/gaming-minecraft-modpack-server) | Host modded Minecraft servers (CurseForge, Modrinth). | `gaming/minecraft-modpack-server` |
+| [`pokemon-player`](/docs/user-guide/skills/bundled/gaming/gaming-pokemon-player) | Play Pokemon via headless emulator + RAM reads. | `gaming/pokemon-player` |
 
 ## github
 
 | Skill | Description | Path |
 |-------|-------------|------|
-| [`codebase-inspection`](/user-guide/skills/bundled/github/github-codebase-inspection) | Inspect codebases w/ pygount: LOC, languages, ratios. | `github/codebase-inspection` |
-| [`github-auth`](/user-guide/skills/bundled/github/github-github-auth) | GitHub auth setup: HTTPS tokens, SSH keys, gh CLI login. | `github/github-auth` |
-| [`github-code-review`](/user-guide/skills/bundled/github/github-github-code-review) | Review PRs: diffs, inline comments via gh or REST. | `github/github-code-review` |
-| [`github-issues`](/user-guide/skills/bundled/github/github-github-issues) | Create, triage, label, assign GitHub issues via gh or REST. | `github/github-issues` |
-| [`github-pr-workflow`](/user-guide/skills/bundled/github/github-github-pr-workflow) | GitHub PR lifecycle: branch, commit, open, CI, merge. | `github/github-pr-workflow` |
-| [`github-repo-management`](/user-guide/skills/bundled/github/github-github-repo-management) | Clone/create/fork repos; manage remotes, releases. | `github/github-repo-management` |
+| [`codebase-inspection`](/docs/user-guide/skills/bundled/github/github-codebase-inspection) | Inspect codebases w/ pygount: LOC, languages, ratios. | `github/codebase-inspection` |
+| [`github-auth`](/docs/user-guide/skills/bundled/github/github-github-auth) | GitHub auth setup: HTTPS tokens, SSH keys, gh CLI login. | `github/github-auth` |
+| [`github-code-review`](/docs/user-guide/skills/bundled/github/github-github-code-review) | Review PRs: diffs, inline comments via gh or REST. | `github/github-code-review` |
+| [`github-issues`](/docs/user-guide/skills/bundled/github/github-github-issues) | Create, triage, label, assign GitHub issues via gh or REST. | `github/github-issues` |
+| [`github-pr-workflow`](/docs/user-guide/skills/bundled/github/github-github-pr-workflow) | GitHub PR lifecycle: branch, commit, open, CI, merge. | `github/github-pr-workflow` |
+| [`github-repo-management`](/docs/user-guide/skills/bundled/github/github-github-repo-management) | Clone/create/fork repos; manage remotes, releases. | `github/github-repo-management` |
 
 ## mcp
 
 | Skill | Description | Path |
 |-------|-------------|------|
-| [`native-mcp`](/user-guide/skills/bundled/mcp/mcp-native-mcp) | MCP client: connect servers, register tools (stdio/HTTP). | `mcp/native-mcp` |
+| [`native-mcp`](/docs/user-guide/skills/bundled/mcp/mcp-native-mcp) | MCP client: connect servers, register tools (stdio/HTTP). | `mcp/native-mcp` |
 
 ## media
 
 | Skill | Description | Path |
 |-------|-------------|------|
-| [`gif-search`](/user-guide/skills/bundled/media/media-gif-search) | Search/download GIFs from Tenor via curl + jq. | `media/gif-search` |
-| [`heartmula`](/user-guide/skills/bundled/media/media-heartmula) | HeartMuLa: Suno-like song generation from lyrics + tags. | `media/heartmula` |
-| [`songsee`](/user-guide/skills/bundled/media/media-songsee) | Audio spectrograms/features (mel, chroma, MFCC) via CLI. | `media/songsee` |
-| [`spotify`](/user-guide/skills/bundled/media/media-spotify) | Spotify: play, search, queue, manage playlists and devices. | `media/spotify` |
-| [`youtube-content`](/user-guide/skills/bundled/media/media-youtube-content) | YouTube transcripts to summaries, threads, blogs. | `media/youtube-content` |
+| [`gif-search`](/docs/user-guide/skills/bundled/media/media-gif-search) | Search/download GIFs from Tenor via curl + jq. | `media/gif-search` |
+| [`heartmula`](/docs/user-guide/skills/bundled/media/media-heartmula) | HeartMuLa: Suno-like song generation from lyrics + tags. | `media/heartmula` |
+| [`songsee`](/docs/user-guide/skills/bundled/media/media-songsee) | Audio spectrograms/features (mel, chroma, MFCC) via CLI. | `media/songsee` |
+| [`spotify`](/docs/user-guide/skills/bundled/media/media-spotify) | Spotify: play, search, queue, manage playlists and devices. | `media/spotify` |
+| [`youtube-content`](/docs/user-guide/skills/bundled/media/media-youtube-content) | YouTube transcripts to summaries, threads, blogs. | `media/youtube-content` |
 
 ## mlops
 
 | Skill | Description | Path |
 |-------|-------------|------|
-| [`audiocraft-audio-generation`](/user-guide/skills/bundled/mlops/mlops-models-audiocraft) | AudioCraft: MusicGen text-to-music, AudioGen text-to-sound. | `mlops/models/audiocraft` |
-| [`dspy`](/user-guide/skills/bundled/mlops/mlops-research-dspy) | DSPy: declarative LM programs, auto-optimize prompts, RAG. | `mlops/research/dspy` |
-| [`huggingface-hub`](/user-guide/skills/bundled/mlops/mlops-huggingface-hub) | HuggingFace hf CLI: search/download/upload models, datasets. | `mlops/huggingface-hub` |
-| [`llama-cpp`](/user-guide/skills/bundled/mlops/mlops-inference-llama-cpp) | llama.cpp local GGUF inference + HF Hub model discovery. | `mlops/inference/llama-cpp` |
-| [`evaluating-llms-harness`](/user-guide/skills/bundled/mlops/mlops-evaluation-lm-evaluation-harness) | lm-eval-harness: benchmark LLMs (MMLU, GSM8K, etc.). | `mlops/evaluation/lm-evaluation-harness` |
-| [`obliteratus`](/user-guide/skills/bundled/mlops/mlops-inference-obliteratus) | OBLITERATUS: abliterate LLM refusals (diff-in-means). | `mlops/inference/obliteratus` |
-| [`segment-anything-model`](/user-guide/skills/bundled/mlops/mlops-models-segment-anything) | SAM: zero-shot image segmentation via points, boxes, masks. | `mlops/models/segment-anything` |
-| [`serving-llms-vllm`](/user-guide/skills/bundled/mlops/mlops-inference-vllm) | vLLM: high-throughput LLM serving, OpenAI API, quantization. | `mlops/inference/vllm` |
-| [`weights-and-biases`](/user-guide/skills/bundled/mlops/mlops-evaluation-weights-and-biases) | W&B: log ML experiments, sweeps, model registry, dashboards. | `mlops/evaluation/weights-and-biases` |
+| [`audiocraft-audio-generation`](/docs/user-guide/skills/bundled/mlops/mlops-models-audiocraft) | AudioCraft: MusicGen text-to-music, AudioGen text-to-sound. | `mlops/models/audiocraft` |
+| [`dspy`](/docs/user-guide/skills/bundled/mlops/mlops-research-dspy) | DSPy: declarative LM programs, auto-optimize prompts, RAG. | `mlops/research/dspy` |
+| [`huggingface-hub`](/docs/user-guide/skills/bundled/mlops/mlops-huggingface-hub) | HuggingFace hf CLI: search/download/upload models, datasets. | `mlops/huggingface-hub` |
+| [`llama-cpp`](/docs/user-guide/skills/bundled/mlops/mlops-inference-llama-cpp) | llama.cpp local GGUF inference + HF Hub model discovery. | `mlops/inference/llama-cpp` |
+| [`evaluating-llms-harness`](/docs/user-guide/skills/bundled/mlops/mlops-evaluation-lm-evaluation-harness) | lm-eval-harness: benchmark LLMs (MMLU, GSM8K, etc.). | `mlops/evaluation/lm-evaluation-harness` |
+| [`obliteratus`](/docs/user-guide/skills/bundled/mlops/mlops-inference-obliteratus) | OBLITERATUS: abliterate LLM refusals (diff-in-means). | `mlops/inference/obliteratus` |
+| [`segment-anything-model`](/docs/user-guide/skills/bundled/mlops/mlops-models-segment-anything) | SAM: zero-shot image segmentation via points, boxes, masks. | `mlops/models/segment-anything` |
+| [`serving-llms-vllm`](/docs/user-guide/skills/bundled/mlops/mlops-inference-vllm) | vLLM: high-throughput LLM serving, OpenAI API, quantization. | `mlops/inference/vllm` |
+| [`weights-and-biases`](/docs/user-guide/skills/bundled/mlops/mlops-evaluation-weights-and-biases) | W&B: log ML experiments, sweeps, model registry, dashboards. | `mlops/evaluation/weights-and-biases` |
 
 ## note-taking
 
 | Skill | Description | Path |
 |-------|-------------|------|
-| [`obsidian`](/user-guide/skills/bundled/note-taking/note-taking-obsidian) | Read, search, create, and edit notes in the Obsidian vault. | `note-taking/obsidian` |
+| [`obsidian`](/docs/user-guide/skills/bundled/note-taking/note-taking-obsidian) | Read, search, create, and edit notes in the Obsidian vault. | `note-taking/obsidian` |
 
 ## productivity
 
 | Skill | Description | Path |
 |-------|-------------|------|
-| [`airtable`](/user-guide/skills/bundled/productivity/productivity-airtable) | Airtable REST API via curl. Records CRUD, filters, upserts. | `productivity/airtable` |
-| [`google-workspace`](/user-guide/skills/bundled/productivity/productivity-google-workspace) | Gmail, Calendar, Drive, Docs, Sheets via gws CLI or Python. | `productivity/google-workspace` |
-| [`linear`](/user-guide/skills/bundled/productivity/productivity-linear) | Linear: manage issues, projects, teams via GraphQL + curl. | `productivity/linear` |
-| [`maps`](/user-guide/skills/bundled/productivity/productivity-maps) | Geocode, POIs, routes, timezones via OpenStreetMap/OSRM. | `productivity/maps` |
-| [`nano-pdf`](/user-guide/skills/bundled/productivity/productivity-nano-pdf) | Edit PDF text/typos/titles via nano-pdf CLI (NL prompts). | `productivity/nano-pdf` |
-| [`notion`](/user-guide/skills/bundled/productivity/productivity-notion) | Notion API + ntn CLI: pages, databases, markdown, Workers. | `productivity/notion` |
-| [`ocr-and-documents`](/user-guide/skills/bundled/productivity/productivity-ocr-and-documents) | Extract text from PDFs/scans (pymupdf, marker-pdf). | `productivity/ocr-and-documents` |
-| [`powerpoint`](/user-guide/skills/bundled/productivity/productivity-powerpoint) | Create, read, edit .pptx decks, slides, notes, templates. | `productivity/powerpoint` |
-| [`teams-meeting-pipeline`](/user-guide/skills/bundled/productivity/productivity-teams-meeting-pipeline) | Operate the Teams meeting summary pipeline via Hermes CLI — summarize meetings, inspect pipeline status, replay jobs, manage Microsoft Graph subscriptions. | `productivity/teams-meeting-pipeline` |
+| [`airtable`](/docs/user-guide/skills/bundled/productivity/productivity-airtable) | Airtable REST API via curl. Records CRUD, filters, upserts. | `productivity/airtable` |
+| [`google-workspace`](/docs/user-guide/skills/bundled/productivity/productivity-google-workspace) | Gmail, Calendar, Drive, Docs, Sheets via gws CLI or Python. | `productivity/google-workspace` |
+| [`linear`](/docs/user-guide/skills/bundled/productivity/productivity-linear) | Linear: manage issues, projects, teams via GraphQL + curl. | `productivity/linear` |
+| [`maps`](/docs/user-guide/skills/bundled/productivity/productivity-maps) | Geocode, POIs, routes, timezones via OpenStreetMap/OSRM. | `productivity/maps` |
+| [`nano-pdf`](/docs/user-guide/skills/bundled/productivity/productivity-nano-pdf) | Edit PDF text/typos/titles via nano-pdf CLI (NL prompts). | `productivity/nano-pdf` |
+| [`notion`](/docs/user-guide/skills/bundled/productivity/productivity-notion) | Notion API + ntn CLI: pages, databases, markdown, Workers. | `productivity/notion` |
+| [`ocr-and-documents`](/docs/user-guide/skills/bundled/productivity/productivity-ocr-and-documents) | Extract text from PDFs/scans (pymupdf, marker-pdf). | `productivity/ocr-and-documents` |
+| [`powerpoint`](/docs/user-guide/skills/bundled/productivity/productivity-powerpoint) | Create, read, edit .pptx decks, slides, notes, templates. | `productivity/powerpoint` |
+| [`teams-meeting-pipeline`](/docs/user-guide/skills/bundled/productivity/productivity-teams-meeting-pipeline) | Operate the Teams meeting summary pipeline via Hermes CLI — summarize meetings, inspect pipeline status, replay jobs, manage Microsoft Graph subscriptions. | `productivity/teams-meeting-pipeline` |
 
 ## red-teaming
 
 | Skill | Description | Path |
 |-------|-------------|------|
-| [`godmode`](/user-guide/skills/bundled/red-teaming/red-teaming-godmode) | Jailbreak LLMs: Parseltongue, GODMODE, ULTRAPLINIAN. | `red-teaming/godmode` |
+| [`godmode`](/docs/user-guide/skills/bundled/red-teaming/red-teaming-godmode) | Jailbreak LLMs: Parseltongue, GODMODE, ULTRAPLINIAN. | `red-teaming/godmode` |
 
 ## research
 
 | Skill | Description | Path |
 |-------|-------------|------|
-| [`arxiv`](/user-guide/skills/bundled/research/research-arxiv) | Search arXiv papers by keyword, author, category, or ID. | `research/arxiv` |
-| [`blogwatcher`](/user-guide/skills/bundled/research/research-blogwatcher) | Monitor blogs and RSS/Atom feeds via blogwatcher-cli tool. | `research/blogwatcher` |
-| [`llm-wiki`](/user-guide/skills/bundled/research/research-llm-wiki) | Karpathy's LLM Wiki: build/query interlinked markdown KB. | `research/llm-wiki` |
-| [`polymarket`](/user-guide/skills/bundled/research/research-polymarket) | Query Polymarket: markets, prices, orderbooks, history. | `research/polymarket` |
-| [`research-paper-writing`](/user-guide/skills/bundled/research/research-research-paper-writing) | Write ML papers for NeurIPS/ICML/ICLR: design→submit. | `research/research-paper-writing` |
+| [`arxiv`](/docs/user-guide/skills/bundled/research/research-arxiv) | Search arXiv papers by keyword, author, category, or ID. | `research/arxiv` |
+| [`blogwatcher`](/docs/user-guide/skills/bundled/research/research-blogwatcher) | Monitor blogs and RSS/Atom feeds via blogwatcher-cli tool. | `research/blogwatcher` |
+| [`llm-wiki`](/docs/user-guide/skills/bundled/research/research-llm-wiki) | Karpathy's LLM Wiki: build/query interlinked markdown KB. | `research/llm-wiki` |
+| [`polymarket`](/docs/user-guide/skills/bundled/research/research-polymarket) | Query Polymarket: markets, prices, orderbooks, history. | `research/polymarket` |
+| [`research-paper-writing`](/docs/user-guide/skills/bundled/research/research-research-paper-writing) | Write ML papers for NeurIPS/ICML/ICLR: design→submit. | `research/research-paper-writing` |
 
 ## smart-home
 
 | Skill | Description | Path |
 |-------|-------------|------|
-| [`openhue`](/user-guide/skills/bundled/smart-home/smart-home-openhue) | Control Philips Hue lights, scenes, rooms via OpenHue CLI. | `smart-home/openhue` |
+| [`openhue`](/docs/user-guide/skills/bundled/smart-home/smart-home-openhue) | Control Philips Hue lights, scenes, rooms via OpenHue CLI. | `smart-home/openhue` |
 
 ## social-media
 
 | Skill | Description | Path |
 |-------|-------------|------|
-| [`xurl`](/user-guide/skills/bundled/social-media/social-media-xurl) | X/Twitter via xurl CLI: post, search, DM, media, v2 API. | `social-media/xurl` |
+| [`xurl`](/docs/user-guide/skills/bundled/social-media/social-media-xurl) | X/Twitter via xurl CLI: post, search, DM, media, v2 API. | `social-media/xurl` |
 
 ## software-development
 
 | Skill | Description | Path |
 |-------|-------------|------|
-| [`debugging-hermes-tui-commands`](/user-guide/skills/bundled/software-development/software-development-debugging-hermes-tui-commands) | Debug Hermes TUI slash commands: Python, gateway, Ink UI. | `software-development/debugging-hermes-tui-commands` |
-| [`hermes-agent-skill-authoring`](/user-guide/skills/bundled/software-development/software-development-hermes-agent-skill-authoring) | Author in-repo SKILL.md: frontmatter, validator, structure. | `software-development/hermes-agent-skill-authoring` |
-| [`node-inspect-debugger`](/user-guide/skills/bundled/software-development/software-development-node-inspect-debugger) | Debug Node.js via --inspect + Chrome DevTools Protocol CLI. | `software-development/node-inspect-debugger` |
-| [`plan`](/user-guide/skills/bundled/software-development/software-development-plan) | Plan mode: write markdown plan to .hermes/plans/, no exec. | `software-development/plan` |
-| [`python-debugpy`](/user-guide/skills/bundled/software-development/software-development-python-debugpy) | Debug Python: pdb REPL + debugpy remote (DAP). | `software-development/python-debugpy` |
-| [`requesting-code-review`](/user-guide/skills/bundled/software-development/software-development-requesting-code-review) | Pre-commit review: security scan, quality gates, auto-fix. | `software-development/requesting-code-review` |
-| [`spike`](/user-guide/skills/bundled/software-development/software-development-spike) | Throwaway experiments to validate an idea before build. | `software-development/spike` |
-| [`subagent-driven-development`](/user-guide/skills/bundled/software-development/software-development-subagent-driven-development) | Execute plans via delegate_task subagents (2-stage review). | `software-development/subagent-driven-development` |
-| [`systematic-debugging`](/user-guide/skills/bundled/software-development/software-development-systematic-debugging) | 4-phase root cause debugging: understand bugs before fixing. | `software-development/systematic-debugging` |
-| [`test-driven-development`](/user-guide/skills/bundled/software-development/software-development-test-driven-development) | TDD: enforce RED-GREEN-REFACTOR, tests before code. | `software-development/test-driven-development` |
-| [`writing-plans`](/user-guide/skills/bundled/software-development/software-development-writing-plans) | Write implementation plans: bite-sized tasks, paths, code. | `software-development/writing-plans` |
+| [`debugging-hermes-tui-commands`](/docs/user-guide/skills/bundled/software-development/software-development-debugging-hermes-tui-commands) | Debug Hermes TUI slash commands: Python, gateway, Ink UI. | `software-development/debugging-hermes-tui-commands` |
+| [`hermes-agent-skill-authoring`](/docs/user-guide/skills/bundled/software-development/software-development-hermes-agent-skill-authoring) | Author in-repo SKILL.md: frontmatter, validator, structure. | `software-development/hermes-agent-skill-authoring` |
+| [`hermes-s6-container-supervision`](/docs/user-guide/skills/bundled/software-development/software-development-hermes-s6-container-supervision) | Modify, debug, or extend the s6-overlay supervision tree inside the Hermes Agent Docker image — adding new services, debugging profile gateways, understanding the Architecture B main-program pattern. | `software-development/hermes-s6-container-supervision` |
+| [`node-inspect-debugger`](/docs/user-guide/skills/bundled/software-development/software-development-node-inspect-debugger) | Debug Node.js via --inspect + Chrome DevTools Protocol CLI. | `software-development/node-inspect-debugger` |
+| [`plan`](/docs/user-guide/skills/bundled/software-development/software-development-plan) | Plan mode: write markdown plan to .hermes/plans/, no exec. | `software-development/plan` |
+| [`python-debugpy`](/docs/user-guide/skills/bundled/software-development/software-development-python-debugpy) | Debug Python: pdb REPL + debugpy remote (DAP). | `software-development/python-debugpy` |
+| [`requesting-code-review`](/docs/user-guide/skills/bundled/software-development/software-development-requesting-code-review) | Pre-commit review: security scan, quality gates, auto-fix. | `software-development/requesting-code-review` |
+| [`spike`](/docs/user-guide/skills/bundled/software-development/software-development-spike) | Throwaway experiments to validate an idea before build. | `software-development/spike` |
+| [`subagent-driven-development`](/docs/user-guide/skills/bundled/software-development/software-development-subagent-driven-development) | Execute plans via delegate_task subagents (2-stage review). | `software-development/subagent-driven-development` |
+| [`systematic-debugging`](/docs/user-guide/skills/bundled/software-development/software-development-systematic-debugging) | 4-phase root cause debugging: understand bugs before fixing. | `software-development/systematic-debugging` |
+| [`test-driven-development`](/docs/user-guide/skills/bundled/software-development/software-development-test-driven-development) | TDD: enforce RED-GREEN-REFACTOR, tests before code. | `software-development/test-driven-development` |
+| [`writing-plans`](/docs/user-guide/skills/bundled/software-development/software-development-writing-plans) | Write implementation plans: bite-sized tasks, paths, code. | `software-development/writing-plans` |
 
 ## yuanbao
 
 | Skill | Description | Path |
 |-------|-------------|------|
-| [`yuanbao`](/user-guide/skills/bundled/yuanbao/yuanbao-yuanbao) | Yuanbao (元宝) groups: @mention users, query info/members. | `yuanbao` |
+| [`yuanbao`](/docs/user-guide/skills/bundled/yuanbao/yuanbao-yuanbao) | Yuanbao (元宝) groups: @mention users, query info/members. | `yuanbao` |
diff --git a/website/docs/reference/slash-commands.md b/website/docs/reference/slash-commands.md
index 5d600048be3..d90e5227c5a 100644
--- a/website/docs/reference/slash-commands.md
+++ b/website/docs/reference/slash-commands.md
@@ -43,7 +43,7 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in
 | `/retry` | Retry the last message (resend to agent) |
 | `/undo` | Remove the last user/assistant exchange |
 | `/title` | Set a title for the current session (usage: /title My Session Name) |
-| `/compress [focus topic]` | Manually compress conversation context (flush memories + summarize). Optional focus topic narrows what the summary preserves. |
+| `/compress [here [N] \| focus topic]` | Manually compress conversation context (flush memories + summarize). `/compress here [N]` summarizes everything except the most recent N exchanges (default 2), kept verbatim — pick your own compression boundary. A focus topic narrows what a full summary preserves. |
 | `/rollback` | List or restore filesystem checkpoints (usage: /rollback [number]) |
 | `/snapshot [create\|restore <id>\|prune]` (alias: `/snap`) | Create or restore state snapshots of Hermes config/state. `create [label]` saves a snapshot, `restore <id>` reverts to it, `prune [N]` removes old snapshots, or list all with no args. |
 | `/stop` | Kill all running background processes |
@@ -87,6 +87,7 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in
 | `/toolsets` | List available toolsets |
 | `/browser [connect\|disconnect\|status]` | Manage a local Chromium-family CDP connection. `connect` attaches browser tools to a running Chrome, Brave, Chromium, or Edge instance (default: `http://127.0.0.1:9222`). `disconnect` detaches. `status` shows current connection. Auto-launches a supported Chromium-family browser if no debugger is detected. |
 | `/skills` | Search, install, inspect, or manage skills from online registries |
+| `/bundles` | List configured skill bundles — `/<name>` slash aliases that preload several skills at once. Configure under `bundles:` in `~/.hermes/config.yaml`. See [Skill Bundles](/user-guide/features/skills#skill-bundles). |
 | `/cron` | Manage scheduled tasks (list, add/create, edit, pause, resume, run, remove) |
 | `/curator` | Background skill maintenance — `status`, `run`, `pin`, `archive`. See [Curator](/user-guide/features/curator). |
 | `/kanban <action>` | Drive the multi-profile, multi-project collaboration board without leaving chat. Full `hermes kanban` surface is available: `/kanban list`, `/kanban show t_abc`, `/kanban create "title" --assignee X`, `/kanban comment t_abc "text"`, `/kanban unblock t_abc`, `/kanban dispatch`, etc. Multi-board support included: `/kanban boards list`, `/kanban boards create <slug>`, `/kanban boards switch <slug>`, `/kanban --board <slug> <action>`. See [Kanban slash command](/user-guide/features/kanban#kanban-slash-command). |
@@ -193,6 +194,7 @@ The messaging gateway supports the following built-in commands inside Telegram,
 
 | Command | Description |
 |---------|-------------|
+| `/start` | Platform-protocol command. Many chat platforms (Telegram, Discord, …) send `/start` automatically the first time a user opens a bot conversation. Hermes acknowledges the ping silently — no agent reply, no session burn — so first-contact handshakes don't waste a turn. You can also send it explicitly to confirm the gateway is reachable. |
 | `/new` | Start a new conversation. |
 | `/reset` | Reset conversation history. |
 | `/status` | Show session info, followed by a local **Session recap** block (recent turn counts, top tools used, files touched, latest prompt + reply). |
@@ -204,7 +206,7 @@ The messaging gateway supports the following built-in commands inside Telegram,
 | `/retry` | Retry the last message. |
 | `/undo` | Remove the last exchange. |
 | `/sethome` (alias: `/set-home`) | Mark the current chat as the platform home channel for deliveries. |
-| `/compress [focus topic]` | Manually compress conversation context. Optional focus topic narrows what the summary preserves. |
+| `/compress [here [N] \| focus topic]` | Manually compress conversation context. `/compress here [N]` keeps the most recent N exchanges (default 2) verbatim and summarizes the rest. A focus topic narrows what a full summary preserves. |
 | `/topic [off\|help\|session-id]` | **Telegram DM only.** Manage user-managed multi-session topic mode. `/topic` enables it or shows status; `/topic off` disables it and clears bindings; `/topic help` shows usage; `/topic <session-id>` inside a topic restores a previous session. See [Multi-session DM mode](/user-guide/messaging/telegram#multi-session-dm-mode-topic). |
 | `/title [name]` | Set or show the session title. |
 | `/resume [name]` | Resume a previously named session. |
diff --git a/website/docs/reference/tools-reference.md b/website/docs/reference/tools-reference.md
index ff1bea7d00b..bc0f62043f2 100644
--- a/website/docs/reference/tools-reference.md
+++ b/website/docs/reference/tools-reference.md
@@ -8,7 +8,7 @@ description: "Authoritative reference for Hermes built-in tools, grouped by tool
 
 This page documents Hermes' built-in tools, grouped by toolset. Availability varies by platform, credentials, and enabled toolsets.
 
-**Quick counts (current registry):** ~70 tools — 10 browser tools (core) + 2 CDP-gated browser tools, 4 file tools, 10 RL tools, 4 Home Assistant tools, 2 terminal tools, 2 web tools, 5 Feishu tools, 7 Spotify tools (registered by the bundled `spotify` plugin), 5 Yuanbao tools, 7 kanban tools (registered when the kanban dispatcher spawns the agent), 2 Discord tools, and a handful of standalone tools (`memory`, `clarify`, `delegate_task`, `execute_code`, `cronjob`, `session_search`, `skill_view`/`skill_manage`/`skills_list`, `text_to_speech`, `image_generate`, `vision_analyze`, `video_analyze`, `mixture_of_agents`, `send_message`, `todo`, `computer_use`, `process`).
+**Quick counts (current registry):** ~64 tools — 10 browser tools (core) + 2 CDP-gated browser tools, 4 file tools, 4 Home Assistant tools, 2 terminal tools, 2 web tools, 5 Feishu tools, 7 Spotify tools (registered by the bundled `spotify` plugin), 5 Yuanbao tools, 9 kanban tools (registered when the kanban dispatcher spawns the agent), 2 Discord tools, and a handful of standalone tools (`memory`, `clarify`, `delegate_task`, `execute_code`, `cronjob`, `session_search`, `skill_view`/`skill_manage`/`skills_list`, `text_to_speech`, `image_generate`, `video_generate`, `vision_analyze`, `video_analyze`, `mixture_of_agents`, `send_message`, `todo`, `computer_use`, `process`).
 
 :::tip MCP Tools
 In addition to built-in tools, Hermes can load tools dynamically from MCP servers. MCP tools appear with the prefix `mcp_<server>_` (e.g., `mcp_github_create_issue` for the `github` MCP server). See [MCP Integration](/user-guide/features/mcp) for configuration.
diff --git a/website/docs/user-guide/cli.md b/website/docs/user-guide/cli.md
index 2a768eb12e2..71d1c14af1e 100644
--- a/website/docs/user-guide/cli.md
+++ b/website/docs/user-guide/cli.md
@@ -8,6 +8,10 @@ description: "Master the Hermes Agent terminal interface — commands, keybindin
 
 Hermes Agent's CLI is a full terminal user interface (TUI) — not a web UI. It features multiline editing, slash-command autocomplete, conversation history, interrupt-and-redirect, and streaming tool output. Built for people who live in the terminal.
 
+:::tip First-time setup
+One command — `hermes setup --portal` — and you're ready to `hermes chat`. See [Nous Portal](/integrations/nous-portal).
+:::
+
 :::tip
 Hermes also ships a modern TUI with modal overlays, mouse selection, and non-blocking input. Launch it with `hermes --tui` — see the [TUI](tui.md) guide.
 :::
@@ -49,7 +53,7 @@ hermes -w -q "Fix issue #123"     # Single query in worktree
 
 ## Interface Layout
 
-<img className="docs-terminal-figure" src="/img/docs/cli-layout.svg" alt="Stylized preview of the Hermes CLI layout showing the banner, conversation area, and fixed input prompt." />
+<img className="docs-terminal-figure" src="/docs/img/docs/cli-layout.svg" alt="Stylized preview of the Hermes CLI layout showing the banner, conversation area, and fixed input prompt." />
 <p className="docs-figure-caption">The Hermes CLI banner, conversation stream, and fixed input prompt rendered as a stable docs figure instead of fragile text art.</p>
 
 The welcome banner shows your model, terminal backend, working directory, available tools, and installed skills at a glance.
diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md
index 39d8232f532..d74587432d5 100644
--- a/website/docs/user-guide/configuration.md
+++ b/website/docs/user-guide/configuration.md
@@ -8,6 +8,10 @@ description: "Configure Hermes Agent — config.yaml, providers, models, API key
 
 All settings are stored in the `~/.hermes/` directory for easy access.
 
+:::tip Easiest path to a working `config.yaml`
+Run `hermes setup --portal` — one OAuth gets you a model provider and all four Tool Gateway tools without hand-editing YAML. Portal subscribers also get 10% off token-billed providers. See [Nous Portal](/integrations/nous-portal).
+:::
+
 ## Directory Structure
 
 ```text
@@ -126,7 +130,7 @@ The agent has the same filesystem access as your user account. Use `hermes tools
 
 Runs commands inside a Docker container with security hardening (all capabilities dropped, no privilege escalation, PID limits).
 
-**Single persistent container, not per-command.** Hermes starts ONE long-lived container on first use and routes every terminal, file, and `execute_code` call through `docker exec` into that same container — across sessions, `/new`, `/reset`, and `delegate_task` subagents — for the lifetime of the Hermes process. Working-directory changes, installed packages, and files in `/workspace` carry over from one tool call to the next, just like a local shell. The container is stopped and removed on shutdown. See **Container lifecycle** below for details.
+**Single persistent container, shared across Hermes processes.** Hermes starts ONE long-lived container on first use and routes every terminal, file, and `execute_code` call through `docker exec` into that same container — across sessions, `/new`, `/reset`, and `delegate_task` subagents. Working-directory changes, installed packages, files in `/workspace`, and **background processes** all carry over from one tool call to the next, and from one Hermes process to the next. When you close a TUI session, run `/quit`, or start a new `hermes` invocation, the container keeps running and the next Hermes process reuses it via a labeled lookup. See **Container lifecycle** below for the exact teardown rules.
 
 ```yaml
 terminal:
@@ -134,8 +138,11 @@ terminal:
   docker_image: "nikolaik/python-nodejs:python3.11-nodejs20"
   docker_mount_cwd_to_workspace: false  # Mount launch dir into /workspace
   docker_run_as_host_user: false   # See "Running container as host user" below
-  docker_forward_env:              # Env vars to forward into container
+  docker_forward_env:              # Host env vars to forward into container
     - "GITHUB_TOKEN"
+  docker_env:                      # Literal env vars to inject (KEY=value)
+    DEBUG: "1"
+    PYTHONUNBUFFERED: "1"
   docker_volumes:                  # Host directory mounts
     - "/home/user/projects:/workspace/projects"
     - "/home/user/data:/data:ro"   # :ro for read-only
@@ -147,14 +154,49 @@ terminal:
   container_cpu: 1                 # CPU cores (0 = unlimited)
   container_memory: 5120           # MB (0 = unlimited)
   container_disk: 51200            # MB (requires overlay2 on XFS+pquota)
-  container_persistent: true       # Persist /workspace and /root across sessions
+  container_persistent: true       # Persist /workspace and /root bind-mount dirs
+
+  # Cross-process container reuse (defaults match the "one long-lived
+  # container shared across sessions" contract — see Container lifecycle).
+  docker_persist_across_processes: true   # Reuse container across Hermes restarts
+  docker_orphan_reaper: true              # Sweep abandoned Exited containers at startup
+
+  # Cross-backend lifecycle settings (apply to docker as well)
+  timeout: 180                     # Per-command timeout in seconds
+  lifetime_seconds: 300            # Idle-reaper window; also feeds 2× orphan-reaper threshold
 ```
 
+**`docker_env`** vs **`docker_forward_env`**: the former injects literal `KEY=value` pairs you specify in the config (the values live in your `config.yaml` or are passed as a JSON dict via `TERMINAL_DOCKER_ENV='{"DEBUG":"1"}'`). The latter forwards values from your shell or `~/.hermes/.env`, so the actual secret never appears in the config file. Use `docker_forward_env` for tokens and `docker_env` for static knobs the container needs.
+
 **`terminal.docker_extra_args`** (also overridable via `TERMINAL_DOCKER_EXTRA_ARGS='["--gpus=all"]'`) lets you pass arbitrary `docker run` flags that Hermes doesn't surface as first-class keys — `--gpus`, `--network`, `--add-host`, alternative `--security-opt` overrides, etc. Each entry must be a string; the list is appended last to the assembled `docker run` invocation so it can override Hermes' defaults if needed. Use sparingly — flags that conflict with the sandbox hardening (capability drops, `--user`, the workspace bind mount) will silently weaken isolation.
 
 **Requirements:** Docker Desktop or Docker Engine installed and running. Hermes probes `$PATH` plus common macOS install locations (`/usr/local/bin/docker`, `/opt/homebrew/bin/docker`, Docker Desktop app bundle). Podman is supported out of the box: set `HERMES_DOCKER_BINARY=podman` (or the full path) to force it when both are installed.
 
-**Container lifecycle:** Hermes reuses a single long-lived container (`docker run -d ... sleep 2h`) for every terminal and file-tool call, across sessions, `/new`, `/reset`, and `delegate_task` subagents, for the lifetime of the Hermes process. Commands run via `docker exec` with a login shell, so working-directory changes, installed packages, and files in `/workspace` all persist from one tool call to the next. The container is stopped and removed on Hermes shutdown (or when the idle-sweep reclaims it).
+#### Container lifecycle
+
+Every Hermes-managed container is tagged with three labels so subsequent processes (and the orphan reaper) can identify it:
+
+- `hermes-agent=1` — marks it as Hermes-managed
+- `hermes-task-id=<sanitized task_id>` — keys the per-task reuse probe
+- `hermes-profile=<sanitized profile name>` — scopes reuse and reaping to the active Hermes profile
+
+On startup, Hermes runs `docker ps --filter label=hermes-task-id=<id> --filter label=hermes-profile=<profile>` and **attaches to the existing container** when it finds one. If the container is `exited` (e.g. after a Docker daemon restart), it's `docker start`'d and reused — filesystem state and any installed packages survive, but in-container background processes do not.
+
+When a Hermes process exits — `/quit`, closing a TUI session, gateway shutdown, even SIGKILL — the cleanup path is a **no-op for the container in default mode**. The container keeps running. The next Hermes process attaches to it in milliseconds via the label probe. This is the behavior the "one long-lived container shared across sessions" contract requires: it's the only way background processes (npm watchers, dev servers, long-running pytest) survive across sessions.
+
+**The container is only torn down (stopped and `docker rm -f`'d) in these cases:**
+
+| Trigger | When it fires |
+|---|---|
+| `docker_persist_across_processes: false` | Explicit per-process isolation. Every `cleanup()` does `stop` + `rm -f`. Matches pre-issue-#20561 behavior. |
+| Idle reaper (`lifetime_seconds`, default 300s) | Only when the env is `persist_across_processes=false`. Persist-mode envs are no-op'd; container survives the idle sweep. |
+| Orphan reaper at next startup | Sweeps **Exited** hermes-labeled containers older than `2 × lifetime_seconds` (default 600s = 10 min), scoped to the current profile. **Running containers are never touched** — sibling-process safety. Set `docker_orphan_reaper: false` to disable. |
+| Direct user action | `docker rm -f`, `docker system prune`, Docker Desktop restart. We don't set `--restart=always`, so a host reboot leaves the container `Exited` (its CoW layer survives and gets reused on next startup, but bg processes are gone). |
+
+Edge cases worth knowing:
+
+- **OOM kill of in-container PID 1** transitions the container to `Exited`. Next reuse will `docker start` it; filesystem state survives, bg processes do not.
+- **Switching profiles** isolates containers from each other — a container labeled `hermes-profile=work` is invisible to a Hermes process running under `hermes-profile=research`. The orphan reaper is profile-scoped too, so cross-profile containers don't get reaped accidentally, but they also won't get cleaned up automatically until you start Hermes again under their original profile.
 
 Parallel subagents spawned via `delegate_task(tasks=[...])` share this one container — concurrent `cd`, env mutations, and writes to the same path will collide. If a subagent needs an isolated sandbox, it must register a per-task image override via `register_task_env_overrides()`, which RL and benchmark environments (TerminalBench2, HermesSweEnv, etc.) do automatically for their per-task Docker images.
 
@@ -166,6 +208,29 @@ Parallel subagents spawned via `delegate_task(tasks=[...])` share this one conta
 
 **Credential forwarding:** Env vars listed in `docker_forward_env` are resolved from your shell environment first, then `~/.hermes/.env`. Skills can also declare `required_environment_variables` which are merged automatically.
 
+#### Environment variable overrides
+
+Every key under `terminal:` has an env-var override of the form `TERMINAL_<KEY_UPPERCASE>`. The most useful ones for the Docker backend:
+
+| Env var | Maps to | Notes |
+|---|---|---|
+| `TERMINAL_DOCKER_IMAGE` | `docker_image` | Base image |
+| `TERMINAL_DOCKER_FORWARD_ENV` | `docker_forward_env` | JSON array: `'["GITHUB_TOKEN","OPENAI_API_KEY"]'` |
+| `TERMINAL_DOCKER_ENV` | `docker_env` | JSON dict: `'{"DEBUG":"1"}'` |
+| `TERMINAL_DOCKER_VOLUMES` | `docker_volumes` | JSON array of `"host:container[:ro]"` strings |
+| `TERMINAL_DOCKER_EXTRA_ARGS` | `docker_extra_args` | JSON array |
+| `TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE` | `docker_mount_cwd_to_workspace` | `true` / `false` |
+| `TERMINAL_DOCKER_RUN_AS_HOST_USER` | `docker_run_as_host_user` | `true` / `false` |
+| `TERMINAL_DOCKER_PERSIST_ACROSS_PROCESSES` | `docker_persist_across_processes` | `true` / `false` — default `true` |
+| `TERMINAL_DOCKER_ORPHAN_REAPER` | `docker_orphan_reaper` | `true` / `false` — default `true` |
+| `TERMINAL_CONTAINER_CPU` | `container_cpu` | CPU cores |
+| `TERMINAL_CONTAINER_MEMORY` | `container_memory` | MB |
+| `TERMINAL_CONTAINER_DISK` | `container_disk` | MB |
+| `TERMINAL_CONTAINER_PERSISTENT` | `container_persistent` | `true` / `false` — controls the bind-mount workspace dirs, distinct from `docker_persist_across_processes` |
+| `TERMINAL_LIFETIME_SECONDS` | `lifetime_seconds` | Idle reaper window |
+| `TERMINAL_TIMEOUT` | `timeout` | Per-command timeout |
+| `HERMES_DOCKER_BINARY` | _none_ | Force a specific docker/podman binary path |
+
 ### SSH Backend
 
 Runs commands on a remote server over SSH. Uses ControlMaster for connection reuse (5-minute idle keepalive). Persistent shell is enabled by default — state (cwd, env vars) survives across commands.
@@ -566,6 +631,7 @@ compression:
   threshold: 0.50                                   # Compress at this % of context limit
   target_ratio: 0.20                                # Fraction of threshold to preserve as recent tail
   protect_last_n: 20                                # Min recent messages to keep uncompressed
+  protect_first_n: 3                                # Non-system head messages pinned across compactions (0 = pin nothing)
   hygiene_hard_message_limit: 400                   # Gateway safety valve — see below
 
 # The summarization model/provider is configured under auxiliary:
@@ -582,6 +648,8 @@ Older configs with `compression.summary_model`, `compression.summary_provider`,
 
 `hygiene_hard_message_limit` is a gateway-only **pre-compression safety valve**. Runaway sessions with thousands of messages can hit model context limits before the normal percent-of-context threshold fires; when message count crosses this ceiling, Hermes forces compression regardless of token usage. Default `400` — raise it for platforms where very long sessions are normal, lower it to force more aggressive compression. Editing this value on a running gateway takes effect on the next message (see below).
 
+`protect_first_n` controls how many **non-system** head messages are pinned across every compaction. Default `3` — the opening user/assistant exchange survives every summarizer pass so the original goal stays visible. On long-running rolling-compaction sessions where the opening turn is no longer relevant, set `protect_first_n: 0` to pin nothing but the system prompt + summary + tail. The system prompt itself is always preserved regardless of this setting.
+
 :::tip Gateway hot-reload of compression and context length
 As of recent releases, editing `model.context_length` or any `compression.*` key in `config.yaml` on a running gateway takes effect on the next message — no gateway restart, no `/reset`, no session rotation required. The cached-agent signature includes these keys, so the gateway transparently rebuilds the agent when it sees a change. API keys and tool/skill config still require the usual reload paths.
 :::
@@ -796,7 +864,7 @@ Available providers for auxiliary tasks: `auto`, `main`, plus any provider in th
 :::
 
 :::warning `"main"` is for auxiliary tasks only
-The `"main"` provider option means "use whatever provider my main agent uses" — it's only valid inside `auxiliary:`, `compression:`, and `fallback_model:` configs. It is **not** a valid value for your top-level `model.provider` setting. If you use a custom OpenAI-compatible endpoint, set `provider: custom` in your `model:` section. See [AI Providers](/integrations/providers) for all main model provider options.
+The `"main"` provider option means "use whatever provider my main agent uses" — it's only valid inside `auxiliary:`, `compression:`, and primary fallback entries (`fallback_providers:` or legacy `fallback_model:`). It is **not** a valid value for your top-level `model.provider` setting. If you use a custom OpenAI-compatible endpoint, set `provider: custom` in your `model:` section. See [AI Providers](/integrations/providers) for all main model provider options.
 :::
 
 ### Full auxiliary config reference
@@ -866,7 +934,7 @@ Each auxiliary task has a configurable `timeout` (in seconds). Defaults: vision
 :::
 
 :::info
-Context compression has its own `compression:` block for thresholds and an `auxiliary.compression:` block for model/provider settings — see [Context Compression](#context-compression) above. The fallback model uses a `fallback_model:` block — see [Fallback Model](/integrations/providers#fallback-model). All three follow the same provider/model/base_url pattern.
+Context compression has its own `compression:` block for thresholds and an `auxiliary.compression:` block for model/provider settings — see [Context Compression](#context-compression) above. The primary fallback chain uses a top-level `fallback_providers:` list — see [Fallback Providers](/integrations/providers#fallback-providers). All three follow the same provider/model/base_url pattern.
 :::
 
 ### OpenRouter routing & Pareto Code for auxiliary tasks
@@ -909,7 +977,7 @@ AUXILIARY_VISION_MODEL=openai/gpt-4o
 
 ### Provider Options
 
-These options apply to **auxiliary task configs** (`auxiliary:`, `compression:`, `fallback_model:`), not to your main `model.provider` setting.
+These options apply to **auxiliary task configs** (`auxiliary:`, `compression:`) and primary fallback entries (`fallback_providers:` or legacy `fallback_model:`), not to your main `model.provider` setting.
 
 | Provider | Description | Requirements |
 |----------|-------------|-------------|
@@ -1422,7 +1490,7 @@ Environment scrubbing (strips `*_API_KEY`, `*_TOKEN`, `*_SECRET`, `*_PASSWORD`,
 
 ## Web Search Backends
 
-The `web_search`, `web_extract`, and `web_crawl` tools support five backend providers. Configure the backend in `config.yaml` or via `hermes tools`:
+The `web_search` and `web_extract` tools support five backend providers. Configure the backend in `config.yaml` or via `hermes tools`:
 
 ```yaml
 web:
@@ -1433,17 +1501,17 @@ web:
   extract_backend: "firecrawl"
 ```
 
-| Backend | Env Var | Search | Extract | Crawl |
-|---------|---------|--------|---------|-------|
-| **Firecrawl** (default) | `FIRECRAWL_API_KEY` | ✔ | ✔ | ✔ |
-| **SearXNG** | `SEARXNG_URL` | ✔ | — | — |
-| **Parallel** | `PARALLEL_API_KEY` | ✔ | ✔ | — |
-| **Tavily** | `TAVILY_API_KEY` | ✔ | ✔ | ✔ |
-| **Exa** | `EXA_API_KEY` | ✔ | ✔ | — |
+| Backend | Env Var | Search | Extract |
+|---------|---------|--------|---------|
+| **Firecrawl** (default) | `FIRECRAWL_API_KEY` | ✔ | ✔ |
+| **SearXNG** | `SEARXNG_URL` | ✔ | — |
+| **Parallel** | `PARALLEL_API_KEY` | ✔ | ✔ |
+| **Tavily** | `TAVILY_API_KEY` | ✔ | ✔ |
+| **Exa** | `EXA_API_KEY` | ✔ | ✔ |
 
 **Backend selection:** If `web.backend` is not set, the backend is auto-detected from available API keys. If only `SEARXNG_URL` is set, SearXNG is used. If only `EXA_API_KEY` is set, Exa is used. If only `TAVILY_API_KEY` is set, Tavily is used. If only `PARALLEL_API_KEY` is set, Parallel is used. Otherwise Firecrawl is the default.
 
-**SearXNG** is a free, self-hosted, privacy-respecting metasearch engine that queries 70+ search engines. No API key needed — just set `SEARXNG_URL` to your instance (e.g., `http://localhost:8080`). SearXNG is search-only; `web_extract` and `web_crawl` require a separate extract provider (set `web.extract_backend`). See the [Web Search setup guide](/user-guide/features/web-search) for Docker setup instructions.
+**SearXNG** is a free, self-hosted, privacy-respecting metasearch engine that queries 70+ search engines. No API key needed — just set `SEARXNG_URL` to your instance (e.g., `http://localhost:8080`). SearXNG is search-only; `web_extract` requires a separate extract provider (set `web.extract_backend`). See the [Web Search setup guide](/user-guide/features/web-search) for Docker setup instructions.
 
 **Self-hosted Firecrawl:** Set `FIRECRAWL_API_URL` to point at your own instance. When a custom URL is set, the API key becomes optional (set `USE_DB_AUTHENTICATION=*** on the server to disable auth).
 
@@ -1516,7 +1584,7 @@ Pre-execution security scanning and secret redaction:
 
 ```yaml
 security:
-  redact_secrets: false          # Redact API key patterns in tool output and logs (off by default)
+  redact_secrets: true           # Redact API key patterns in tool output and logs (on by default)
   tirith_enabled: true           # Enable Tirith security scanning for terminal commands
   tirith_path: "tirith"          # Path to tirith binary (default: "tirith" in $PATH)
   tirith_timeout: 5              # Seconds to wait for tirith scan before timing out
@@ -1527,7 +1595,7 @@ security:
     shared_files: []
 ```
 
-- `redact_secrets` — when `true`, automatically detects and redacts patterns that look like API keys, tokens, and passwords in tool output before it enters the conversation context and logs. **Off by default** — enable if you commonly work with real credentials in tool output and want a safety net. Set to `true` explicitly to turn on.
+- `redact_secrets` — when `true`, automatically detects and redacts patterns that look like API keys, tokens, and passwords in tool output before it enters the conversation context and logs. **On by default**. Set to `false` explicitly only when you need raw credential-like strings for debugging or redactor development.
 - `tirith_enabled` — when `true`, terminal commands are scanned by [Tirith](https://github.com/sheeki03/tirith) before execution to detect potentially dangerous operations.
 - `tirith_path` — path to the tirith binary. Set this if tirith is installed in a non-standard location.
 - `tirith_timeout` — maximum seconds to wait for a tirith scan. Commands proceed if the scan times out.
@@ -1658,12 +1726,14 @@ See also:
 | Context | Default |
 |---------|---------|
 | **CLI (`hermes`)** | Current directory where you run the command |
-| **Messaging gateway** | Home directory `~` (override with `MESSAGING_CWD`) |
+| **Messaging gateway** | `terminal.cwd` from `~/.hermes/config.yaml`; if unset, home directory `~` |
 | **Docker / Singularity / Modal / SSH** | User's home directory inside the container or remote machine |
 
 Override the working directory:
-```bash
-# In ~/.hermes/.env or ~/.hermes/config.yaml:
-MESSAGING_CWD=/home/myuser/projects    # Gateway sessions
-TERMINAL_CWD=/workspace                # All terminal sessions
+```yaml
+# In ~/.hermes/config.yaml:
+terminal:
+  cwd: /home/myuser/projects
 ```
+
+`MESSAGING_CWD` and direct `TERMINAL_CWD` entries in `~/.hermes/.env` are legacy compatibility fallbacks. New configurations should use `terminal.cwd`.
diff --git a/website/docs/user-guide/configuring-models.md b/website/docs/user-guide/configuring-models.md
index 01ab8c20795..00ad11d43f1 100644
--- a/website/docs/user-guide/configuring-models.md
+++ b/website/docs/user-guide/configuring-models.md
@@ -13,6 +13,12 @@ This page covers configuring both from the dashboard. If you prefer config files
 
 :::tip Fastest path: Nous Portal
 [Nous Portal](/user-guide/features/tool-gateway) provides 300+ models under one subscription. On a fresh install, run `hermes setup --portal` to log in and set Nous as your provider in one command. Inspect what's wired up with `hermes portal status`.
+
+- Portal subscribers also get **10% off token-billed providers**.
+:::
+
+:::note `model:` schema — empty string vs. mapping
+On a brand-new install the bundled default config has `model: ""` (an empty string sentinel meaning "not configured yet"). The first time you run `hermes setup` or `hermes model`, that key is upgraded in-place to a mapping with `provider`, `default`, `base_url`, and `api_mode` sub-keys — the shape shown throughout this page and in [`profiles.md`](./profiles.md) / [`configuration.md`](./configuration.md). If you ever see an empty string in `config.yaml`, run `hermes model` (or click **Change** in the dashboard) and Hermes will write the dict form for you.
 :::
 
 ## The Models page
@@ -54,7 +60,7 @@ Every auxiliary task defaults to `auto` — meaning Hermes uses your main model
 | Task | When to override |
 |---|---|
 | **Title Gen** | Almost always. A $0.10/M flash model writes session titles as well as Opus. Default config sets this to `google/gemini-3-flash-preview` on OpenRouter. |
-| **Vision** | When your main model is a coding model without vision (e.g. Kimi, DeepSeek). Point it at `google/gemini-2.5-flash` or `gpt-4o-mini`. |
+| **Vision** | When your main model lacks vision support. Point it at `google/gemini-2.5-flash` or `gpt-4o-mini`. |
 | **Compression** | When you're burning reasoning tokens on Opus/M2.7 just to summarize context. A fast chat model does the job at 1/50th the cost. |
 | **Approval** | For `approval_mode: smart` — a fast/cheap model (haiku, flash, gpt-5-mini) decides whether to auto-approve low-risk commands. Expensive models here are waste. |
 | **Web Extract** | When you use `web_extract` heavily. Same logic as compression — summarization doesn't need reasoning. |
@@ -166,7 +172,9 @@ Inside any `hermes chat` session:
 
 ### Custom aliases
 
-Define your own short names for models you reach for often, then use `/model <alias>` in the CLI or any messaging platform:
+Define your own short names for models you reach for often, then use `/model <alias>` in the CLI or any messaging platform. There are two equivalent formats — pick whichever fits your workflow.
+
+**Canonical (top-level `model_aliases:`)** — full control over provider + base_url:
 
 ```yaml
 # ~/.hermes/config.yaml
@@ -179,13 +187,15 @@ model_aliases:
     provider: x-ai
 ```
 
-Or from the shell (short form, `provider/model`):
+**Short string form (`model.aliases.<name>: provider/model`)** — convenient from the shell because `hermes config set` only writes scalar values, but it can't carry a custom `base_url`:
 
 ```bash
 hermes config set model.aliases.fav anthropic/claude-opus-4.6
 hermes config set model.aliases.grok x-ai/grok-4
 ```
 
+Both paths feed the same loader (`hermes_cli/model_switch.py`). Entries declared in `model_aliases:` take precedence over `model.aliases:` entries with the same name.
+
 Then `/model fav` or `/model grok` in chat. User aliases shadow built-in short names (`sonnet`, `kimi`, `opus`, etc.). See [Custom model aliases](/reference/slash-commands#custom-model-aliases) for the full reference.
 
 ### `hermes model` subcommand
@@ -196,7 +206,7 @@ hermes model            # Interactive provider + model picker (the canonical way
 
 `hermes model` walks you through picking a provider, authenticating (OAuth flows open a browser; API-key providers prompt for the key), and then choosing a specific model from that provider's curated catalog. The choice is written to `model.provider` and `model.model` in `~/.hermes/config.yaml`.
 
-To list providers/models without launching the picker, use the dashboard or the REST endpoints below. To inspect what the CLI will actually use right now: `hermes config get model` and `hermes status`.
+To list providers/models without launching the picker, use the dashboard or the REST endpoints below. To inspect what the CLI will actually use right now: `hermes config show | grep '^model\.'` and `hermes status`.
 
 ### Direct config edit
 
diff --git a/website/docs/user-guide/docker.md b/website/docs/user-guide/docker.md
index fefde13a5b6..9168d39ad70 100644
--- a/website/docs/user-guide/docker.md
+++ b/website/docs/user-guide/docker.md
@@ -26,6 +26,10 @@ docker run -it --rm \
 
 This drops you into the setup wizard, which will prompt you for your API keys and write them to `~/.hermes/.env`. You only need to do this once. It is highly recommended to set up a chat system for the gateway to work with at this point.
 
+:::tip
+Inside the container, run `hermes setup --portal` once — the refresh token persists in the mounted `~/.hermes` volume. See [Nous Portal](/integrations/nous-portal).
+:::
+
 ## Running in gateway mode
 
 Once configured, run the container in the background as a persistent gateway (Telegram, Discord, Slack, WhatsApp, etc.):
@@ -49,6 +53,10 @@ You'll see a one-line breadcrumb in `docker logs` confirming the upgrade. To opt
 This behavior applies to the s6-based image only. Earlier (tini-based) images still run `gateway run` as the foreground main process.
 :::
 
+:::note Where gateway logs go
+See the [Where the logs go](#where-the-logs-go) section below for the full routing map (per-profile gateways, dashboard, boot reconciler, container-wide `docker logs`).
+:::
+
 Note: the API server is gated on `API_SERVER_ENABLED=true`. To expose it beyond `127.0.0.1` inside the container, also set `API_SERVER_HOST=0.0.0.0` and an `API_SERVER_KEY` (minimum 8 characters — generate one with `openssl rand -hex 32`). Example:
 
 ```sh
@@ -68,7 +76,7 @@ Opening any port on an internet facing machine is a security risk. You should no
 
 ## Running the dashboard
 
-The built-in web dashboard runs as an optional side-process inside the same container as the gateway. Set `HERMES_DASHBOARD=1` to run the dashboard on container loopback (`127.0.0.1`) by default:
+The built-in web dashboard runs as a supervised s6-rc service alongside the gateway in the same container. Set `HERMES_DASHBOARD=1` to bring it up:
 
 ```sh
 docker run -d \
@@ -76,33 +84,38 @@ docker run -d \
   --restart unless-stopped \
   -v ~/.hermes:/opt/data \
   -p 8642:8642 \
+  -p 9119:9119 \
   -e HERMES_DASHBOARD=1 \
   nousresearch/hermes-agent gateway run
 ```
 
-The entrypoint starts `hermes dashboard` in the background (running as the non-root `hermes` user) before `exec`-ing the main command. Dashboard output is prefixed with `[dashboard]` in `docker logs` so it's easy to separate from gateway logs.
+The dashboard is supervised by s6 — if it crashes, `s6-supervise` restarts it automatically after a short backoff. Dashboard stdout/stderr is forwarded to `docker logs <container>` (no prefix; the gateway's own output now lives in a per-profile s6-log file — see [Where the logs go](#where-the-logs-go) below — so the two streams don't clash).
 
 | Environment variable | Description | Default |
 |---------------------|-------------|---------|
-| `HERMES_DASHBOARD` | Set to `1` (or `true` / `yes`) to launch the dashboard alongside the main command | *(unset — dashboard not started)* |
-| `HERMES_DASHBOARD_HOST` | Bind address for the dashboard HTTP server | `127.0.0.1` |
+| `HERMES_DASHBOARD` | Set to `1` (or `true` / `yes`) to enable the supervised dashboard service | *(unset — service is registered but stays down)* |
+| `HERMES_DASHBOARD_HOST` | Bind address for the dashboard HTTP server | `0.0.0.0` |
 | `HERMES_DASHBOARD_PORT` | Port for the dashboard HTTP server | `9119` |
 | `HERMES_DASHBOARD_TUI` | Set to `1` to expose the in-browser Chat tab (embedded `hermes --tui` via PTY/WebSocket) | *(unset)* |
+| `HERMES_DASHBOARD_INSECURE` | Set to `1` (or `true` / `yes`) to bind without the OAuth auth gate. Only use on trusted networks behind a reverse proxy without the OAuth contract — the dashboard exposes API keys and session data | *(unset — gate enforced when a `DashboardAuthProvider` is registered)* |
 
-By default, the dashboard stays on loopback to avoid exposing the unauthenticated web surface over the network. To publish it intentionally, set `HERMES_DASHBOARD_HOST=0.0.0.0` and configure your own trusted network boundary/reverse proxy. In that case you must explicitly add `--insecure` behavior by passing host/flags in your command path (the entrypoint no longer auto-enables insecure mode).
+The dashboard inside the container defaults to binding `0.0.0.0` — without it, the published `-p 9119:9119` port would not be reachable from the host. To restrict the bind to container loopback (for sidecar / reverse-proxy setups), set `HERMES_DASHBOARD_HOST=127.0.0.1`.
 
-:::note
-The dashboard runs as a supervised s6 service inside the container. If
-the dashboard process crashes, s6-overlay restarts it automatically
-after a short backoff — you'll see a new PID without needing to
-restart the container. Logs and crash output are visible via
-`docker logs <container>` (s6 forwards service stdout/stderr there).
+The dashboard's OAuth auth gate engages automatically when both of the following are true:
 
-Running the dashboard as a separate container is not supported: its
-gateway-liveness detection requires a shared PID namespace with the
-gateway process.
+1. The bind host is non-loopback (e.g. the default `0.0.0.0` inside the container), **and**
+2. A `DashboardAuthProvider` plugin is registered.
+
+The bundled `dashboard_auth/nous` provider activates whenever `HERMES_DASHBOARD_OAUTH_CLIENT_ID` is set (see [Web Dashboard → Authentication](features/web-dashboard.md)). With the gate engaged, browser callers are redirected to the configured portal's OAuth flow before they can reach any protected route.
+
+If no provider is registered and the bind is non-loopback, the dashboard **fails closed at startup** with a specific error pointing at the missing env var. To opt out of the gate explicitly — for a trusted-LAN deployment behind your own reverse proxy without the OAuth contract — set `HERMES_DASHBOARD_INSECURE=1`. This is the **only** path that disables the gate; the bind host alone never implies `--insecure` (it used to, but that predated the OAuth gate and silently disabled it on every container-deployed dashboard).
+
+:::warning `HERMES_DASHBOARD_INSECURE=1` exposes API keys
+Opting out of the OAuth gate serves the dashboard's API surface (including model keys and session data) to anyone who can reach the published port. Only enable it when you have your own auth layer in front, or on a trusted LAN you fully control.
 :::
 
+Running the dashboard as a separate container is not supported: its gateway-liveness detection requires a shared PID namespace with the gateway process.
+
 ## Running interactively (CLI chat)
 
 To open an interactive chat session against a running data directory:
@@ -145,37 +158,60 @@ Never run two Hermes **gateway** containers against the same data directory simu
 
 ## Multi-profile support
 
-Hermes supports [multiple profiles](../reference/profile-commands.md) — separate `~/.hermes/` directories that let you run independent agents (different SOUL, skills, memory, sessions, credentials) from a single installation. **When running under Docker, using Hermes' built-in multi-profile feature is not recommended.**
+Hermes supports [multiple profiles](../reference/profile-commands.md) — separate `~/.hermes/` subdirectories that let you run independent agents (different SOUL, skills, memory, sessions, credentials) from a single installation. **Inside the official Docker image, the s6 supervision tree treats each profile as a first-class supervised service**, so the recommended deployment is **one container hosting all profiles**.
 
-Instead, the recommended pattern is **one container per profile**, with each container bind-mounting its own host directory as `/opt/data`:
+Each profile created with `hermes profile create <name>` gets:
+
+- A dedicated s6 service slot at `/run/service/gateway-<name>/`, registered dynamically by the runtime — no container rebuild required.
+- Auto-restart on crash, backoff-managed by `s6-supervise`.
+- Per-profile rotated logs at `${HERMES_HOME}/logs/gateways/<name>/current` (10 archives × 1 MB each).
+- State persistence across container restarts: the boot-time reconciler reads `gateway_state.json` from each profile directory and brings the slot back up only for profiles whose last recorded state was `running`. Stopped profiles stay stopped.
+
+The lifecycle commands you'd run on the host work the same way from inside the container:
 
 ```sh
-# Work profile
-docker run -d \
-  --name hermes-work \
-  --restart unless-stopped \
-  -v ~/.hermes-work:/opt/data \
-  -p 8642:8642 \
-  nousresearch/hermes-agent gateway run
+# Create a profile — registers the gateway-<name> s6 slot.
+docker exec hermes hermes profile create coder
 
-# Personal profile
-docker run -d \
-  --name hermes-personal \
-  --restart unless-stopped \
-  -v ~/.hermes-personal:/opt/data \
-  -p 8643:8642 \
-  nousresearch/hermes-agent gateway run
+# Start / stop / restart — dispatches s6-svc; the gateway lifecycle survives docker restart.
+docker exec hermes hermes -p coder gateway start
+docker exec hermes hermes -p coder gateway stop
+docker exec hermes hermes -p coder gateway restart
+
+# Status — reports `Manager: s6 (container supervisor)` inside the container.
+docker exec hermes hermes -p coder gateway status
+
+# Remove a profile — tears down the s6 slot too.
+docker exec hermes hermes profile delete coder
 ```
 
-Why separate containers over profiles in Docker:
+Under the hood, `hermes gateway start/stop/restart` inside the container is intercepted and routed to `s6-svc` against the right service directory; you don't need to learn the s6 commands directly. For raw supervisor state, use `/command/s6-svstat /run/service/gateway-<name>` (note `/command/` is on PATH only for processes spawned by the supervision tree — when calling from `docker exec`, pass the absolute path).
 
-- **Isolation** — each container has its own filesystem, process table, and resource limits. A crash, dependency change, or runaway session in one profile can't affect another.
-- **Independent lifecycle** — upgrade, restart, pause, or roll back each agent separately (`docker restart hermes-work` leaves `hermes-personal` untouched).
-- **Clean port and network separation** — each gateway binds its own host port; there's no risk of cross-talk between chat platforms or API servers.
-- **Simpler mental model** — the container *is* the profile. Backups, migrations, and permissions all follow the bind-mounted directory, with no extra `--profile` flags to remember.
-- **Avoids concurrent-write risk** — the warning above about never running two gateways against the same data directory still applies to profiles within a single container.
+### Why one container with many profiles, not many containers
 
-In Docker Compose, this just means declaring one service per profile with distinct `container_name`, `volumes`, and `ports`:
+Before the s6 migration, "one container per profile" was the recommended pattern because there was no in-container supervisor to manage multiple gateways. With s6 as PID 1, that's no longer necessary, and the single-container layout is simpler in almost every dimension:
+
+| | One container, many profiles | One container per profile |
+|---|---|---|
+| Disk overhead | One image, one bundled venv, one Playwright cache | N images / N caches |
+| Memory overhead | Shared Python interpreter cache, shared node_modules | Duplicated per container |
+| Profile creation | `docker exec ... hermes profile create <name>` (seconds) | New `docker run` invocation + port allocation + bind-mount config |
+| Per-profile crash recovery | `s6-supervise` auto-restart | Docker's `--restart unless-stopped` (slower, kills sibling work) |
+| Logs | Per-profile rotated file via `s6-log`, plus container-boot audit log | `docker logs <name>` per container — no built-in rotation |
+| Backup | One `~/.hermes` directory | N directories to coordinate |
+
+The default profile (`default`) is always registered on first boot, so a fresh container ships with one supervised gateway out of the box. Additional profiles are pure runtime adds.
+
+### When you DO want a separate container
+
+Profile-in-container is the default. Run a separate container per profile only when you have a specific reason:
+
+- **Resource isolation per workload** — e.g. a runaway browser-tool session in profile A shouldn't be able to OOM profile B. Containers give you `--memory` / `--cpus` per profile.
+- **Independent image pinning** — different upstream image tags per workload.
+- **Network segmentation** — distinct Docker networks per profile (e.g. one customer-facing, one internal).
+- **Compliance / blast radius** — distinct credentials never share an OS-level process tree.
+
+In those cases, declare one service per profile with distinct `container_name`, `volumes`, and `ports`:
 
 ```yaml
 services:
@@ -200,6 +236,24 @@ services:
       - ~/.hermes-personal:/opt/data
 ```
 
+The warning from [Persistent volumes](#persistent-volumes) still applies: never point two containers at the same `~/.hermes` directory simultaneously. The s6 supervisor inside each container manages its own profile set; cross-container sharing of a data volume corrupts session files and memory stores.
+
+## Where the logs go
+
+The s6 container has four distinct log surfaces, and "why isn't my gateway showing anything in `docker logs`" is a common surprise. Cheatsheet:
+
+| Source | Where it lands | How to read it |
+|---|---|---|
+| **Per-profile gateway** (`hermes gateway run` and per-profile gateways under s6) | Tee'd to two places: `docker logs <container>` (real time, no extra prefix) **and** `${HERMES_HOME}/logs/gateways/<profile>/current` (rotated, ISO-8601 timestamped, 10 archives × 1 MB each) | `docker logs -f hermes` or `tail -F ~/.hermes/logs/gateways/default/current` on the host |
+| **Dashboard** (when `HERMES_DASHBOARD=1`) | `docker logs <container>` (no prefix) | `docker logs -f hermes` — interleaved with gateway lines |
+| **Boot reconciler** (records which profile gateways were restored on each container start) | `${HERMES_HOME}/logs/container-boot.log` (append-only audit log) | `tail -F ~/.hermes/logs/container-boot.log` |
+| **Generic Hermes logs** (`agent.log`, `errors.log`) | `${HERMES_HOME}/logs/` (profile-aware) | `docker exec hermes hermes logs --follow [--level WARNING] [--session <id>]` |
+
+Two practical consequences worth knowing:
+
+- The file copy at `logs/gateways/<profile>/current` is what survives container restarts. `docker logs` only retains output from the current container's lifetime (and is wiped on `docker rm`); the rotated files persist on the bind-mounted volume.
+- The boot reconciler's audit line shape is `<iso-timestamp> profile=<name> prior_state=<state> action=<registered|started>`, so a quick `grep profile=coder ~/.hermes/logs/container-boot.log` reveals when a given profile was last restored and whether s6 auto-started it.
+
 ## Environment variable forwarding
 
 API keys are read from `/opt/data/.env` inside the container. You can also pass environment variables directly:
@@ -215,7 +269,7 @@ docker run -it --rm \
 Direct `-e` flags override values from `.env`. This is useful for CI/CD or secrets-manager integrations where you don't want keys on disk.
 
 :::note Looking for Docker as the **terminal backend**?
-This page covers running Hermes itself inside Docker. If you want Hermes to execute the agent's `terminal` / `execute_code` calls inside a Docker sandbox container (one persistent container per Hermes process), that's a separate config block — `terminal.backend: docker` plus `terminal.docker_image`, `terminal.docker_volumes`, `terminal.docker_forward_env`, `terminal.docker_run_as_host_user`, and `terminal.docker_extra_args`. See [Configuration → Docker Backend](configuration.md#docker-backend) for the full set.
+This page covers running Hermes itself inside Docker. If you want Hermes to execute the agent's `terminal` / `execute_code` calls inside a Docker sandbox container (one long-lived container shared across Hermes processes — see issue #20561), that's a separate config block — `terminal.backend: docker` plus `terminal.docker_image`, `terminal.docker_volumes`, `terminal.docker_forward_env`, `terminal.docker_env`, `terminal.docker_run_as_host_user`, `terminal.docker_extra_args`, `terminal.docker_persist_across_processes`, and `terminal.docker_orphan_reaper`. See [Configuration → Docker Backend](configuration.md#docker-backend) for the full set including container-lifecycle rules.
 :::
 
 ## Docker Compose example
@@ -247,7 +301,7 @@ services:
           cpus: "2.0"
 ```
 
-Start with `docker compose up -d` and view logs with `docker compose logs -f`. Dashboard output is prefixed with `[dashboard]` so it's easy to filter from gateway logs.
+Start with `docker compose up -d` and view logs with `docker compose logs -f`. The supervised gateway's stdout is also tee'd to `${HERMES_HOME}/logs/gateways/<profile>/current` on the volume — see [Where the logs go](#where-the-logs-go) for the full routing map.
 
 ## Optional: Linux desktop audio bridge
 
@@ -381,24 +435,28 @@ The container ENTRYPOINT is now `/init` (s6-overlay), not `/usr/bin/tini`. All f
 Do not override the image entrypoint unless you keep `/init` (or, equivalently, the legacy `docker/entrypoint.sh` shim that forwards to the stage2 hook) in the command chain. s6-overlay's `/init` runs as root so it can chown the volume on first boot, then drops to the `hermes` user via `s6-setuidgid` for every supervised service AND for the main program. Starting `hermes gateway run` as root inside the official image is refused by default because it can leave root-owned files in `/opt/data` and break later dashboard or gateway starts. Set `HERMES_ALLOW_ROOT_GATEWAY=1` only when you intentionally accept that risk.
 :::
 
-### Per-profile gateway supervision
+### `docker exec` automatically drops to the `hermes` user
 
-Inside the container, each profile created with `hermes profile create <name>` automatically gets an s6-supervised gateway service registered at `/run/service/gateway-<name>/`. The lifecycle commands you'd run on the host work the same way:
+`docker exec hermes <cmd>` defaults to running as root inside the container, but the image ships a thin shim at `/opt/hermes/bin/hermes` (earliest on PATH) that detects root callers and transparently re-execs through `s6-setuidgid hermes`. So `docker exec hermes login`, `docker exec hermes profile create …`, `docker exec hermes setup`, etc. all write files owned by UID 10000 — i.e. readable by the supervised gateway — with no extra `--user` flag needed. Non-root callers (the supervised processes themselves, `docker exec --user hermes`, kanban subagents inside the container) hit a short-circuit that exec's the venv binary directly, so there's no overhead on the hot paths.
+
+If you specifically need a `docker exec` that retains root semantics (diagnostic sessions, inspecting root-only state, files outside `/opt/data` that root happens to own), opt out per invocation:
 
 ```sh
-hermes profile create coder            # registers gateway-coder s6 slot
-hermes -p coder gateway start          # s6-svc -u  → supervised gateway
-hermes -p coder gateway stop           # s6-svc -d  → service down
-hermes -p coder gateway restart        # s6-svc -t  → SIGTERM the supervisor
-hermes profile delete coder            # tears down the s6 slot
+docker exec -e HERMES_DOCKER_EXEC_AS_ROOT=1 hermes <cmd>
 ```
 
+The shim accepts `1` / `true` / `yes` (case-insensitive). Anything else — including typos like `=0` — falls through to the drop, so silent opt-outs aren't possible. If `s6-setuidgid` isn't available (custom builds that stripped s6-overlay), the shim refuses to run as root and exits 126 instead, surfacing the broken privilege model loudly rather than regressing to the historical footgun where `docker exec hermes login` would write `auth.json` as `root:root` and break the supervised gateway's auth on every chat platform message.
+
+### Per-profile gateway supervision
+
+Each profile created with `hermes profile create <name>` automatically gets an s6-supervised gateway service registered at `/run/service/gateway-<name>/`, with state-persistent auto-restart across container restarts. See [Multi-profile support](#multi-profile-support) above for the user-facing workflow and the lifecycle commands.
+
 **Supervision benefits over the pre-s6 image:**
 
 - Gateway crashes are auto-restarted by `s6-supervise` after a ~1s backoff.
-- Dashboard crashes are auto-restarted (set `HERMES_DASHBOARD=1` to start it).
+- Dashboard, when enabled with `HERMES_DASHBOARD=1`, is supervised on the same supervision tree and gets the same auto-restart treatment.
 - `docker restart` preserves running gateways: the cont-init reconciler reads `$HERMES_HOME/profiles/<name>/gateway_state.json` and brings the slot back up if the last recorded state was `running`. Stopped gateways stay stopped.
-- Per-profile gateway logs persist under `$HERMES_HOME/logs/gateways/<profile>/current` (rotated by `s6-log`), and the reconciler's actions are appended to `$HERMES_HOME/logs/container-boot.log` per boot.
+- Per-profile gateway logs persist under `$HERMES_HOME/logs/gateways/<profile>/current` (rotated by `s6-log`), and the reconciler's actions are appended to `$HERMES_HOME/logs/container-boot.log` per boot. See [Where the logs go](#where-the-logs-go) for the full routing map.
 
 `hermes status` inside the container reports `Manager: s6 (container supervisor)`. Use `/command/s6-svstat /run/service/gateway-<name>` for the raw supervisor view (note `/command/` is on PATH for supervision-tree processes only; pass the absolute path when calling from `docker exec`).
 
@@ -652,12 +710,24 @@ Check logs: `docker logs hermes`. Common causes:
 
 ### "Permission denied" errors
 
-The container's stage2 hook drops privileges to the non-root `hermes` user (UID 10000) via `s6-setuidgid` inside each supervised service. If your host `~/.hermes/` is owned by a different UID, set `HERMES_UID`/`HERMES_GID` to match your host user, or ensure the data directory is writable:
+The container's stage2 hook drops privileges to the non-root `hermes` user (UID 10000) via `s6-setuidgid` inside each supervised service. If your host `~/.hermes/` is owned by a different UID, set `HERMES_UID`/`HERMES_GID` — or their `PUID`/`PGID` aliases, for parity with LinuxServer.io and NAS images — to match your host user, or ensure the data directory is writable:
 
 ```sh
 chmod -R 755 ~/.hermes
 ```
 
+On a NAS (UGOS, Synology, unRAID) the data directory is typically a **bind mount** owned by a host UID the container cannot `chown`. Set `PUID`/`PGID` (or `HERMES_UID`/`HERMES_GID`) to that host user so the runtime runs as the owner of the mount rather than UID 10000:
+
+```sh
+docker run -d \
+  --name hermes \
+  -e PUID=1000 -e PGID=10 \
+  -v /volume1/docker/hermes:/opt/data \
+  nousresearch/hermes-agent gateway run
+```
+
+`docker exec hermes <cmd>` automatically drops to UID 10000 too — see [`docker exec` automatically drops to the `hermes` user](#docker-exec-automatically-drops-to-the-hermes-user) for details and the per-invocation opt-out.
+
 ### Browser tools not working
 
 Playwright needs shared memory. Add `--shm-size=1g` to your Docker run command:
diff --git a/website/docs/user-guide/features/api-server.md b/website/docs/user-guide/features/api-server.md
index fd883e84a96..b059e40dff0 100644
--- a/website/docs/user-guide/features/api-server.md
+++ b/website/docs/user-guide/features/api-server.md
@@ -308,6 +308,66 @@ Resume a previously paused job.
 
 Trigger the job to run immediately, out of schedule.
 
+## Sessions API (session control over REST)
+
+External UIs can manage Hermes sessions over REST without standing up the dashboard. All endpoints are gated by `API_SERVER_KEY` and live under `/api/sessions/*`.
+
+| Method | Path | Description |
+|--------|------|-------------|
+| `GET` | `/api/sessions` | List sessions (paginated — `limit`, `offset`, `source`, `include_children`) |
+| `POST` | `/api/sessions` | Create an empty session |
+| `GET` | `/api/sessions/{id}` | Read session metadata |
+| `PATCH` | `/api/sessions/{id}` | Update title or `end_reason` |
+| `DELETE` | `/api/sessions/{id}` | Delete a session |
+| `GET` | `/api/sessions/{id}/messages` | Message history for a session |
+| `POST` | `/api/sessions/{id}/fork` | Branch the session via `SessionDB` lineage (matches CLI `/branch` semantics) |
+| `POST` | `/api/sessions/{id}/chat` | Run one synchronous agent turn |
+| `POST` | `/api/sessions/{id}/chat/stream` | SSE wrapper over a single turn — emits `assistant.delta`, `tool.started`, `tool.completed`, `run.completed` events |
+
+`/v1/capabilities` advertises the full surface via `session_*` feature flags and `endpoints.session_*` entries so external UIs can detect support and fall back safely. Inline images are supported in `chat` and `chat/stream` payloads (multimodal-aware path).
+
+```bash
+# fork a session and run one turn
+curl -X POST http://localhost:8642/api/sessions/$ID/fork \
+  -H "Authorization: Bearer $API_SERVER_KEY" \
+  -d '{"title": "explore alt path"}'
+
+# stream a turn over SSE
+curl -N -X POST http://localhost:8642/api/sessions/$ID/chat/stream \
+  -H "Authorization: Bearer $API_SERVER_KEY" \
+  -d '{"input": "what files changed in the last hour?"}'
+```
+
+## Skills and toolsets discovery
+
+`GET /v1/skills` and `GET /v1/toolsets` let external clients enumerate the agent's capabilities deterministically over REST instead of asking the model. Both are read-only and gated by `API_SERVER_KEY`.
+
+```bash
+curl http://localhost:8642/v1/skills \
+  -H "Authorization: Bearer $API_SERVER_KEY"
+# → [{"name": "github-pr-workflow", "description": "...", "category": "..."}, ...]
+
+curl http://localhost:8642/v1/toolsets \
+  -H "Authorization: Bearer $API_SERVER_KEY"
+# → [{"name": "core", "label": "...", "description": "...", "enabled": true,
+#     "configured": true, "tools": ["read_file", "write_file", ...]}, ...]
+```
+
+`/v1/skills` returns the same metadata the skills hub uses internally. `/v1/toolsets` returns toolsets resolved for the `api_server` platform with the concrete `tools` list each one expands to. Both are advertised under `endpoints.*` in `/v1/capabilities`.
+
+## Long-term memory scoping (`X-Hermes-Session-Key`)
+
+Multi-user frontends like Open WebUI need a stable per-channel identifier for long-term memory (Honcho, etc.) that is **independent** of the transcript-scoped `X-Hermes-Session-Id` (which rotates on `/new`). Pass `X-Hermes-Session-Key` on `/v1/chat/completions`, `/v1/responses`, or `/v1/runs` and Hermes threads it through to `AIAgent(gateway_session_key=...)`, where the Honcho memory provider uses it to derive a stable scope.
+
+```http
+POST /v1/chat/completions HTTP/1.1
+Authorization: Bearer ***
+X-Hermes-Session-Id: transcript-alpha
+X-Hermes-Session-Key: agent:main:webui:dm:user-42
+```
+
+Rules: max 256 chars, control characters (`\r`, `\n`, `\x00`) are rejected, and the value is echoed back on responses (JSON + SSE). `/v1/capabilities` advertises support via `"session_key_header": "X-Hermes-Session-Key"`. Without the key, Honcho's `per-session` strategy produces a different scope per `session_id` — exactly the behavior Hermes had before.
+
 ## System Prompt Handling
 
 When a frontend sends a `system` message (Chat Completions) or `instructions` field (Responses API), hermes-agent **layers it on top** of its core system prompt. Your agent keeps all its tools, memory, and skills — the frontend's system prompt adds extra instructions.
@@ -327,9 +387,7 @@ Authorization: Bearer ***
 Configure the key via `API_SERVER_KEY` env var. If you need a browser to call Hermes directly, also set `API_SERVER_CORS_ORIGINS` to an explicit allowlist.
 
 :::warning Security
-The API server gives full access to hermes-agent's toolset, **including terminal commands**. When binding to a non-loopback address like `0.0.0.0`, `API_SERVER_KEY` is **required**. Also keep `API_SERVER_CORS_ORIGINS` narrow to control browser access.
-
-The default bind address (`127.0.0.1`) is for local-only use. Browser access is disabled by default; enable it only for explicit trusted origins.
+The API server gives full access to hermes-agent's toolset, **including terminal commands**. `API_SERVER_KEY` is **required for every deployment**, including the default loopback bind on `127.0.0.1`. Keep `API_SERVER_CORS_ORIGINS` narrow to control browser access when you explicitly allow browser callers.
 :::
 
 ## Configuration
@@ -341,7 +399,7 @@ The default bind address (`127.0.0.1`) is for local-only use. Browser access is
 | `API_SERVER_ENABLED` | `false` | Enable the API server |
 | `API_SERVER_PORT` | `8642` | HTTP server port |
 | `API_SERVER_HOST` | `127.0.0.1` | Bind address (localhost only by default) |
-| `API_SERVER_KEY` | _(none)_ | Bearer token for auth |
+| `API_SERVER_KEY` | _(required)_ | Bearer token for auth |
 | `API_SERVER_CORS_ORIGINS` | _(none)_ | Comma-separated allowed browser origins |
 | `API_SERVER_MODEL_NAME` | _(profile name)_ | Model name on `/v1/models`. Defaults to profile name, or `hermes-agent` for default profile. |
 
diff --git a/website/docs/user-guide/features/browser.md b/website/docs/user-guide/features/browser.md
index e98ad522b1a..3cf412ed70c 100644
--- a/website/docs/user-guide/features/browser.md
+++ b/website/docs/user-guide/features/browser.md
@@ -185,6 +185,25 @@ Then set in `~/.hermes/.env`:
 CAMOFOX_URL=http://localhost:9377
 ```
 
+If Camofox is running in Docker and you want it to open web apps served from the host machine, enable loopback rewriting. `CAMOFOX_URL` should still point at the host-published control API, but page URLs such as `http://127.0.0.1:3000` must be opened from inside the container as `http://host.docker.internal:3000`:
+
+```yaml
+# ~/.hermes/config.yaml
+browser:
+  camofox:
+    rewrite_loopback_urls: true
+    loopback_host_alias: host.docker.internal  # default; use a LAN IP if needed
+```
+
+Equivalent env vars:
+
+```bash
+CAMOFOX_REWRITE_LOOPBACK_URLS=true
+CAMOFOX_LOOPBACK_HOST_ALIAS=host.docker.internal
+```
+
+The rewrite only applies to page navigation URLs with loopback hosts (`localhost`, `127.0.0.1`, `::1`). It does not change `CAMOFOX_URL`. Leave it disabled for non-Docker Camofox installs, where the browser already runs on the host and loopback URLs are correct.
+
 Or configure via `hermes tools` → Browser Automation → Camofox.
 
 When `CAMOFOX_URL` is set, all browser tools automatically route through Camofox instead of Browserbase or agent-browser.
@@ -376,9 +395,9 @@ BROWSERBASE_ADVANCED_STEALTH=false
 # Session reconnection after disconnects — requires paid plan (default: "true")
 BROWSERBASE_KEEP_ALIVE=true
 
-# Custom session timeout in milliseconds (default: project default)
-# Examples: 600000 (10min), 1800000 (30min)
-BROWSERBASE_SESSION_TIMEOUT=600000
+# Custom session timeout in seconds (max 21600 = 6 hours) (default: project default)
+# Examples: 600 (10min), 1800 (30min), 21600 (6h max)
+BROWSERBASE_SESSION_TIMEOUT=1800
 
 # Inactivity timeout before auto-cleanup in seconds (default: 120)
 BROWSER_INACTIVITY_TIMEOUT=120
diff --git a/website/docs/user-guide/features/code-execution.md b/website/docs/user-guide/features/code-execution.md
index 804984cbfd3..f3beaa473f5 100644
--- a/website/docs/user-guide/features/code-execution.md
+++ b/website/docs/user-guide/features/code-execution.md
@@ -219,6 +219,62 @@ terminal:
 
 See the [Security guide](/user-guide/security#environment-variable-passthrough) for full details.
 
+### `HERMES_*` variables in the child
+
+The child process receives only a small, fixed set of operational `HERMES_*`
+variables by exact name:
+
+- `HERMES_HOME`
+- `HERMES_PROFILE`
+- `HERMES_CONFIG`
+- `HERMES_ENV`
+
+(plus `HERMES_RPC_DIR` / `HERMES_RPC_SOCKET` / `TZ` / `HOME`, which Hermes
+injects explicitly so the RPC channel works).
+
+:::note Behavior change
+Earlier versions passed **any** variable whose name began with `HERMES_`
+through to the child. That broad prefix was removed for security hardening: it
+could leak `HERMES_*`-named configuration that doesn't match a secret substring
+(for example `HERMES_BASE_URL`, `HERMES_KANBAN_DB`, or a `HERMES_*_WEBHOOK`
+endpoint) into arbitrary sandboxed code.
+
+If an `execute_code` script — or a repo/plugin module it imports at import time
+— relied on a `HERMES_*` variable outside the four operational names above, it
+will now find that variable **unset** in the child. The drop is intentional,
+not a bug.
+:::
+
+**Workaround — opt the variable back in explicitly.** Both routes pass the
+variable through `execute_code` *and* `terminal` children, and neither weakens
+the secret-stripping guarantee (Hermes-managed provider credentials can never
+be re-allowed this way):
+
+1. **Per-machine, in `config.yaml`** — add the exact variable name to the
+   passthrough allowlist:
+
+   ```yaml
+   terminal:
+     env_passthrough:
+       - HERMES_KANBAN_DB
+       - HERMES_BASE_URL
+   ```
+
+2. **Per-skill, in the skill's frontmatter** — declare it so it is registered
+   automatically whenever that skill is loaded:
+
+   ```yaml
+   required_environment_variables:
+     - HERMES_KANBAN_DB
+   ```
+
+**Diagnosing it.** When the child drops one or more non-allowlisted `HERMES_*`
+variables, Hermes emits a one-line `debug` log naming them and pointing at the
+`env_passthrough` escape hatch. Run with debug logging (`hermes logs --level
+DEBUG`, or check `~/.hermes/logs/agent.log`) and look for
+`execute_code: dropped N non-allowlisted HERMES_* var(s)` if a script behaves
+as though a `HERMES_*` variable is missing.
+
 Hermes always writes the script and the auto-generated `hermes_tools.py` RPC stub into a temp staging directory that is cleaned up after execution. In `strict` mode the script also *runs* there; in `project` mode it runs in the session's working directory (the staging directory stays on `PYTHONPATH` so imports still resolve). The child process runs in its own process group so it can be cleanly killed on timeout or interruption.
 
 ## execute_code vs terminal
diff --git a/website/docs/user-guide/features/codex-app-server-runtime.md b/website/docs/user-guide/features/codex-app-server-runtime.md
index 928b6d2d66b..3a96f604cc3 100644
--- a/website/docs/user-guide/features/codex-app-server-runtime.md
+++ b/website/docs/user-guide/features/codex-app-server-runtime.md
@@ -9,6 +9,10 @@ Hermes can optionally hand `openai/*` and `openai-codex/*` turns to the [Codex C
 
 This is **opt-in only**. Default Hermes behavior is unchanged unless you flip the flag. Hermes never auto-routes you onto this runtime.
 
+:::tip
+Not using OpenAI Codex? `hermes setup --portal` configures a non-Codex backend with Claude/Gemini/etc. in one step. See [Nous Portal](/integrations/nous-portal).
+:::
+
 ## Why
 
 - Run OpenAI agent turns against your **ChatGPT subscription** (no API key required) using the same auth flow Codex CLI uses.
diff --git a/website/docs/user-guide/features/credential-pools.md b/website/docs/user-guide/features/credential-pools.md
index 508feee5b69..80700057008 100644
--- a/website/docs/user-guide/features/credential-pools.md
+++ b/website/docs/user-guide/features/credential-pools.md
@@ -11,6 +11,10 @@ Credential pools let you register multiple API keys or OAuth tokens for the same
 
 This is different from [fallback providers](./fallback-providers.md), which switch to a *different* provider entirely. Credential pools are same-provider rotation; fallback providers are cross-provider failover. Pools are tried first — if all pool keys are exhausted, *then* the fallback provider activates.
 
+:::tip
+Credential pools are mainly for API-key providers (OpenRouter, Anthropic). A single [Nous Portal](/integrations/nous-portal) OAuth covers 300+ models, so most users don't need a pool when on Portal.
+:::
+
 ## How It Works
 
 ```
@@ -18,8 +22,11 @@ Your request
   → Pick key from pool (round_robin / least_used / fill_first / random)
   → Send to provider
   → 429 rate limit?
-      → Retry same key once (transient blip)
-      → Second 429 → rotate to next pool key
+      → Plan/usage limit reached (e.g. ChatGPT/Codex "usage limit reached")?
+          → Rotate to next pool key immediately (no retry — the cap won't clear on retry)
+      → Generic / transient 429?
+          → Retry same key once (transient blip)
+          → Second 429 → rotate to next pool key
       → All keys exhausted → fallback_model (different provider)
   → 402 billing error?
       → Immediately rotate to next pool key (24h cooldown)
diff --git a/website/docs/user-guide/features/cron.md b/website/docs/user-guide/features/cron.md
index 8b82e56150a..cbefde68a9e 100644
--- a/website/docs/user-guide/features/cron.md
+++ b/website/docs/user-guide/features/cron.md
@@ -21,6 +21,10 @@ Cron jobs can:
 
 All of this is available to Hermes itself through the `cronjob` tool, so you can create, pause, edit, and remove jobs by asking in plain language — no CLI required.
 
+:::tip
+Cron jobs use whatever provider `hermes model` selected. `hermes setup --portal` is the lowest-friction option for unattended runs since OAuth refresh is automatic. See [Nous Portal](/integrations/nous-portal).
+:::
+
 :::warning
 Cron-run sessions cannot recursively create more cron jobs. Hermes disables cron management tools inside cron executions to prevent runaway scheduling loops.
 :::
@@ -113,12 +117,12 @@ cronjob(
 When `workdir` is set:
 
 - `AGENTS.md`, `CLAUDE.md`, and `.cursorrules` from that directory are injected into the system prompt (same discovery order as the interactive CLI)
-- `terminal`, `read_file`, `write_file`, `patch`, `search_files`, and `execute_code` all use that directory as their working directory (via `TERMINAL_CWD`)
+- `terminal`, `read_file`, `write_file`, `patch`, `search_files`, and `execute_code` all use that directory as their working directory
 - The path must be an absolute directory that exists — relative paths and missing directories are rejected at create / update time
 - Pass `--workdir ""` (or `workdir=""` via the tool) on edit to clear it and restore the old behaviour
 
 :::note Serialization
-Jobs with a `workdir` run sequentially on the scheduler tick, not in the parallel pool. This is deliberate — `TERMINAL_CWD` is process-global, so two workdir jobs running at the same time would corrupt each other's cwd. Workdir-less jobs still run in parallel as before.
+Jobs with a `workdir` run sequentially on the scheduler tick, not in the parallel pool. This is deliberate: the cron worker applies the job workdir through process-global terminal state, so two workdir jobs running at the same time would corrupt each other's cwd. Workdir-less jobs still run in parallel as before.
 :::
 
 ## Running cron jobs in a specific profile
@@ -204,10 +208,11 @@ Cron jobs now have a fuller lifecycle than just create/remove.
 
 ```bash
 hermes cron list
-hermes cron pause <job_id>
-hermes cron resume <job_id>
-hermes cron run <job_id>
-hermes cron remove <job_id>
+hermes cron pause <job_id_or_name>
+hermes cron resume <job_id_or_name>
+hermes cron run <job_id_or_name>
+hermes cron remove <job_id_or_name>
+hermes cron edit <job_id_or_name> [...flags]
 hermes cron status
 hermes cron tick
 ```
@@ -218,6 +223,9 @@ What they do:
 - `resume` — re-enable the job and compute the next future run
 - `run` — trigger the job on the next scheduler tick
 - `remove` — delete it entirely
+- `edit` — modify schedule, prompt, profile, delivery, etc.
+
+**Name-based lookup.** All four mutating verbs (`pause`, `resume`, `run`, `remove`, `edit`) plus the agent's `cronjob` tool now accept a job **name** (case-insensitive) in place of the hex ID. The agent and CLI both prefer an exact ID match if one exists; ambiguous name matches (multiple jobs sharing the same name) are refused with the full list of candidate IDs so you can pick one explicitly. Names are not unique, so this guard is load-bearing — it prevents silently mutating the wrong job when two share a name.
 
 ## How it works
 
diff --git a/website/docs/user-guide/features/curator.md b/website/docs/user-guide/features/curator.md
index 56ec4046f68..3830a8f4f17 100644
--- a/website/docs/user-guide/features/curator.md
+++ b/website/docs/user-guide/features/curator.md
@@ -130,30 +130,45 @@ The same subcommands are available as the `/curator` slash command inside a runn
 
 ## What "agent-created" means
 
-A skill is considered agent-created if its name is **not** in:
+The curator only manages skills explicitly marked as **agent-created** in
+`~/.hermes/skills/.usage.json`. A skill qualifies when ALL of the following
+are true:
 
-- `~/.hermes/skills/.bundled_manifest` (skills copied from the repo on install), and
-- `~/.hermes/skills/.hub/lock.json` (skills installed via `hermes skills install`).
+1. Its name is **not** in `~/.hermes/skills/.bundled_manifest` (bundled skills shipped with the repo).
+2. Its name is **not** in `~/.hermes/skills/.hub/lock.json` (hub-installed skills).
+3. Its `.usage.json` entry has `"created_by": "agent"` or `"agent_created": true`.
 
-Everything else in `~/.hermes/skills/` is fair game for the curator. This includes:
+Currently, only the **background self-improvement review fork** sets this marker
+— when it creates a new umbrella skill during its periodic review pass (~every 10
+agent turns). The background fork runs with a write origin of `"background_review"`
+(via `tools/skill_provenance.py`), which is the only path that triggers the
+`mark_agent_created()` call in `skill_manage`.
 
-- Skills the agent saved via `skill_manage(action="create")` during a conversation.
-- Skills you created manually with a hand-written `SKILL.md`.
-- Skills added via external skill directories you've pointed Hermes at.
+Skills the foreground agent creates via `skill_manage(action="create")` during a
+conversation are **not** marked as agent-created — they are considered
+user-directed and the curator intentionally leaves them alone.
 
-:::warning Your hand-written skills look the same as agent-saved ones
-Provenance here is **binary** (bundled/hub vs. everything else). The curator cannot tell a hand-authored skill you rely on for private workflows apart from a skill the self-improvement loop saved mid-session. Both land in the "agent-created" bucket.
+:::warning Your hand-written skills are NOT curated
+If you manually created a `SKILL.md` or pointed Hermes at an external skill
+directory, that skill will have a `.usage.json` entry with `created_by: null`
+(or the field absent). The curator will not touch it. The same applies to
+skills the foreground agent created at your request.
 
-Before the first real pass (7 days after installation by default), take a moment to:
-
-1. Run `hermes curator run --dry-run` to see exactly what the curator would propose.
-2. Use `hermes curator pin <name>` to fence off anything you don't want touched.
-3. Or set `curator.enabled: false` in `config.yaml` if you'd rather manage the library yourself.
-
-Archives are always recoverable via `hermes curator restore <name>`, but it's easier to pin up-front than to chase down a consolidation after the fact.
+**To see which skills the curator actually manages**, run `hermes curator status`.
+If the agent-created count is 0, no skills are currently in the curator's
+jurisdiction — the LLM review pass is skipped and the report will show
+`Model: (not resolved) via (not resolved)` with `Duration: 0s`.
 :::
 
-If you want to protect a specific skill from ever being touched — for example a hand-authored skill you rely on — use `hermes curator pin <name>`. See the next section.
+Skills that ARE agent-created follow the full lifecycle:
+
+- `active` → (30d unused) `stale` → (90d unused) `archived`
+- Pinned skills bypass all auto-transitions
+- Archives are recoverable via `hermes curator restore <name>`
+
+If you want to protect a specific skill from ever being touched — for example a
+hand-authored skill you rely on — use `hermes curator pin <name>`. See the next
+section.
 
 ## Pinning a skill
 
@@ -217,6 +232,15 @@ Every curator run writes a timestamped directory under `~/.hermes/logs/curator/`
 
 `REPORT.md` is a quick way to see what a given run did — which skills transitioned, what the LLM reviewer said, which skills it patched. Good for auditing without having to grep `agent.log`.
 
+:::note No candidates? Report shows `(not resolved)`
+When the curator has **no agent-created skills** to review, the LLM review pass
+is skipped entirely. The report header will show
+`Model: (not resolved) via (not resolved)` with `Duration: 0s` — this does **not**
+indicate a configuration error or model resolution failure. It simply means there
+were no candidates, so no model was ever invoked. The auto-transition phase still
+runs and reports its counts normally.
+:::
+
 ### Rename map in the summary
 
 If a run consolidated multiple skills under an umbrella (or merged near-duplicates), the user-visible summary printed at the end of the run includes an explicit rename map showing every `old-name → new-name` pair the curator applied. This is in addition to per-skill transition lines, so when a wave of renames lands you can spot them at a glance without diffing the JSON report. The hint also surfaces under `hermes curator pin` so you can pin the umbrella name immediately if you want to lock the new label in.
diff --git a/website/docs/user-guide/features/deliverable-mode.md b/website/docs/user-guide/features/deliverable-mode.md
index e08e3966fa6..65df8b535cd 100644
--- a/website/docs/user-guide/features/deliverable-mode.md
+++ b/website/docs/user-guide/features/deliverable-mode.md
@@ -63,8 +63,10 @@ personality entry that biases toward artifact-style replies on
 messaging platforms.
 
 **Project-level:** add the bias to `AGENTS.md` / `CLAUDE.md` /
-`.cursorrules` in a project the agent works from, or to your global
-custom instructions in `~/.hermes/config.yaml` under `agent.custom_instructions`.
+`.cursorrules` in a project the agent works from, to your global
+persona in `~/.hermes/SOUL.md`, or as a named preset under
+`agent.personalities` in `~/.hermes/config.yaml` (switchable per session
+via `/personality`).
 
 The mechanic the agent has to use is simple: render the file to an
 absolute path (e.g. `/tmp/q3-revenue.png`) and mention that path as
diff --git a/website/docs/user-guide/features/extending-the-dashboard.md b/website/docs/user-guide/features/extending-the-dashboard.md
index 257bdb57701..0efbe8adb4c 100644
--- a/website/docs/user-guide/features/extending-the-dashboard.md
+++ b/website/docs/user-guide/features/extending-the-dashboard.md
@@ -17,7 +17,7 @@ All three are **drop-in at runtime**: no repo clone, no `npm run build`, no patc
 If you just want to use the dashboard, see [Web Dashboard](./web-dashboard). If you want to reskin the terminal CLI (not the web dashboard), see [Skins & Themes](./skins) — the CLI skin system is unrelated to dashboard themes.
 
 :::note How the pieces compose
-Themes and plugins are independent but synergistic. A theme can stand alone (just a YAML file). A plugin can stand alone (just a tab). Together they let you build a complete visual reskin with custom HUDs — the bundled `strike-freedom-cockpit` demo does exactly that. See [Combined theme + plugin demo](#combined-theme--plugin-demo).
+Themes and plugins are independent but synergistic. A theme can stand alone (just a YAML file). A plugin can stand alone (just a tab). Together they let you build a complete visual reskin with custom HUDs — the example `strike-freedom-cockpit` demo (lives in the `hermes-example-plugins` companion repo — see [Combined theme + plugin demo](#combined-theme--plugin-demo) for install steps) does exactly that.
 :::
 
 ---
@@ -470,7 +470,7 @@ Plugins use Lucide icon names. The dashboard maps these by name — unknown name
 
 Currently mapped: `Activity`, `BarChart3`, `Clock`, `Code`, `Database`, `Eye`, `FileText`, `Globe`, `Heart`, `KeyRound`, `MessageSquare`, `Package`, `Puzzle`, `Settings`, `Shield`, `Sparkles`, `Star`, `Terminal`, `Wrench`, `Zap`.
 
-Need a different icon? Open a PR to `apps/dashboard/src/App.tsx`'s `ICON_MAP` — pure additive change.
+Need a different icon? Open a PR to `web/src/App.tsx`'s `ICON_MAP` — pure additive change.
 
 ### The Plugin SDK
 
diff --git a/website/docs/user-guide/features/fallback-providers.md b/website/docs/user-guide/features/fallback-providers.md
index 0dc972e27a6..7eaab0ea8af 100644
--- a/website/docs/user-guide/features/fallback-providers.md
+++ b/website/docs/user-guide/features/fallback-providers.md
@@ -29,18 +29,18 @@ hermes fallback
 
 `hermes fallback` reuses the provider picker from `hermes model` — same provider list, same credential prompts, same validation. Use the subcommands `add`, `list` (alias `ls`), `remove` (alias `rm`), and `clear` to manage the chain. Changes persist under the top-level `fallback_providers:` list in `config.yaml`.
 
-If you'd rather edit the YAML directly, add a `fallback_model` section to `~/.hermes/config.yaml`:
+If you'd rather edit the YAML directly, add a top-level `fallback_providers` list to `~/.hermes/config.yaml`:
 
 ```yaml
-fallback_model:
-  provider: openrouter
-  model: anthropic/claude-sonnet-4
+fallback_providers:
+  - provider: openrouter
+    model: anthropic/claude-sonnet-4
 ```
 
-Both `provider` and `model` are **required**. If either is missing, the fallback is disabled.
+Each entry requires both `provider` and `model`. Entries missing either field are ignored.
 
 :::note `fallback_model` vs `fallback_providers`
-`fallback_model` (singular) is the legacy single-fallback key — Hermes still honors it for back-compat. `fallback_providers` (plural, list) supports multiple fallbacks tried in order; `hermes fallback` writes to this key. When both are set, Hermes merges them with `fallback_providers` taking priority.
+`fallback_providers` (plural, list) is the current config shape and supports multiple fallbacks tried in order. `fallback_model` (singular) is the legacy single-fallback key — Hermes still honors it for back-compat, but `hermes fallback` writes the current `fallback_providers` key and migrates legacy config on write. When both are set, `fallback_providers` takes priority.
 :::
 
 ### Supported Providers
@@ -90,11 +90,11 @@ Both `provider` and `model` are **required**. If either is missing, the fallback
 For a custom OpenAI-compatible endpoint, add `base_url` and optionally `key_env`:
 
 ```yaml
-fallback_model:
-  provider: custom
-  model: my-local-model
-  base_url: http://localhost:8000/v1
-  key_env: MY_LOCAL_KEY              # env var name containing the API key
+fallback_providers:
+  - provider: custom
+    model: my-local-model
+    base_url: http://localhost:8000/v1
+    key_env: MY_LOCAL_KEY            # env var name containing the API key
 ```
 
 ### When Fallback Triggers
@@ -128,9 +128,9 @@ model:
   provider: anthropic
   default: claude-sonnet-4-6
 
-fallback_model:
-  provider: openrouter
-  model: anthropic/claude-sonnet-4
+fallback_providers:
+  - provider: openrouter
+    model: anthropic/claude-sonnet-4
 ```
 
 **Nous Portal as fallback for OpenRouter:**
@@ -139,25 +139,25 @@ model:
   provider: openrouter
   default: anthropic/claude-opus-4
 
-fallback_model:
-  provider: nous
-  model: nous-hermes-3
+fallback_providers:
+  - provider: nous
+    model: nous-hermes-3
 ```
 
 **Local model as fallback for cloud:**
 ```yaml
-fallback_model:
-  provider: custom
-  model: llama-3.1-70b
-  base_url: http://localhost:8000/v1
-  key_env: LOCAL_API_KEY
+fallback_providers:
+  - provider: custom
+    model: llama-3.1-70b
+    base_url: http://localhost:8000/v1
+    key_env: LOCAL_API_KEY
 ```
 
 **Codex OAuth as fallback:**
 ```yaml
-fallback_model:
-  provider: openai-codex
-  model: gpt-5.3-codex
+fallback_providers:
+  - provider: openai-codex
+    model: gpt-5.3-codex
 ```
 
 ### Where Fallback Works
@@ -166,12 +166,12 @@ fallback_model:
 |---------|-------------------|
 | CLI sessions | ✔ |
 | Messaging gateway (Telegram, Discord, etc.) | ✔ |
-| Subagent delegation | ✘ (subagents do not inherit fallback config) |
-| Cron jobs | ✘ (run with a fixed provider) |
+| Subagent delegation | ✔ (subagents inherit the parent fallback chain) |
+| Cron jobs | ✔ (cron agents inherit configured fallback providers) |
 | Auxiliary tasks (vision, compression) | ✘ (use their own provider chain — see below) |
 
 :::tip
-There are no environment variables for `fallback_model` — it is configured exclusively through `config.yaml`. This is intentional: fallback configuration is a deliberate choice, not something a stale shell export should override.
+There are no environment variables for the primary fallback chain — configure it exclusively through `config.yaml` or `hermes fallback`. This is intentional: fallback configuration is a deliberate choice, not something a stale shell export should override.
 :::
 
 ---
@@ -252,20 +252,20 @@ auxiliary:
     base_url: null                                    # Custom OpenAI-compatible endpoint
 ```
 
-And the fallback model uses:
+And the primary fallback chain uses:
 
 ```yaml
-fallback_model:
-  provider: openrouter
-  model: anthropic/claude-sonnet-4
-  # base_url: http://localhost:8000/v1               # Optional custom endpoint
+fallback_providers:
+  - provider: openrouter
+    model: anthropic/claude-sonnet-4
+    # base_url: http://localhost:8000/v1             # Optional custom endpoint
 ```
 
 All three — auxiliary, compression, fallback — work the same way: set `provider` to pick who handles the request, `model` to pick which model, and `base_url` to point at a custom endpoint (overrides provider).
 
 ### Provider Options for Auxiliary Tasks
 
-These options apply to `auxiliary:`, `compression:`, and `fallback_model:` configs only — `"main"` is **not** a valid value for your top-level `model.provider`. For custom endpoints, use `provider: custom` in your `model:` section (see [AI Providers](/integrations/providers)).
+These options apply to `auxiliary:`, `compression:`, and `fallback_providers:` entries only — `"main"` is **not** a valid value for your top-level `model.provider`. For custom endpoints, use `provider: custom` in your `model:` section (see [AI Providers](/integrations/providers)).
 
 | Provider | Description | Requirements |
 |----------|-------------|-------------|
@@ -362,7 +362,7 @@ If no provider is available for compression, Hermes drops middle conversation tu
 
 ## Delegation Provider Override
 
-Subagents spawned by `delegate_task` do **not** use the primary fallback model. However, they can be routed to a different provider:model pair for cost optimization:
+Subagents spawned by `delegate_task` inherit the parent agent's primary fallback chain. You can still route subagents to a different primary provider:model pair for cost optimization:
 
 ```yaml
 delegation:
@@ -378,7 +378,7 @@ See [Subagent Delegation](/user-guide/features/delegation) for full configuratio
 
 ## Cron Job Providers
 
-Cron jobs run with whatever provider is configured at execution time. They do not support a fallback model. To use a different provider for cron jobs, configure `provider` and `model` overrides on the cron job itself:
+Cron jobs inherit your configured `fallback_providers` chain (or legacy `fallback_model`) when they create an agent. To use a different primary provider for a cron job, configure `provider` and `model` overrides on the cron job itself:
 
 ```python
 cronjob(
@@ -398,7 +398,7 @@ See [Scheduled Tasks (Cron)](/user-guide/features/cron) for full configuration d
 
 | Feature | Fallback Mechanism | Config Location |
 |---------|-------------------|----------------|
-| Main agent model | `fallback_model` in config.yaml — per-turn failover on errors (primary restored each turn) | `fallback_model:` (top-level) |
+| Main agent model | `fallback_providers` in config.yaml — per-turn failover on errors (primary restored each turn) | `fallback_providers:` (top-level list) |
 | Auxiliary tasks (any) — auto users | Full auto-detection chain (main agent model first, then provider chain) on capacity errors | `auxiliary.<task>.provider: auto` |
 | Auxiliary tasks (any) — explicit provider | `fallback_chain` (if set) → main agent model → warn + raise, on capacity errors only | `auxiliary.<task>.fallback_chain` |
 | Vision | Layered (see above) + internal OpenRouter retry | `auxiliary.vision` |
diff --git a/website/docs/user-guide/features/image-generation.md b/website/docs/user-guide/features/image-generation.md
index 73fa4b334fc..4f225ee00b1 100644
--- a/website/docs/user-guide/features/image-generation.md
+++ b/website/docs/user-guide/features/image-generation.md
@@ -1,13 +1,13 @@
 ---
 title: Image Generation
-description: Generate images via FAL.ai — 9 models including FLUX 2, GPT Image (1.5 & 2), Nano Banana Pro, Ideogram, Recraft V4 Pro, and more, selectable via `hermes tools`.
+description: Generate images via FAL.ai — 11 models including FLUX 2, GPT Image (1.5 & 2), Nano Banana Pro, Ideogram, Recraft V4 Pro, Krea 2, and more, selectable via `hermes tools`.
 sidebar_label: Image Generation
 sidebar_position: 6
 ---
 
 # Image Generation
 
-Hermes Agent generates images from text prompts via FAL.ai. Nine models are supported out of the box, each with different speed, quality, and cost tradeoffs. The active model is user-configurable via `hermes tools` and persists in `config.yaml`.
+Hermes Agent generates images from text prompts via FAL.ai. Eleven models are supported out of the box, each with different speed, quality, and cost tradeoffs. The active model is user-configurable via `hermes tools` and persists in `config.yaml`.
 
 ## Supported Models
 
@@ -22,6 +22,8 @@ Hermes Agent generates images from text prompts via FAL.ai. Nine models are supp
 | `fal-ai/ideogram/v3` | ~5s | Best typography | $0.03–0.09/image |
 | `fal-ai/recraft/v4/pro/text-to-image` | ~8s | Design, brand systems, production-ready | $0.25/image |
 | `fal-ai/qwen-image` | ~12s | LLM-based, complex text | $0.02/MP |
+| `fal-ai/krea/v2/medium/text-to-image` | ~15-25s | Illustration, anime, painting, expressive/artistic styles | $0.030–0.035/image |
+| `fal-ai/krea/v2/large/text-to-image` | ~25-60s | Photorealism, raw textured looks (motion blur, grain, film) | $0.060–0.065/image |
 
 Prices are FAL's pricing at time of writing; check [fal.ai](https://fal.ai/) for current numbers.
 
diff --git a/website/docs/user-guide/features/kanban.md b/website/docs/user-guide/features/kanban.md
index 7a51957828d..ede083b0590 100644
--- a/website/docs/user-guide/features/kanban.md
+++ b/website/docs/user-guide/features/kanban.md
@@ -604,7 +604,10 @@ hermes kanban create "<title>" [--body ...] [--assignee <profile>]
                                 [--max-retries N]
                                 [--skill <name>]...
                                 [--json]
-hermes kanban list [--mine] [--assignee P] [--status S] [--tenant T] [--archived] [--json]
+hermes kanban list [--mine] [--assignee P] [--status S] [--tenant T] [--archived]
+        [--workflow-template-id <id>] [--current-step-key <key>]
+        [--sort created|created-desc|priority|priority-desc|status|assignee|title|updated]
+        [--json]
 hermes kanban show <id> [--json]
 hermes kanban assign <id> <profile>                    # or 'none' to unassign
 hermes kanban link <parent_id> <child_id>
@@ -646,6 +649,62 @@ All commands are also available as a slash command in the interactive CLI and in
 
 `--max-retries` is a per-task circuit-breaker override for the dispatcher. `--max-retries 1` blocks the task on the first non-successful attempt, while `--max-retries 3` allows two retries and blocks on the third failure. Omit it to use `kanban.failure_limit` from `config.yaml`, then the built-in default.
 
+### Concurrency, scheduling, and child promotion config
+
+| Config key | Default | What it does |
+|------------|---------|--------------|
+| `kanban.max_in_progress` | unset (unlimited) | Caps the number of simultaneously running tasks. When the board already has N running, the dispatcher skips spawning more — useful for slow workers (local LLMs, resource-constrained hosts) so they finish what they have before more pile up and time out. Invalid or below-1 values log a warning and behave as unlimited. |
+| `kanban.auto_promote_children` | `true` | After `decompose_triage_task()` produces children with no parent-blocker dependencies, they're automatically promoted to `ready` so the dispatcher can pick them up. Set to `false` to require manual review — children stay in `todo` until you promote them. |
+| `kanban.default_workdir` | unset | Board-level default working directory applied to new tasks when neither `--workspace` nor the task itself overrides it. Per-task `workspace:` still wins. |
+
+```yaml
+kanban:
+  max_in_progress: 2
+  auto_promote_children: false
+  default_workdir: ~/work/active-project
+```
+
+### Scheduled task starts (`scheduled_at`)
+
+Set `scheduled_at` on a task to delay dispatch until a specific time. The dispatcher skips ready tasks whose `scheduled_at` is in the future and picks them up on the first tick after that timestamp.
+
+```bash
+hermes kanban create "nightly backup audit" \
+  --assignee ops --scheduled-at "2026-06-01T03:00:00Z"
+```
+
+### Respawn guard
+
+The dispatcher refuses to re-spawn a ready task when it hit a quota/auth/429 error on the previous run (`blocker_auth`), or completed a run successfully within the guard window (`recent_success`), or a recent task comment links to a GitHub PR (`active_pr`). This prevents repeat worker storms on the same bug or task while a human catches up. See the `respawn_guarded` row in the [event reference](#event-reference).
+
+### Drag-to-delete and bulk delete (dashboard)
+
+The dashboard exposes a **trash drop zone** on the kanban page — drag any card into it to delete the task (cascades through `task_events`, child links, and subscriptions). A confirmation prompt protects against accidents. Bulk delete is also reachable via `DELETE /api/plugins/kanban/tasks` with a JSON body `{"ids": ["t_abc", "t_def", ...]}`.
+
+### Worker visibility endpoints
+
+The dashboard plugin API now exposes three read-only endpoints for external monitors:
+
+| Endpoint | Returns |
+|----------|---------|
+| `GET /api/plugins/kanban/workers/active` | Currently spawned workers with PID, profile, task id, started-at, last heartbeat |
+| `GET /api/plugins/kanban/runs/{id}` | Single-run detail — task id, status, started/ended, exit code, log path |
+| `GET /api/plugins/kanban/inspect` | Combined dispatcher snapshot — backlog, in-progress count vs. `max_in_progress`, recent events |
+
+All three are gated by the same dashboard plugin auth as the rest of the kanban plugin API.
+
+### Kanban Swarm topology helper
+
+`hermes kanban swarm` creates a durable **Kanban Swarm v1** graph in one shot: a completed root/blackboard card, N parallel worker cards, a verifier card gated on all workers, and a synthesizer card gated on the verifier. Shared swarm context (the "blackboard") is stored as structured JSON comments on the root card so any worker can read it.
+
+```bash
+hermes kanban swarm "Design a multi-region failover plan" \
+  --workers researcher,architect,sre \
+  --verifier reviewer --synthesizer writer
+```
+
+The resulting graph dispatches normally — workers run in parallel, the verifier wakes after they all finish, the synthesizer wakes after the verifier marks the work clean.
+
 ## `/kanban` slash command {#kanban-slash-command}
 
 Every `hermes kanban <action>` verb is also reachable as `/kanban <action>` — from inside an interactive `hermes chat` session **and** from any gateway platform (Telegram, Discord, Slack, WhatsApp, Signal, Matrix, Mattermost, email, SMS). Both surfaces call the exact same `hermes_cli.kanban.run_slash()` entry point that reuses the `hermes kanban` argparse tree, so the argument surface, flags, and output format are identical across CLI, `/kanban`, and `hermes kanban`. You don't have to leave the chat to drive the board.
diff --git a/website/docs/user-guide/features/mcp.md b/website/docs/user-guide/features/mcp.md
index 071a97c3194..c2232f11c1a 100644
--- a/website/docs/user-guide/features/mcp.md
+++ b/website/docs/user-guide/features/mcp.md
@@ -229,6 +229,20 @@ On first connect, Hermes prints an authorize URL, opens your browser when possib
 
 See [OAuth over SSH / Remote Hosts](../../guides/oauth-over-ssh.md#mcp-servers) for the full walkthrough, including DCR-less servers (e.g. Slack), pre-registered `client_id`/`client_secret`, scope customization, and re-auth via `hermes mcp login <server>`.
 
+**Pitfall — providers that don't support automatic registration (Google Drive, Atlassian).** Some servers reject the dynamic client registration step (RFC 7591) that bare `auth: oauth` relies on — Google's official Drive server (`https://drivemcp.googleapis.com/mcp/v1`) returns a `400 Bad Request`, so no OAuth client is created and no token is acquired. The symptom is subtle: these servers also serve `tools/list` *without* auth, so `hermes mcp login` can list the tools and look like it worked, but every real tool call later times out. `hermes mcp login` now detects this (it checks that a token actually landed on disk) and tells you to supply your own OAuth client. Create one in the provider's console and add it to config:
+
+```yaml
+mcp_servers:
+  googledrive:
+    url: "https://drivemcp.googleapis.com/mcp/v1"
+    auth: oauth
+    oauth:
+      client_id: "<your-oauth-client-id>"
+      client_secret: "<your-oauth-client-secret>"
+```
+
+Then run `hermes mcp login googledrive` — with the pre-registered client, Hermes skips registration and runs the normal browser authorization flow.
+
 **Pitfall — config auto-reload race.** When you edit `~/.hermes/config.yaml` from inside a running Hermes session, the CLI auto-reloads MCP connections with a 30s timeout. That's not enough for an interactive OAuth flow. Add the entry, then run `hermes mcp login <server>` from a fresh terminal — it waits the full 5 minutes for you to complete auth.
 
 ## Basic configuration reference
diff --git a/website/docs/user-guide/features/memory-providers.md b/website/docs/user-guide/features/memory-providers.md
index 91d4f5bba60..f584c7288a8 100644
--- a/website/docs/user-guide/features/memory-providers.md
+++ b/website/docs/user-guide/features/memory-providers.md
@@ -68,7 +68,7 @@ hermes memory setup        # select "honcho" — runs the Honcho-specific post-s
 
 The legacy `hermes honcho setup` command still works (it now redirects to `hermes memory setup`), but is only registered after Honcho is selected as the active memory provider.
 
-**Config:** `$HERMES_HOME/honcho.json` (profile-local) or `~/.honcho/config.json` (global). Resolution order: `$HERMES_HOME/honcho.json` > `~/.hermes/honcho.json` > `~/.honcho/config.json`. See the [config reference](https://github.com/hermes-ai/hermes-agent/blob/main/plugins/memory/honcho/README.md) and the [Honcho integration guide](https://docs.honcho.dev/v3/guides/integrations/hermes).
+**Config:** `$HERMES_HOME/honcho.json` (profile-local) or `~/.honcho/config.json` (global). Resolution order: `$HERMES_HOME/honcho.json` > `~/.hermes/honcho.json` > `~/.honcho/config.json`. See the [config reference](https://github.com/NousResearch/hermes-agent/blob/main/plugins/memory/honcho/README.md) and the [Honcho integration guide](https://docs.honcho.dev/v3/guides/integrations/hermes).
 
 <details>
 <summary>Full config reference</summary>
@@ -255,7 +255,7 @@ See the [Honcho page](./honcho.md#observation-directional-vs-unified) for the fu
 
 </details>
 
-See the [config reference](https://github.com/hermes-ai/hermes-agent/blob/main/plugins/memory/honcho/README.md) and [Honcho integration guide](https://docs.honcho.dev/v3/guides/integrations/hermes).
+See the [config reference](https://github.com/NousResearch/hermes-agent/blob/main/plugins/memory/honcho/README.md) and [Honcho integration guide](https://docs.honcho.dev/v3/guides/integrations/hermes).
 
 
 ---
@@ -520,6 +520,27 @@ echo 'SUPERMEMORY_API_KEY=***' >> ~/.hermes/.env
 
 **Support:** [Discord](https://supermemory.link/discord) · [support@supermemory.com](mailto:support@supermemory.com)
 
+### Memori
+
+Structured long-term memory using Memori Cloud, with background completed-turn capture, tool-aware turn context, and explicit recall tools for facts, summaries, quota, signup, and feedback.
+
+| | |
+|---|---|
+| **Best for** | Agent-controlled recall with structured project and session attribution |
+| **Requires** | `pip install hermes-memori` + `hermes-memori install` + [Memori API key](https://app.memorilabs.ai/signup) |
+| **Data storage** | Memori Cloud |
+| **Cost** | Memori pricing |
+
+**Tools:** `memori_recall` (search long-term memory), `memori_recall_summary` (summarized context), `memori_quota` (usage/quota), `memori_signup` (request signup email), `memori_feedback` (send integration feedback)
+
+**Setup:**
+```bash
+pip install hermes-memori
+hermes-memori install
+hermes config set memory.provider memori
+hermes memory setup
+```
+
 ---
 
 ## Provider Comparison
@@ -534,6 +555,7 @@ echo 'SUPERMEMORY_API_KEY=***' >> ~/.hermes/.env
 | **RetainDB** | Cloud | $20/mo | 5 | `requests` | Delta compression |
 | **ByteRover** | Local/Cloud | Free/Paid | 3 | `brv` CLI | Pre-compression extraction |
 | **Supermemory** | Cloud | Paid | 4 | `supermemory` | Context fencing + session graph ingest + multi-container |
+| **Memori** | Cloud | Free/Paid | 5 | `hermes-memori` | Tool-aware memory + structured recall |
 
 ## Profile Isolation
 
diff --git a/website/docs/user-guide/features/overview.md b/website/docs/user-guide/features/overview.md
index 5ad06641540..5f6c04f5ca8 100644
--- a/website/docs/user-guide/features/overview.md
+++ b/website/docs/user-guide/features/overview.md
@@ -8,6 +8,10 @@ sidebar_position: 1
 
 Hermes Agent includes a rich set of capabilities that extend far beyond basic chat. From persistent memory and file-aware context to browser automation and voice conversations, these features work together to make Hermes a powerful autonomous assistant.
 
+:::tip Don't know where to start?
+`hermes setup --portal` covers a model provider plus all four Tool Gateway tools (web search, image generation, TTS, browser) in one command. See [Nous Portal](/integrations/nous-portal).
+:::
+
 ## Core
 
 - **[Tools & Toolsets](tools.md)** — Tools are functions that extend the agent's capabilities. They're organized into logical toolsets that can be enabled or disabled per platform, covering web search, terminal execution, file editing, memory, delegation, and more.
@@ -43,7 +47,7 @@ Hermes Agent includes a rich set of capabilities that extend far beyond basic ch
 - **[Memory Providers](memory-providers.md)** — Plug in external memory backends (Honcho, OpenViking, Mem0, Hindsight, Holographic, RetainDB, ByteRover, Supermemory) for cross-session user modeling and personalization beyond the built-in memory system.
 - **[API Server](api-server.md)** — Expose Hermes as an OpenAI-compatible HTTP endpoint. Connect any frontend that speaks the OpenAI format — Open WebUI, LobeChat, LibreChat, and more.
 - **[IDE Integration (ACP)](acp.md)** — Use Hermes inside ACP-compatible editors such as VS Code, Zed, and JetBrains. Chat, tool activity, file diffs, and terminal commands render inside your editor.
-- **[RL Training](rl-training.md)** — Generate trajectory data from agent sessions for reinforcement learning and model fine-tuning.
+- **[Batch Processing](batch-processing.md)** — Run the agent over many prompts or tasks in parallel from the CLI, with structured outputs and trajectory capture suitable for evals or downstream training pipelines.
 
 ## Customization
 
diff --git a/website/docs/user-guide/features/provider-routing.md b/website/docs/user-guide/features/provider-routing.md
index 6da57a58e5b..3dd6e69787e 100644
--- a/website/docs/user-guide/features/provider-routing.md
+++ b/website/docs/user-guide/features/provider-routing.md
@@ -11,6 +11,10 @@ When using [OpenRouter](https://openrouter.ai) as your LLM provider, Hermes Agen
 
 OpenRouter routes requests to many providers (e.g., Anthropic, Google, AWS Bedrock, Together AI). Provider routing lets you optimize for cost, speed, quality, or enforce specific provider requirements.
 
+:::tip
+Traffic routed through [Nous Portal](/integrations/nous-portal) still respects per-model routing and priority configs — and Portal subscribers get 10% off token-billed providers.
+:::
+
 ## Configuration
 
 Add a `provider_routing` section to your `~/.hermes/config.yaml`:
diff --git a/website/docs/user-guide/features/skills.md b/website/docs/user-guide/features/skills.md
index c58dbb391cd..25376afcf51 100644
--- a/website/docs/user-guide/features/skills.md
+++ b/website/docs/user-guide/features/skills.md
@@ -411,7 +411,7 @@ hermes skills tap add myorg/skills-repo           # Add a custom GitHub source
 | `well-known` | `well-known:https://mintlify.com/docs/.well-known/skills/mintlify` | Skills served directly from `/.well-known/skills/index.json` on a website. Search using the site or docs URL. |
 | `url` | `https://sharethis.chat/SKILL.md` | Direct HTTP(S) URL to a single-file `SKILL.md`. Name resolution: frontmatter → URL slug → interactive prompt → `--name` flag. |
 | `github` | `openai/skills/k8s` | Direct GitHub repo/path installs and custom taps. |
-| `clawhub`, `lobehub`, `browse-sh`, `claude-marketplace` | Source-specific identifiers | Community or marketplace integrations. |
+| `clawhub`, `lobehub`, `browse-sh` | Source-specific identifiers | Community or marketplace integrations. |
 
 ### Integrated hubs and registries
 
@@ -419,7 +419,7 @@ Hermes currently integrates with these skills ecosystems and discovery sources:
 
 #### 1. Official optional skills (`official`)
 
-These are maintained in the Hermes repository itself and install with builtin trust.
+These are maintained in the Hermes repository itself and install with built-in trust.
 
 - Catalog: [Official Optional Skills Catalog](../../reference/optional-skills-catalog)
 - Source in repo: `optional-skills/`
@@ -467,6 +467,7 @@ Default taps (browsable without any setup):
 - [openai/skills](https://github.com/openai/skills)
 - [anthropics/skills](https://github.com/anthropics/skills)
 - [huggingface/skills](https://github.com/huggingface/skills)
+- [NVIDIA/skills](https://github.com/NVIDIA/skills) — NVIDIA-verified skills (signed `skill.oms.sig` + governance `skill-card.md`)
 - [garrytan/gstack](https://github.com/garrytan/gstack)
 
 - Example:
@@ -476,6 +477,25 @@ hermes skills install openai/skills/k8s
 hermes skills tap add myorg/skills-repo
 ```
 
+**Category groupings (`skills.sh.json`).** A GitHub tap may ship a
+`skills.sh.json` file at its repo root following the
+[skills.sh schema](https://skills.sh/schemas/skills.sh.schema.json). Its
+`groupings` (each with a `title` and a list of skill names) are read at index
+time and become the category labels shown in the
+[Skills Hub](https://hermes-agent.nousresearch.com/docs) page — instead of a
+tag-derived guess. This is generic: any tap that ships the file gets real
+categorization, no Hermes-side changes required.
+
+```json
+{
+  "$schema": "https://skills.sh/schemas/skills.sh.schema.json",
+  "groupings": [
+    { "title": "Inference AI", "skills": ["dynamo-recipe-runner", "dynamo-router-sla"] },
+    { "title": "Decision Optimization", "skills": ["cuopt-developer", "cuopt-install"] }
+  ]
+}
+```
+
 #### 5. ClawHub (`clawhub`)
 
 A third-party skills marketplace integrated as a community source.
@@ -569,15 +589,15 @@ hermes skills install skills-sh/anthropics/skills/pdf --force
 Important behavior:
 - `--force` can override policy blocks for caution/warn-style findings.
 - `--force` does **not** override a `dangerous` scan verdict.
-- Official optional skills (`official/...`) are treated as builtin trust and do not show the third-party warning panel.
+- Official optional skills (`official/...`) are treated as built-in trust and do not show the third-party warning panel.
 
 ### Trust levels
 
 | Level | Source | Policy |
 |-------|--------|--------|
 | `builtin` | Ships with Hermes | Always trusted |
-| `official` | `optional-skills/` in the repo | Builtin trust, no third-party warning |
-| `trusted` | Trusted registries/repos such as `openai/skills`, `anthropics/skills`, `huggingface/skills` | More permissive policy than community sources |
+| `official` | `optional-skills/` in the repo | Built-in trust, no third-party warning |
+| `trusted` | Trusted registries/repos such as `openai/skills`, `anthropics/skills`, `huggingface/skills`, `NVIDIA/skills` | More permissive policy than community sources |
 | `community` | Everything else (`skills.sh`, well-known endpoints, custom GitHub repos, most marketplaces) | Non-dangerous findings can be overridden with `--force`; `dangerous` verdicts stay blocked |
 
 ### Update lifecycle
diff --git a/website/docs/user-guide/features/subscription-proxy.md b/website/docs/user-guide/features/subscription-proxy.md
index 0625ba45b32..5aa4cfeabb6 100644
--- a/website/docs/user-guide/features/subscription-proxy.md
+++ b/website/docs/user-guide/features/subscription-proxy.md
@@ -72,9 +72,9 @@ automatically when the bearer approaches expiry.
 hermes proxy providers
 ```
 
-Currently shipped: `nous` (Nous Portal). More OAuth providers can be
-added by implementing the `UpstreamAdapter` interface in
-`hermes_cli/proxy/adapters/`.
+Currently shipped: `nous` (Nous Portal) and `xai` (xAI / Grok). More
+OAuth providers can be added by implementing the `UpstreamAdapter`
+interface in `hermes_cli/proxy/adapters/`.
 
 ## Check status
 
diff --git a/website/docs/user-guide/features/tool-search.md b/website/docs/user-guide/features/tool-search.md
new file mode 100644
index 00000000000..fb65ad29be3
--- /dev/null
+++ b/website/docs/user-guide/features/tool-search.md
@@ -0,0 +1,159 @@
+---
+title: Tool Search
+sidebar_position: 95
+---
+
+# Tool Search
+
+When you have many MCP servers or non-core plugin tools attached to a
+session, their JSON schemas can consume a substantial fraction of the
+context window on every turn — even when only a few of them are relevant
+to what the user actually asked for.
+
+**Tool Search** is Hermes' opt-in progressive-disclosure layer for that
+problem. When activated, MCP and plugin tools are replaced in the
+model-visible tools array by three bridge tools, and the model loads each
+specific tool's schema on demand.
+
+:::info Built-in Hermes tools never defer
+The tools that make up Hermes' core capability set (`terminal`,
+`read_file`, `write_file`, `patch`, `search_files`, `todo`, `memory`,
+`browser_*`, `web_search`, `web_extract`, `clarify`, `execute_code`,
+`delegate_task`, `session_search`, `send_message`, and the rest of
+`_HERMES_CORE_TOOLS`) are *always* loaded directly. Only MCP tools and
+non-core plugin tools are eligible for deferral.
+:::
+
+## How it works
+
+When Tool Search activates for a turn, the model sees three new tools in
+place of the deferred ones:
+
+```
+tool_search(query, limit?)     — search the deferred-tool catalog
+tool_describe(name)            — load the full schema for one tool
+tool_call(name, arguments)     — invoke a deferred tool
+```
+
+A typical interaction looks like:
+
+```
+Model: tool_search("create a github issue")
+  → { matches: [{ name: "mcp_github_create_issue", ... }, ...] }
+Model: tool_describe("mcp_github_create_issue")
+  → { parameters: { type: "object", properties: { ... } } }
+Model: tool_call("mcp_github_create_issue", { title: "...", body: "..." })
+  → { ok: true, issue_number: 42 }
+```
+
+When the model invokes `tool_call`, Hermes **unwraps the bridge** and
+dispatches the underlying tool exactly as if the model had called it
+directly. Pre-tool-call hooks, guardrails, approval prompts, and
+post-tool-call hooks all run against the real tool name — not against
+`tool_call`. The activity feed in the CLI and gateway also unwraps so you
+see the underlying tool, not the bridge.
+
+## When does it activate?
+
+By default Tool Search runs in `auto` mode: it activates only when the
+deferrable tool schemas would consume at least 10% of the active model's
+context window. Below that, the tools-array assembly is a pure
+pass-through and you pay no overhead.
+
+This decision is re-evaluated every time the tools array is built, so:
+
+- A session with just a few MCP tools and a long context model never
+  activates Tool Search.
+- A session with many MCP servers attached (15+ tools typically) starts
+  activating it.
+- Removing MCP servers mid-session correctly returns to direct exposure
+  on the next assembly.
+
+## Configuration
+
+```yaml
+tools:
+  tool_search:
+    enabled: auto       # auto (default), on, or off
+    threshold_pct: 10   # percentage of context — only used in auto mode
+    search_default_limit: 5
+    max_search_limit: 20
+```
+
+| Key | Default | Meaning |
+| --- | --- | --- |
+| `enabled` | `auto` | `auto` activates above threshold; `on` always activates if there's at least one deferrable tool; `off` disables entirely. |
+| `threshold_pct` | `10` | Percentage of context length at which `auto` mode kicks in. Range 0–100. |
+| `search_default_limit` | `5` | Hits returned when the model calls `tool_search` without a `limit`. |
+| `max_search_limit` | `20` | Hard upper bound the model can request via `limit`. Range 1–50. |
+
+You can also flip the legacy boolean shape:
+
+```yaml
+tools:
+  tool_search: true   # equivalent to {enabled: auto}
+```
+
+## When NOT to use it
+
+Tool Search trades a fixed per-turn token cost (the three bridge tool
+schemas, ~300 tokens) and at least one extra round trip (search →
+describe → call) for the savings on the deferred schemas. It's a clear
+win when you have many tools and use few per turn; it's overhead when
+you have few tools total.
+
+The `auto` default handles this for you. If you set `enabled: on`
+unconditionally, expect a slight per-turn cost on small toolsets.
+
+## Trade-offs that don't go away
+
+These come from the prompt-cache integrity invariant — they are inherent
+to any progressive-disclosure design, not specific to this implementation:
+
+- **One extra round trip on cold tools.** The first time the model needs
+  a deferred tool, it spends one or two extra model calls to find and
+  load the schema. The token savings on the static side are real, but a
+  portion is paid back at runtime.
+- **No cache benefit on deferred schemas.** A loaded `tool_describe`
+  result enters the conversation history (so it does get cached on
+  subsequent turns) but it never benefits from the system-prompt cache
+  prefix.
+- **Model-quality dependence.** Tool Search assumes the model can write a
+  reasonable search query for the tool it wants. Smaller models do this
+  less well; the published Anthropic numbers (49% → 74% on Opus 4 with
+  vs. without tool search) show the upside but also that ~26 points of
+  accuracy is still retrieval failure.
+- **Toolset edits invalidate cache.** Adding or removing a tool mid-
+  session changes the bridge tools' descriptions (which include the
+  count of deferred tools) and the catalog, so the prompt cache is
+  invalidated. This is the same trade-off as any toolset edit.
+
+## Implementation details
+
+- **Retrieval:** BM25 over tokenized tool name + description + parameter
+  names. Falls back to a literal substring match on the tool name when
+  BM25 returns no positive-score hits, which protects against
+  zero-IDF degenerate cases (e.g. searching `"github"` against a
+  catalog where every tool name contains "github").
+- **Catalog is stateless across turns.** It rebuilds from the current
+  tool-defs list every assembly — no session-keyed `Map`. This avoids
+  the class of bug where a stored catalog drifts out of sync with the
+  live tool registry.
+- **The catalog is scoped to the session's toolsets.** `tool_search`,
+  `tool_describe`, and `tool_call` only ever see and invoke tools the
+  session was actually granted. A subagent, kanban worker, or gateway
+  session restricted to a subset of toolsets cannot use the bridge to
+  discover or call a tool outside that subset — the deferred catalog is
+  the deferrable slice of the session's own enabled/disabled toolsets,
+  not the whole process registry.
+- **No JS sandbox.** Hermes uses the simpler "structured tools" mode
+  (search / describe / call as plain functions). The JS-sandbox "code
+  mode" some other implementations offer is a large surface area; we
+  skip it.
+
+## See also
+
+- `tools/tool_search.py` — the implementation
+- `tests/tools/test_tool_search.py` — the regression suite
+- The `openclaw-tool-search-report` PDF in the original implementation
+  PR for the research that shaped the design
diff --git a/website/docs/user-guide/features/tts.md b/website/docs/user-guide/features/tts.md
index fa879cac17f..96c33d745b9 100644
--- a/website/docs/user-guide/features/tts.md
+++ b/website/docs/user-guide/features/tts.md
@@ -113,6 +113,7 @@ Each provider has a documented per-request input-character cap. Hermes truncates
 | ElevenLabs | Model-aware (see below) |
 | NeuTTS | 2000 |
 | KittenTTS | 2000 |
+| Piper | 5000 |
 
 **ElevenLabs** picks a cap from the configured `model_id`:
 
diff --git a/website/docs/user-guide/features/vision.md b/website/docs/user-guide/features/vision.md
index efe1a344ab2..44352af392d 100644
--- a/website/docs/user-guide/features/vision.md
+++ b/website/docs/user-guide/features/vision.md
@@ -9,6 +9,10 @@ sidebar_position: 7
 
 Hermes Agent supports **multimodal vision** — you can paste images from your clipboard directly into the CLI and ask the agent to analyze, describe, or work with them. Images are sent to the model as base64-encoded content blocks, so any vision-capable model can process them.
 
+:::tip
+Portal subscribers get vision-capable models (Claude, GPT-5, Gemini) in the same catalog — no extra credentials needed. See [Nous Portal](/integrations/nous-portal).
+:::
+
 ## How It Works
 
 1. Copy an image to your clipboard (screenshot, browser image, etc.)
diff --git a/website/docs/user-guide/features/web-dashboard.md b/website/docs/user-guide/features/web-dashboard.md
index 8d651f04d2f..54b058f2250 100644
--- a/website/docs/user-guide/features/web-dashboard.md
+++ b/website/docs/user-guide/features/web-dashboard.md
@@ -8,6 +8,10 @@ description: "Browser-based dashboard for managing configuration, API keys, sess
 
 The web dashboard is a browser-based UI for managing your Hermes Agent installation. Instead of editing YAML files or running CLI commands, you can configure settings, manage API keys, and monitor sessions from a clean web interface.
 
+:::tip
+Hosted-mode auth uses Nous Portal OAuth; if you also want the dashboard to talk to a real backend, `hermes setup --portal` wires up the model and tool gateway too. See [Nous Portal](/integrations/nous-portal).
+:::
+
 ## Quick Start
 
 ```bash
@@ -491,7 +495,7 @@ If you're contributing to the web dashboard frontend:
 hermes dashboard --no-open
 
 # Terminal 2: start the Vite dev server with HMR
-cd apps/dashboard/
+cd web/
 npm install
 npm run dev
 ```
diff --git a/website/docs/user-guide/features/web-search.md b/website/docs/user-guide/features/web-search.md
index 645d1a4c629..161b91ec83c 100644
--- a/website/docs/user-guide/features/web-search.md
+++ b/website/docs/user-guide/features/web-search.md
@@ -1,6 +1,6 @@
 ---
 title: Web Search & Extract
-description: Search the web, extract page content, and crawl websites with multiple backend providers — including free self-hosted SearXNG.
+description: Search the web and extract page content with multiple backend providers — including free self-hosted SearXNG.
 sidebar_label: Web Search
 sidebar_position: 6
 ---
@@ -10,22 +10,22 @@ sidebar_position: 6
 Hermes Agent includes two model-callable web tools backed by multiple providers:
 
 - **`web_search`** — search the web and return ranked results
-- **`web_extract`** — fetch and extract readable content from one or more URLs (with built-in deep-crawl support when the backend provides it)
+- **`web_extract`** — fetch and extract readable content from one or more URLs
 
-Both are configured through a single backend selection. Providers are chosen via `hermes tools` or set directly in `config.yaml`. Recursive crawling capabilities (Firecrawl/Tavily) are exposed through `web_extract` rather than as a separate `web_crawl` tool.
+Both are configured through a single backend selection. Providers are chosen via `hermes tools` or set directly in `config.yaml`.
 
 ## Backends
 
-| Provider | Env Var | Search | Extract | Crawl | Free tier |
-|----------|---------|--------|---------|-------|-----------|
-| **Firecrawl** (default) | `FIRECRAWL_API_KEY` | ✔ | ✔ | ✔ | 500 credits/mo |
-| **SearXNG** | `SEARXNG_URL` | ✔ | — | — | ✔ Free (self-hosted) |
-| **Brave Search (free tier)** | `BRAVE_SEARCH_API_KEY` | ✔ | — | — | 2 000 queries/mo |
-| **DDGS (DuckDuckGo)** | — (no key) | ✔ | — | — | ✔ Free |
-| **Tavily** | `TAVILY_API_KEY` | ✔ | ✔ | ✔ | 1 000 searches/mo |
-| **Exa** | `EXA_API_KEY` | ✔ | ✔ | — | 1 000 searches/mo |
-| **Parallel** | `PARALLEL_API_KEY` | ✔ | ✔ | — | Paid |
-| **xAI (Grok)** | `XAI_API_KEY` or `hermes auth login xai-oauth` | ✔ | — | — | Paid (SuperGrok or per-token) |
+| Provider | Env Var | Search | Extract | Free tier |
+|----------|---------|--------|---------|-----------|
+| **Firecrawl** (default) | `FIRECRAWL_API_KEY` | ✔ | ✔ | 500 credits/mo |
+| **SearXNG** | `SEARXNG_URL` | ✔ | — | ✔ Free (self-hosted) |
+| **Brave Search (free tier)** | `BRAVE_SEARCH_API_KEY` | ✔ | — | 2 000 queries/mo |
+| **DDGS (DuckDuckGo)** | — (no key) | ✔ | — | ✔ Free |
+| **Tavily** | `TAVILY_API_KEY` | ✔ | ✔ | 1 000 searches/mo |
+| **Exa** | `EXA_API_KEY` | ✔ | ✔ | 1 000 searches/mo |
+| **Parallel** | `PARALLEL_API_KEY` | ✔ | ✔ | Paid |
+| **xAI (Grok)** | `XAI_API_KEY` or `hermes auth login xai-oauth` | ✔ | — | Paid (SuperGrok or per-token) |
 
 Brave Search, DDGS, and xAI are **search-only** — pair any of them with Firecrawl/Tavily/Exa/Parallel when you also need `web_extract`. DDGS uses the [`ddgs` Python package](https://pypi.org/project/ddgs/) under the hood; if it isn't already installed, run `pip install ddgs` (or let Hermes lazy-install it on first use). xAI runs Grok's server-side `web_search` tool on the Responses API — results are LLM-generated rather than index-backed, so titles, descriptions, and URL choice are all model output (see the [trust-model caveat](#xai-grok) below).
 
@@ -46,7 +46,7 @@ Backends return raw page markdown, which can be huge (forum threads, docs sites,
 | Under 5 000 | Returned as-is — no LLM call, full markdown reaches the agent |
 | 5 000 – 500 000 | Single-pass summary via the `web_extract` auxiliary model, capped at ~5 000 chars of output |
 | 500 000 – 2 000 000 | Chunked: split into 100 k-char chunks, summarize each in parallel, then synthesize a final summary (~5 000 chars) |
-| Over 2 000 000 | Refused with a hint to use `web_crawl` with focused extraction instructions or a more specific source |
+| Over 2 000 000 | Refused with a hint to use a more focused source URL |
 
 The summary keeps quotes, code blocks, and key facts in their original formatting — it's a content compressor, not a paraphraser. If summarization fails or times out, Hermes falls back to the first ~5 000 chars of raw content rather than a useless error.
 
@@ -89,7 +89,7 @@ hermes tools
 
 ### Firecrawl (default)
 
-Full-featured search, extract, and crawl. Recommended for most users.
+Full-featured search and extract. Recommended for most users.
 
 ```bash
 # ~/.hermes/.env
@@ -113,7 +113,7 @@ When `FIRECRAWL_API_URL` is set, the API key is optional (disable server auth wi
 
 SearXNG is a privacy-respecting, open-source metasearch engine that aggregates results from 70+ search engines. **No API key required** — just point Hermes at a running SearXNG instance.
 
-SearXNG is **search-only** — `web_extract` (including its crawl modes) requires a separate extract provider.
+SearXNG is **search-only** — `web_extract` requires a separate extract provider.
 
 #### Option A — Self-host with Docker (recommended)
 
@@ -222,7 +222,7 @@ Public instances have rate limits, variable uptime, and may disable JSON format
 
 #### Pair SearXNG with an extract provider
 
-SearXNG handles search; you need a separate provider for `web_extract` (including any deep-crawl modes). Use the per-capability keys:
+SearXNG handles search; you need a separate provider for `web_extract`. Use the per-capability keys:
 
 ```yaml
 # ~/.hermes/config.yaml
@@ -237,7 +237,7 @@ With this config, Hermes uses SearXNG for all search queries and Firecrawl for U
 
 ### Tavily
 
-AI-optimised search, extract, and crawl with a generous free tier.
+AI-optimised search and extract with a generous free tier.
 
 ```bash
 # ~/.hermes/.env
@@ -341,7 +341,7 @@ Use different providers for search vs extract. This lets you combine free search
 # ~/.hermes/config.yaml
 web:
   search_backend: "searxng"     # used by web_search
-  extract_backend: "firecrawl"  # used by web_extract (and its deep-crawl modes)
+  extract_backend: "firecrawl"  # used by web_extract
 ```
 
 When per-capability keys are empty, both fall through to `web.backend`. When `web.backend` is also empty, the backend is auto-detected from whichever API key/URL is present.
diff --git a/website/docs/user-guide/features/x-search.md b/website/docs/user-guide/features/x-search.md
index 98d7b4584a1..2e2004cabe0 100644
--- a/website/docs/user-guide/features/x-search.md
+++ b/website/docs/user-guide/features/x-search.md
@@ -11,6 +11,10 @@ The `x_search` tool lets the agent search X (Twitter) posts, profiles, and threa
 
 **Use this instead of `web_search`** when you specifically want current discussion, reactions, or claims **on X**. For general web pages, keep using `web_search` / `web_extract`.
 
+:::tip
+If you're paying Portal for an xAI model anyway, Live Search calls bill against the same xAI key configured for chat. See [Nous Portal](/integrations/nous-portal).
+:::
+
 ## Authentication
 
 `x_search` registers when **either** xAI credential path is available:
diff --git a/website/docs/user-guide/git-worktrees.md b/website/docs/user-guide/git-worktrees.md
index 33d29506ed3..fdaf7e3de22 100644
--- a/website/docs/user-guide/git-worktrees.md
+++ b/website/docs/user-guide/git-worktrees.md
@@ -21,7 +21,7 @@ This page shows how to combine worktrees with Hermes so each session has a clean
 Hermes treats the **current working directory** as the project root:
 
 - CLI: the directory where you run `hermes` or `hermes chat`
-- Messaging gateways: the directory set by `MESSAGING_CWD`
+- Messaging gateways: the directory set by `terminal.cwd` in `~/.hermes/config.yaml`
 
 If you run multiple agents in the **same checkout**, their changes can interfere with each other:
 
@@ -171,4 +171,3 @@ This combination gives you:
 - Strong guarantees that different agents and experiments do not step on each other.
 - Fast iteration cycles with easy recovery from bad edits.
 - Clean, reviewable pull requests.
-
diff --git a/website/docs/user-guide/messaging/email.md b/website/docs/user-guide/messaging/email.md
index c1cf6f5f3fe..d67307be771 100644
--- a/website/docs/user-guide/messaging/email.md
+++ b/website/docs/user-guide/messaging/email.md
@@ -8,10 +8,17 @@ description: "Set up Hermes Agent as an email assistant via IMAP/SMTP"
 
 Hermes can receive and reply to emails using standard IMAP and SMTP protocols. Send an email to the agent's address and it replies in-thread — no special client or bot API needed. Works with Gmail, Outlook, Yahoo, Fastmail, or any provider that supports IMAP/SMTP.
 
-:::info No External Dependencies
-The Email adapter uses Python's built-in `imaplib`, `smtplib`, and `email` modules. No additional packages or external services are required.
+:::info Gateway adapter only: no external dependencies
+This page covers the Email gateway adapter, which uses Python's built-in `imaplib`, `smtplib`, and `email` modules. No additional packages or external services are required for this gateway path.
 :::
 
+This is separate from the bundled [Himalaya email skill](/docs/user-guide/skills/bundled/email/email-himalaya), which lets the agent manage email through terminal commands and requires the external `himalaya` CLI plus a Himalaya config file.
+
+| Use case | What to configure | External dependency |
+|---|---|---|
+| Let people email the Hermes agent and receive replies | Email gateway adapter on this page | None beyond an IMAP/SMTP email account |
+| Let the agent inspect, compose, move, and manage mailbox messages from terminal tools | Himalaya email skill | `himalaya` CLI and `~/.config/himalaya/config.toml` |
+
 ---
 
 ## Prerequisites
diff --git a/website/docs/user-guide/messaging/feishu.md b/website/docs/user-guide/messaging/feishu.md
index 802f1d44f5a..256074fa23f 100644
--- a/website/docs/user-guide/messaging/feishu.md
+++ b/website/docs/user-guide/messaging/feishu.md
@@ -55,6 +55,40 @@ If scan-to-create is not available, the wizard falls back to manual input:
 Keep the App Secret private. Anyone with it can impersonate your app.
 :::
 
+### Configure Permissions
+
+In the Feishu developer console, go to **Permission Management** and add the following scopes. You can bulk-import them in the permissions page.
+
+**Required permissions:**
+
+| Scope | Purpose |
+|-------|---------|
+| `im:message` | Receive and read messages |
+| `im:message:send_as_bot` | Send messages as the bot |
+| `im:resource` | Access images, files, and audio sent by users |
+| `im:chat` | Access chat/group metadata |
+| `im:chat:readonly` | Read chat list and membership |
+
+**Recommended permissions (for full functionality):**
+
+| Scope | Purpose |
+|-------|---------|
+| `im:message.reactions:readonly` | Receive emoji reaction events |
+| `admin:app.info:readonly` | Auto-detect bot identity for @mention gating |
+| `contact:user.id:readonly` | Resolve user IDs for allowlist matching |
+
+### Configure Events
+
+In **Events and Callbacks**:
+
+1. Set the connection mode to **Long Connection (WebSocket)** (recommended) or configure a webhook URL
+2. In the **Event Configuration** section, subscribe to:
+   - `im.message.receive_v1` — required for receiving messages
+
+### Publish the App
+
+After configuring permissions and events, go to **Version Management** and publish a new version of the app. The permissions won't take effect until a version is published and approved (for enterprise apps, this may require admin approval).
+
 ## Step 2: Choose a Connection Mode
 
 ### Recommended: WebSocket mode
diff --git a/website/docs/user-guide/messaging/google_chat.md b/website/docs/user-guide/messaging/google_chat.md
index 8cf2d01d7a3..d9565b154c5 100644
--- a/website/docs/user-guide/messaging/google_chat.md
+++ b/website/docs/user-guide/messaging/google_chat.md
@@ -13,6 +13,8 @@ process does not need a public URL, a tunnel, or a TLS certificate. It connects,
 authenticates, and listens on a subscription — the same way a Telegram bot listens
 on a token.
 
+> Run `hermes gateway setup` and pick **Google Chat** for a guided walk-through.
+
 :::note Workspace edition
 Google Chat is part of Google Workspace. You can use this integration with a
 personal Workspace (`@yourdomain.com` registered through Google) or a work
@@ -237,7 +239,7 @@ specifically, as the user who asked for the file.
 4. On the host, register the client with Hermes:
 
 ```bash
-python -m gateway.platforms.google_chat_user_oauth \
+python -m plugins.platforms.google_chat.oauth \
     --client-secret /path/to/client_secret.json
 ```
 
@@ -330,7 +332,7 @@ The one-time host setup wasn't done. From a terminal on the host that runs
 Hermes:
 
 ```bash
-python -m gateway.platforms.google_chat_user_oauth \
+python -m plugins.platforms.google_chat.oauth \
     --client-secret /path/to/client_secret.json
 ```
 
diff --git a/website/docs/user-guide/messaging/homeassistant.md b/website/docs/user-guide/messaging/homeassistant.md
index f57b439775d..e96cc22cc02 100644
--- a/website/docs/user-guide/messaging/homeassistant.md
+++ b/website/docs/user-guide/messaging/homeassistant.md
@@ -250,3 +250,26 @@ Agent automatically:
      entity_id="light.hallway")
 3. Sends notification: "Front door opened. Hallway lights turned on."
 ```
+
+## Troubleshooting
+
+**Environment variables not picked up.**
+The adapter reads credentials from `~/.hermes/.env` (auto-merged at startup) or
+from `config.yaml`. Double-check the file lives under the active Hermes profile
+home and that there's no stray quoting around the URL/token. Restart the gateway
+after editing — env changes are only applied on process start.
+
+**`conversation entity not found` / agent never replies.**
+Home Assistant's conversation API requires a configured *Assist* conversation
+agent. In HA, open **Settings → Voice assistants → Add assistant** and note the
+resulting entity id (looks like `conversation.home_assistant` or
+`conversation.openai_<name>`). Set that entity id in the adapter's
+`conversation_entity` setting; the default may not exist on your instance.
+
+**REST auth failing (`401 Unauthorized`).**
+The token must be a *Long-Lived Access Token* created from your HA user profile
+page (**Profile → Security → Long-lived access tokens**). Short-lived UI
+session tokens won't work. Also verify the base URL includes the scheme and
+port (e.g. `http://homeassistant.local:8123`) and is reachable from the host
+running Hermes — `curl -H "Authorization: Bearer <token>" <url>/api/` should
+return `{"message": "API running."}`.
diff --git a/website/docs/user-guide/messaging/index.md b/website/docs/user-guide/messaging/index.md
index b1cc6232525..ff40628544f 100644
--- a/website/docs/user-guide/messaging/index.md
+++ b/website/docs/user-guide/messaging/index.md
@@ -10,6 +10,10 @@ Chat with Hermes from Telegram, Discord, Slack, WhatsApp, Signal, SMS, Email, Ho
 
 For the full voice feature set — including CLI microphone mode, spoken replies in messaging, and Discord voice-channel conversations — see [Voice Mode](/user-guide/features/voice-mode) and [Use Voice Mode with Hermes](/guides/use-voice-mode-with-hermes).
 
+:::tip
+Bots need both a model provider and tool providers (TTS, web). A [Nous Portal](/integrations/nous-portal) subscription bundles all of them.
+:::
+
 ## Platform Comparison
 
 | Platform | Voice | Images | Files | Threads | Reactions | Typing | Streaming |
diff --git a/website/docs/user-guide/messaging/line.md b/website/docs/user-guide/messaging/line.md
index 1aa3a753816..075afdbd9d5 100644
--- a/website/docs/user-guide/messaging/line.md
+++ b/website/docs/user-guide/messaging/line.md
@@ -10,6 +10,8 @@ Run Hermes Agent as a [LINE](https://line.me/) bot via the official LINE Messagi
 
 LINE is the dominant messaging app in Japan, Taiwan, and Thailand. If your users live there, this is how they reach you.
 
+> Run `hermes gateway setup` and pick **LINE** for a guided walk-through.
+
 ## How the bot responds
 
 | Context | Behavior |
diff --git a/website/docs/user-guide/messaging/msgraph-webhook.md b/website/docs/user-guide/messaging/msgraph-webhook.md
index dc21552d732..80ae063b3e6 100644
--- a/website/docs/user-guide/messaging/msgraph-webhook.md
+++ b/website/docs/user-guide/messaging/msgraph-webhook.md
@@ -25,6 +25,7 @@ platforms:
   msgraph_webhook:
     enabled: true
     extra:
+      host: 127.0.0.1
       port: 8646
       client_state: "replace-with-a-strong-secret"
       accepted_resources:
@@ -40,6 +41,8 @@ MSGRAPH_WEBHOOK_CLIENT_STATE=<generate-with-openssl-rand-hex-32>
 MSGRAPH_WEBHOOK_ACCEPTED_RESOURCES=communications/onlineMeetings
 ```
 
+Note: the bind host is read from `extra.host` in `config.yaml` (see the example above); there is no `MSGRAPH_WEBHOOK_HOST` env-var override.
+
 Start the gateway: `hermes gateway run`. The listener exposes:
 
 - `POST /msgraph/webhook` — change notifications from Graph
@@ -58,14 +61,14 @@ All settings go under `platforms.msgraph_webhook.extra`:
 
 | Setting | Default | Description |
 |---------|---------|-------------|
-| `host` | `0.0.0.0` | Bind address for the HTTP listener. |
+| `host` | `0.0.0.0` | Bind address for the HTTP listener. Non-loopback binds require `allowed_source_cidrs`; loopback (`127.0.0.1` / `::1`) is the easiest dev-tunnel / reverse-proxy setup. |
 | `port` | `8646` | Bind port. |
 | `webhook_path` | `/msgraph/webhook` | URL path Graph POSTs to. |
 | `health_path` | `/health` | Readiness endpoint. |
 | `client_state` | — | Shared secret Graph echoes in every notification. Compared with `hmac.compare_digest` — generate with `openssl rand -hex 32`. |
 | `accepted_resources` | `[]` (accept all) | Allowlist of Graph resource paths/patterns. Trailing `*` acts as prefix match. Leading `/` is tolerated. Example: `["communications/onlineMeetings", "chats/*/messages"]`. |
 | `max_seen_receipts` | `5000` | Dedupe cache size for notification IDs. Oldest entries evicted when the cap is hit. |
-| `allowed_source_cidrs` | `[]` (allow all) | Optional source-IP allowlist. See below. |
+| `allowed_source_cidrs` | `[]` | Required for non-loopback binds. Leave empty only when the listener is bound to loopback and fronted by a local tunnel / reverse proxy. |
 
 Each setting also has an equivalent env var (`MSGRAPH_WEBHOOK_*`) that merges into the config at gateway startup — see the [environment variables reference](/reference/environment-variables#microsoft-graph-teams-meetings).
 
@@ -75,7 +78,7 @@ Each setting also has an equivalent env var (`MSGRAPH_WEBHOOK_*`) that merges in
 
 Every Graph notification includes the `clientState` string your subscription registered with. The listener rejects any notification whose `clientState` doesn't match, using timing-safe comparison. This is Microsoft's documented mechanism — treat the value as a strong shared secret.
 
-If `client_state` is unset, the listener accepts every well-formed POST. **Don't run without it in production.**
+If `client_state` is unset, the listener refuses to start.
 
 ### Source-IP allowlisting (production deployments)
 
@@ -86,6 +89,7 @@ platforms:
   msgraph_webhook:
     enabled: true
     extra:
+      host: 0.0.0.0
       client_state: "..."
       allowed_source_cidrs:
         - "52.96.0.0/14"
@@ -99,7 +103,7 @@ Or as an env var:
 MSGRAPH_WEBHOOK_ALLOWED_SOURCE_CIDRS="52.96.0.0/14,52.104.0.0/14"
 ```
 
-Empty allowlist = accept from anywhere (default; preserves dev-tunnel workflows). Invalid CIDR strings log a warning and are ignored. **Review the Microsoft IP list quarterly** — it changes.
+Binding a non-loopback host such as `0.0.0.0`, `::`, or a LAN IP without `allowed_source_cidrs` is refused at startup. If you're using a dev tunnel or reverse proxy on the same machine, bind Hermes to `127.0.0.1` or `::1` and leave the allowlist empty there. Invalid CIDR strings log a warning and are ignored. **Review the Microsoft IP list quarterly** — it changes.
 
 ### HTTPS termination
 
@@ -107,7 +111,7 @@ The listener speaks plain HTTP. Terminate TLS at your reverse proxy (Caddy, Ngin
 
 ### Response hygiene
 
-On success the listener returns `202 Accepted` with an empty body — internal counters stay out of the wire response. Operators can observe counts via `/health`.
+On success the listener returns `202 Accepted` with an empty body — internal counters stay out of the wire response. Operators can observe counts via `/health`, which is guarded by the same source-IP rules as the webhook path.
 
 Status code table:
 
@@ -127,8 +131,9 @@ Status code table:
 | Graph subscription validation fails | Public URL is reachable, `/msgraph/webhook` path matches, GET with `validationToken` echoes the token verbatim as `text/plain` within 10 seconds. |
 | Notifications POST but nothing ingests | `client_state` matches what you registered the subscription with. Re-run `openssl rand -hex 32` and create a new subscription if the value drifted. Check `accepted_resources` includes the resource path Graph is sending. |
 | Every notification 403s | `clientState` mismatch (forged, or subscription registered with a different value). Re-create the subscription with `hermes teams-pipeline subscribe --client-state "$MSGRAPH_WEBHOOK_CLIENT_STATE" ...` (ships with the pipeline runtime PR). |
+| Listener refuses to start on `0.0.0.0` | Set `allowed_source_cidrs` to Microsoft's current webhook egress ranges, or bind Hermes to `127.0.0.1` / `::1` behind your tunnel or reverse proxy. |
 | Listener starts but `curl http://localhost:8646/health` hangs | Port binding collision. Check `ss -tlnp \| grep 8646` and change `port:` if needed. |
-| Real Graph requests from Microsoft get 403'd | Source IP allowlist is too narrow. Remove `allowed_source_cidrs` temporarily, confirm traffic flows, then widen the list to include the current Microsoft egress ranges. |
+| Real Graph requests from Microsoft get 403'd | Source IP allowlist is too narrow. Widen the list to include the current Microsoft egress ranges. If you're still validating the tunnel path, bind Hermes to loopback and let the tunnel handle public exposure. |
 
 ## Related Docs
 
diff --git a/website/docs/user-guide/messaging/ntfy.md b/website/docs/user-guide/messaging/ntfy.md
index c7ee2593e4c..6bacac84f2b 100644
--- a/website/docs/user-guide/messaging/ntfy.md
+++ b/website/docs/user-guide/messaging/ntfy.md
@@ -4,6 +4,8 @@
 
 ntfy makes a great lightweight push channel for Hermes: subscribe to a topic from the [ntfy mobile app](https://ntfy.sh/docs/subscribe/phone/), send messages to the topic to talk to the agent, get the response back on your phone.
 
+> Run `hermes gateway setup` and pick **ntfy** for a guided walk-through.
+
 ## Prerequisites
 
 - A topic name (any unique string — `hermes-myname-2026` works fine)
diff --git a/website/docs/user-guide/messaging/simplex.md b/website/docs/user-guide/messaging/simplex.md
index 601cd2736f6..472a629d472 100644
--- a/website/docs/user-guide/messaging/simplex.md
+++ b/website/docs/user-guide/messaging/simplex.md
@@ -2,6 +2,8 @@
 
 [SimpleX Chat](https://simplex.chat/) is a private, decentralised messaging platform where users own their contacts and groups. Unlike other platforms, SimpleX assigns no persistent user IDs — every contact is identified by an opaque internal ID generated at connection time, which makes it one of the most private messengers available.
 
+> Run `hermes gateway setup` and pick **SimpleX** for a guided walk-through.
+
 ## Prerequisites
 
 - The **simplex-chat** CLI installed and running as a daemon
@@ -13,7 +15,7 @@ Download the latest release from the [simplex-chat GitHub releases](https://gith
 
 ```bash
 # Linux / macOS binary
-curl -L https://github.com/simplex-chat/simplex-chat/releases/latest/download/simplex-chat-ubuntu-22_04-x86-64 -o simplex-chat
+curl -L https://github.com/simplex-chat/simplex-chat/releases/latest/download/simplex-chat-ubuntu-22_04-x86_64 -o simplex-chat
 chmod +x simplex-chat
 ```
 
diff --git a/website/docs/user-guide/messaging/teams-meetings.md b/website/docs/user-guide/messaging/teams-meetings.md
index c09f7088d55..e0e118cc091 100644
--- a/website/docs/user-guide/messaging/teams-meetings.md
+++ b/website/docs/user-guide/messaging/teams-meetings.md
@@ -8,6 +8,10 @@ description: "Set up the Microsoft Teams meeting summary pipeline with Microsoft
 
 Use the Teams meeting pipeline when you want Hermes to ingest Microsoft Graph meeting events, fetch transcripts first, fall back to recordings plus STT when needed, and deliver a structured summary to downstream sinks.
 
+Prerequisites: see [Microsoft Teams](./teams.md) for the underlying bot/credential setup.
+
+> Run `hermes gateway setup` and pick **Teams Meetings** for a guided walk-through.
+
 This page focuses on setup and enablement:
 - Graph credentials
 - webhook listener configuration
@@ -65,6 +69,7 @@ The webhook listener is a gateway platform named `msgraph_webhook`. At minimum,
 
 ```bash
 MSGRAPH_WEBHOOK_ENABLED=true
+MSGRAPH_WEBHOOK_HOST=127.0.0.1
 MSGRAPH_WEBHOOK_PORT=8646
 MSGRAPH_WEBHOOK_CLIENT_STATE=<random-shared-secret>
 MSGRAPH_WEBHOOK_ACCEPTED_RESOURCES=communications/onlineMeetings
@@ -91,6 +96,7 @@ platforms:
   msgraph_webhook:
     enabled: true
     extra:
+      host: 127.0.0.1
       port: 8646
       client_state: "replace-me"
       accepted_resources:
@@ -120,6 +126,8 @@ platforms:
           enabled: false
 ```
 
+If you bind the listener to a non-loopback host such as `0.0.0.0`, you must also set `allowed_source_cidrs` to Microsoft's webhook egress ranges. Loopback binds (`127.0.0.1` / `::1`) are the intended dev-tunnel and local reverse-proxy setup.
+
 ## Teams Delivery Modes
 
 The pipeline supports two Teams summary-delivery modes inside the existing Teams plugin.
diff --git a/website/docs/user-guide/messaging/teams.md b/website/docs/user-guide/messaging/teams.md
index 07c91fa0262..ae30d4a5856 100644
--- a/website/docs/user-guide/messaging/teams.md
+++ b/website/docs/user-guide/messaging/teams.md
@@ -10,6 +10,8 @@ Connect Hermes Agent to Microsoft Teams as a bot. Unlike Slack's Socket Mode, Te
 
 Need meeting summaries from Microsoft Graph events rather than normal bot conversations? Use the dedicated setup page: [Teams Meetings](/user-guide/messaging/teams-meetings).
 
+> Run `hermes gateway setup` and pick **Microsoft Teams** for a guided walk-through.
+
 ## How the Bot Responds
 
 | Context | Behavior |
diff --git a/website/docs/user-guide/messaging/telegram.md b/website/docs/user-guide/messaging/telegram.md
index f20bdfee5e3..aab215cf2e2 100644
--- a/website/docs/user-guide/messaging/telegram.md
+++ b/website/docs/user-guide/messaging/telegram.md
@@ -319,7 +319,7 @@ With STT disabled, the gateway still downloads the voice/audio attachment into H
 
 Your tools or skills can then read that path directly (e.g., hand it off to a local diarization pipeline, a richer transcription model, or upload it to long-term storage). The file extension reflects the original format Telegram delivered (`.ogg` for voice notes, `.mp3`/`.m4a`/etc. for audio attachments).
 
-This pairs naturally with the [local Bot API server](#large-files-20mb--via-local-bot-api-server) section below, which lifts Telegram's 20MB getFile ceiling to 2GB — useful when the recordings you want to process are longer than a couple of minutes.
+This pairs naturally with the [local Bot API server](#large-files-20mb-via-local-bot-api-server) section below, which lifts Telegram's 20MB getFile ceiling to 2GB — useful when the recordings you want to process are longer than a couple of minutes.
 
 ### Outgoing Voice (Text-to-Speech)
 
@@ -1233,6 +1233,14 @@ HERMES_TELEGRAM_NOTIFICATIONS=all
 
 Unknown values log a warning and fall back to `important`.
 
+## Status messages edited in place
+
+The Telegram adapter routes recurring agent status callbacks (e.g. "Compressing context…", "Calling tool…") through `send_or_update_status()`, which keeps a `{(chat_id, status_key) → message_id}` cache and **edits the existing bubble** on subsequent emits instead of appending a new one each time. Distinct `status_key` values get their own messages; distinct chats never collide. If the edit fails (e.g. the user deleted the message, or it's older than Telegram allows for edits), the cache entry is dropped and the next emit posts a fresh message and re-caches its ID. No config required — this is the default Telegram behavior. Other adapters that don't implement `send_or_update_status` fall through to plain `send()` unchanged.
+
+## Pin incoming user message during agent turn
+
+When a user sends a message that triggers an agent turn, the Telegram adapter pins that incoming message for the duration of the turn and unpins it when the response is finished — a lightweight visual indicator that the bot is actively working on the message rather than ignoring it. The pin uses `disable_notification=true` to avoid extra pings. No config required.
+
 ## Security
 
 :::warning
diff --git a/website/docs/user-guide/messaging/wecom-callback.md b/website/docs/user-guide/messaging/wecom-callback.md
index a9c6be56b7a..8a45ab8cb3c 100644
--- a/website/docs/user-guide/messaging/wecom-callback.md
+++ b/website/docs/user-guide/messaging/wecom-callback.md
@@ -12,6 +12,10 @@ Hermes supports two WeCom integration modes:
 - **WeCom Callback** (this page) — self-built app, receives encrypted XML callbacks. Shows as a first-class app in users' WeCom sidebar. Supports multi-corp routing.
 :::
 
+See also: [WeCom Bot](./wecom.md) for the bot-style integration.
+
+> Run `hermes gateway setup` and pick **WeCom Callback** for a guided walk-through.
+
 ## How It Works
 
 1. You register a self-built application in the WeCom Admin Console
@@ -147,3 +151,28 @@ The crypto implementation is compatible with Tencent's official WXBizMsgCrypt SD
 - **No typing indicators** — the callback model doesn't support typing status
 - **Text only** — currently supports text messages for input; image/file/voice input not yet implemented. The agent is aware of outbound media capabilities via the WeCom platform hint (images, documents, video, voice).
 - **Response latency** — agent sessions take 3–30 minutes; users see the reply when processing completes
+
+## Troubleshooting
+
+**Signature verification failing.**
+WeCom signs every request with the **Token** you registered in the admin
+console. A mismatch between the token configured in Hermes and the token the
+admin console expects is the most common cause. Re-copy both the **Token** and
+**EncodingAESKey** from the admin console — they're easy to truncate. Whitespace
+in `~/.hermes/.env` values around `=` will also break signature checks. After
+fixing, restart `hermes gateway run`.
+
+**Callback URL not reachable / verification step fails.**
+WeCom hits the public URL you registered. Confirm:
+1. Your reverse proxy / tunnel forwards `/wecom/callback` to the gateway's port.
+2. The URL in the admin console is HTTPS (WeCom rejects plain HTTP).
+3. From outside your network, `curl -i https://<your-domain>/wecom/callback`
+   returns something other than a timeout (a 4xx without query params is fine —
+   it just means the listener is reachable).
+
+**Port not reachable / listener not bound.**
+Check `hermes gateway run` logs for the bound host/port. If the adapter bound to
+`127.0.0.1` you must front it with a reverse proxy or tunnel — WeCom's servers
+can't reach loopback. Set `extra.host: 0.0.0.0` in `config.yaml` (plus
+`allowed_source_cidrs` if exposing directly) or keep loopback and use a tunnel
+such as Cloudflare Tunnel / nginx.
diff --git a/website/docs/user-guide/messaging/wecom.md b/website/docs/user-guide/messaging/wecom.md
index 1a98c82255a..aa98b6b303d 100644
--- a/website/docs/user-guide/messaging/wecom.md
+++ b/website/docs/user-guide/messaging/wecom.md
@@ -8,6 +8,8 @@ description: "Connect Hermes Agent to WeCom via the AI Bot WebSocket gateway"
 
 Connect Hermes to [WeCom](https://work.weixin.qq.com/) (企业微信), Tencent's enterprise messaging platform. The adapter uses WeCom's AI Bot WebSocket gateway for real-time bidirectional communication — no public endpoint or webhook needed.
 
+See also: [WeCom Callback](./wecom-callback.md) for inbound webhook setup.
+
 ## Prerequisites
 
 - A WeCom organization account
diff --git a/website/docs/user-guide/messaging/weixin.md b/website/docs/user-guide/messaging/weixin.md
index c2932a39a7f..a0d25ee8cb9 100644
--- a/website/docs/user-guide/messaging/weixin.md
+++ b/website/docs/user-guide/messaging/weixin.md
@@ -142,6 +142,25 @@ WEIXIN_DM_POLICY=allowlist
 WEIXIN_ALLOWED_USERS=user_id_1,user_id_2
 ```
 
+`WEIXIN_ALLOWED_USERS` is an **inbound filter**, not an invitation system. QR
+login connects one iLink bot identity to Hermes. Other people do not scan the
+Hermes QR code with their own accounts; they must message the connected iLink
+bot/contact through WeChat, and Hermes will process the DM only if the sender's
+Weixin user ID is present in `WEIXIN_ALLOWED_USERS`.
+
+A practical setup flow is:
+
+1. Pair Hermes once with `hermes gateway setup` and note the connected iLink bot
+   account.
+2. Have each allowed user send a direct message to that bot/contact.
+3. Read the sender/user ID from the gateway logs or the inbound event payload.
+4. Add those IDs to `WEIXIN_ALLOWED_USERS`, then restart the gateway.
+
+If only the account that scanned the QR code can talk to Hermes, verify that the
+other users are messaging the iLink bot identity itself, not the personal WeChat
+account that performed the QR login. The iLink bot is a separate identity, and
+ordinary WeChat contact/group routing can be limited by Tencent's iLink behavior.
+
 ### Group Policy
 
 Controls which groups the bot responds in **when iLink delivers group events for the connected identity**. For QR-login iLink bot identities (e.g. `...@im.bot`), group events are typically not delivered at all, so this policy may have no effect — see the iLink bot limitation warning at the top of the page.
diff --git a/website/docs/user-guide/messaging/whatsapp.md b/website/docs/user-guide/messaging/whatsapp.md
index acda8de4063..d2bd52a56b3 100644
--- a/website/docs/user-guide/messaging/whatsapp.md
+++ b/website/docs/user-guide/messaging/whatsapp.md
@@ -8,6 +8,8 @@ description: "Set up Hermes Agent as a WhatsApp bot via the built-in Baileys bri
 
 Hermes connects to WhatsApp through a built-in bridge based on **Baileys**. This works by emulating a WhatsApp Web session — **not** through the official WhatsApp Business API. No Meta developer account or Business verification is required.
 
+> Run `hermes gateway setup` and pick **WhatsApp** for a guided walk-through.
+
 :::warning Unofficial API — Ban Risk
 WhatsApp does **not** officially support third-party bots outside the Business API. Using a third-party bridge carries a small risk of account restrictions. To minimize risk:
 - **Use a dedicated phone number** for the bot (not your personal number)
diff --git a/website/docs/user-guide/multi-profile-gateways.md b/website/docs/user-guide/multi-profile-gateways.md
new file mode 100644
index 00000000000..6f00c24cf83
--- /dev/null
+++ b/website/docs/user-guide/multi-profile-gateways.md
@@ -0,0 +1,332 @@
+---
+sidebar_position: 4
+---
+
+# Running Many Gateways at Once
+
+Operate multiple [profiles](./profiles.md) — each with its own bot tokens,
+sessions, and memory — as managed services on a single machine. This page
+covers the operational concerns: starting them all together, viewing logs
+across profiles, preventing the host from sleeping, and recovering from common
+launchd/systemd quirks.
+
+If you only run one Hermes agent, you don't need this page — see
+[Profiles](./profiles.md) for the basics.
+
+## When to use this
+
+You want this setup when you have two or more Hermes agents that should all
+be online at the same time. Common reasons:
+
+- A personal assistant on one Telegram bot and a coding agent on another
+- One agent per family member or one per Slack workspace
+- Sandbox + production instances of the same configuration
+- A research agent + a writing agent + a cron-driven bot — each with isolated
+  memory and skills
+
+Every profile already gets its own per-platform LaunchAgent
+(`ai.hermes.gateway-<name>.plist`) or systemd user service
+(`hermes-gateway-<name>.service`). This guide adds the patterns for managing
+them collectively.
+
+## Quick start
+
+```bash
+# Create profiles (once)
+hermes profile create coder
+hermes profile create personal-bot
+hermes profile create research
+
+# Configure each
+coder setup
+personal-bot setup
+research setup
+
+# Install each gateway as a managed service
+coder gateway install
+personal-bot gateway install
+research gateway install
+
+# Start them all
+coder gateway start
+personal-bot gateway start
+research gateway start
+```
+
+That's it — three independent agents, each on its own process, restarting
+automatically on crash and on user login.
+
+## Start, stop, or restart all gateways at once
+
+The CLI ships with single-profile lifecycle commands. To act across every
+profile, wrap them in a shell loop. Put the snippet below in
+`~/.local/bin/hermes-gateways` and `chmod +x` it:
+
+```sh
+#!/bin/sh
+set -eu
+
+# Add or remove profile names here as you create / delete profiles.
+profiles="default coder personal-bot research"
+
+usage() {
+  echo "Usage: hermes-gateways {start|stop|restart|status|list}"
+}
+
+run_for_profile() {
+  profile="$1"
+  action="$2"
+  if [ "$profile" = "default" ]; then
+    hermes gateway "$action"
+  else
+    hermes -p "$profile" gateway "$action"
+  fi
+}
+
+action="${1:-}"
+case "$action" in
+  start|stop|restart|status)
+    for profile in $profiles; do
+      echo "==> $action $profile"
+      run_for_profile "$profile" "$action"
+    done
+    ;;
+  list)
+    hermes gateway list
+    ;;
+  *)
+    usage
+    exit 2
+    ;;
+esac
+```
+
+Then:
+
+```bash
+hermes-gateways start      # start every configured profile
+hermes-gateways stop       # stop every configured profile
+hermes-gateways restart    # restart all
+hermes-gateways status     # status across all
+hermes-gateways list       # delegates to `hermes gateway list`
+```
+
+:::tip
+The `default` profile is targeted with `hermes gateway <action>` (no `-p`),
+not `hermes -p default gateway <action>`. The wrapper above handles both forms.
+:::
+
+## Manage one profile
+
+The shortcut commands every profile installs:
+
+```bash
+coder gateway run        # foreground (Ctrl-C to stop)
+coder gateway start      # start the managed service
+coder gateway stop       # stop the managed service
+coder gateway restart    # restart
+coder gateway status     # status
+coder gateway install    # create the LaunchAgent / systemd unit
+coder gateway uninstall  # remove the service file
+```
+
+These are equivalent to `hermes -p coder gateway <action>` — useful if a
+profile alias is not on `PATH` or if you target profiles dynamically from a
+script.
+
+## Service files
+
+Each profile installs its own service with a unique name, so installations
+never clash:
+
+| Platform | Path                                                              |
+| -------- | ----------------------------------------------------------------- |
+| macOS    | `~/Library/LaunchAgents/ai.hermes.gateway-<profile>.plist`        |
+| Linux    | `~/.config/systemd/user/hermes-gateway-<profile>.service`         |
+
+The default profile keeps the historical names: `ai.hermes.gateway.plist` /
+`hermes-gateway.service`.
+
+## Viewing logs
+
+Each profile writes to its own log files:
+
+```bash
+# Default profile
+tail -f ~/.hermes/logs/gateway.log
+tail -f ~/.hermes/logs/gateway.error.log
+
+# Named profile
+tail -f ~/.hermes/profiles/<name>/logs/gateway.log
+tail -f ~/.hermes/profiles/<name>/logs/gateway.error.log
+```
+
+Stream every profile's log simultaneously:
+
+```bash
+tail -f ~/.hermes/logs/gateway.log ~/.hermes/profiles/*/logs/gateway.log
+```
+
+The CLI also has a structured log viewer:
+
+```bash
+hermes logs --tail              # follow default profile
+hermes -p coder logs --tail     # follow one profile
+hermes logs --help              # filters, levels, JSON output
+```
+
+## Identify what's actually running
+
+```bash
+hermes profile list             # profiles + model + gateway state
+hermes-gateways status          # full status across every profile
+launchctl list | grep hermes    # macOS — PIDs and labels
+systemctl --user list-units 'hermes-gateway-*'   # Linux — units
+```
+
+## Editing configuration
+
+Every profile keeps its config inside its own directory:
+
+```
+~/.hermes/profiles/<name>/
+├── .env              # API keys, bot tokens (chmod 600)
+├── config.yaml       # model, provider, toolsets, gateway settings
+└── SOUL.md           # personality / system prompt
+```
+
+The default profile uses `~/.hermes/` directly with the same three files.
+
+Edit them with any editor or via the CLI:
+
+```bash
+hermes config set model.model anthropic/claude-sonnet-4    # default profile
+coder config set model.model openai/gpt-5                  # named profile
+```
+
+After editing `.env` or `config.yaml`, restart the affected gateway:
+
+```bash
+coder gateway restart
+# or, for everything:
+hermes-gateways restart
+```
+
+## Keeping the host awake
+
+The gateway process can run all day, but the operating system will still try
+to sleep when idle. Two patterns:
+
+### macOS — `caffeinate`
+
+`caffeinate` is built into macOS and prevents sleep while it runs. No install.
+
+```bash
+caffeinate -dis                    # block display, idle, and system sleep
+caffeinate -dis -t 28800           # same, auto-exit after 8 hours
+caffeinate -i -w $(cat ~/.hermes/gateway.pid) &   # awake while default gateway runs
+
+# Persistent: run in background and forget
+nohup caffeinate -dis >/dev/null 2>&1 &
+disown
+
+# Inspect / stop
+pmset -g assertions | grep -iE 'caffeinate|prevent|user is active'
+pkill caffeinate
+```
+
+| Flag   | Effect                                            |
+| ------ | ------------------------------------------------- |
+| `-d`   | block display sleep                               |
+| `-i`   | block idle system sleep (default)                 |
+| `-m`   | block disk sleep                                  |
+| `-s`   | block system sleep (AC-powered Macs only)         |
+| `-u`   | simulate user activity (prevents screen lock)     |
+| `-t N` | auto-exit after `N` seconds                       |
+| `-w P` | exit when PID `P` exits                           |
+
+:::warning Lid-close still sleeps the Mac
+`caffeinate` cannot override the hardware-driven lid-close sleep on MacBooks.
+For lid-closed operation, change your Energy Saver / Battery preferences or
+use a third-party tool.
+:::
+
+### Linux — `systemd-inhibit` or `loginctl`
+
+```bash
+# Inhibit suspend while a command runs
+systemd-inhibit --what=idle:sleep --who=hermes --why="gateways running" \
+  sleep infinity &
+
+# Allow user services to keep running after logout (recommended)
+sudo loginctl enable-linger "$USER"
+```
+
+After enabling lingering, your systemd user units (including
+`hermes-gateway-<profile>.service`) continue running across SSH disconnects
+and reboots.
+
+## Token-conflict safety
+
+Each profile must use unique bot tokens for each platform. If two profiles
+share a Telegram, Discord, Slack, WhatsApp, or Signal token, the second
+gateway refuses to start with an error naming the conflicting profile.
+
+To audit:
+
+```bash
+grep -H 'TELEGRAM_BOT_TOKEN\|DISCORD_BOT_TOKEN' \
+     ~/.hermes/.env ~/.hermes/profiles/*/.env
+```
+
+## Updating the code
+
+`hermes update` pulls the latest code once and syncs new bundled skills into
+every profile:
+
+```bash
+hermes update
+hermes-gateways restart
+```
+
+User-modified skills are never overwritten.
+
+## Troubleshooting
+
+### "Could not find service in domain for user gui: 501"
+
+You ran `hermes gateway start` after a previous `hermes gateway stop`. The
+CLI's `stop` does a full `launchctl unload`, which removes the service from
+launchd's registry. The CLI catches this specific error on `start` and
+automatically re-loads the plist (`↻ launchd job was unloaded; reloading
+service definition`). The service starts normally. Nothing to fix.
+
+### Stale PID after a crash
+
+If a profile's gateway shows `not running` but a process is still alive:
+
+```bash
+ps -ef | grep "hermes_cli.*-p <profile>"
+cat ~/.hermes/profiles/<profile>/gateway.pid
+kill -TERM <pid>          # graceful
+kill -KILL <pid>          # if that fails after a few seconds
+<profile> gateway start
+```
+
+### Forcing a hard reset of one service
+
+```bash
+# macOS
+launchctl unload ~/Library/LaunchAgents/ai.hermes.gateway-<profile>.plist
+launchctl load   ~/Library/LaunchAgents/ai.hermes.gateway-<profile>.plist
+
+# Linux
+systemctl --user restart hermes-gateway-<profile>.service
+```
+
+### Health check
+
+```bash
+hermes doctor                  # default profile
+hermes -p <profile> doctor     # one profile
+```
diff --git a/website/docs/user-guide/profiles.md b/website/docs/user-guide/profiles.md
index b09911e637a..494e7ec4241 100644
--- a/website/docs/user-guide/profiles.md
+++ b/website/docs/user-guide/profiles.md
@@ -24,6 +24,10 @@ That's it. `coder` is now its own Hermes profile with its own config, memory, an
 
 ## Creating a profile
 
+:::tip
+Quickest setup: run `hermes setup --portal` inside the new profile to wire up models + tools at once. See [Nous Portal](/integrations/nous-portal).
+:::
+
 ### Blank profile
 
 ```bash
diff --git a/website/docs/user-guide/security.md b/website/docs/user-guide/security.md
index 80a615c2f9e..54dae3f83ef 100644
--- a/website/docs/user-guide/security.md
+++ b/website/docs/user-guide/security.md
@@ -30,10 +30,23 @@ The approval system supports three modes, configured via `approvals.mode` in `~/
 
 ```yaml
 approvals:
-  mode: manual    # manual | smart | off
-  timeout: 60     # seconds to wait for user response (default: 60)
+  mode: manual                    # manual | smart | off
+  timeout: 60                     # seconds to wait for user response (default: 60)
+  cron_mode: deny                 # deny | approve — what cron jobs do when they hit a dangerous command
+  mcp_reload_confirm: true        # /reload-mcp asks before invalidating the MCP tool cache
+  destructive_slash_confirm: true # /clear, /new, /reset, /undo prompt before discarding state
 ```
 
+The full set of keys:
+
+| Key | Default | What it controls |
+|---|---|---|
+| `mode` | `manual` | Approval policy for dangerous shell commands — see the table below. |
+| `timeout` | `60` | Seconds Hermes waits for an approval reply before timing out. |
+| `cron_mode` | `deny` | How [cron jobs](./features/cron.md) behave headlessly when they trigger a dangerous-command prompt. `deny` blocks the command (the agent must find another path); `approve` auto-approves everything in cron context. |
+| `mcp_reload_confirm` | `true` | When true, `/reload-mcp` asks before rebuilding the MCP tool set. Rebuilding invalidates the provider prompt cache (tool schemas live in the system prompt), so the next message re-sends full input tokens. Users who click **Always Approve** flip this key to `false`. |
+| `destructive_slash_confirm` | `true` | When true, destructive session slash commands (`/clear`, `/new`, `/reset`, `/undo`) prompt before discarding conversation state. Three-option dialog (Approve Once / Always Approve / Cancel) routed through native yes/no buttons on Telegram, Discord, and Slack; text fallback elsewhere. Users who click **Always Approve** flip this key to `false`. TUI uses its own modal overlay (set `HERMES_TUI_NO_CONFIRM=1` to opt out there). |
+
 | Mode | Behavior |
 |------|----------|
 | **manual** (default) | Always prompt the user for approval on dangerous commands |
@@ -73,7 +86,7 @@ When YOLO is active, Hermes shows two persistent visual reminders so it's hard t
 YOLO mode disables **all** dangerous command safety checks for the session — **except** the hardline blocklist (see below). Use only when you fully trust the commands being generated (e.g., well-tested automation scripts in disposable environments).
 :::
 
-For destructive session slash commands (`/clear`, `/new` / `/reset`, `/undo`, `/exit --delete`), the CLI also prompts for confirmation before running them. See [Slash Commands — Confirmation prompts for destructive commands](../reference/slash-commands.md#confirmation-prompts-for-destructive-commands).
+For destructive session slash commands (`/clear`, `/new` / `/reset`, `/undo`, `/quit --delete` — `/exit --delete` is an alias), the CLI also prompts for confirmation before running them. See [Slash Commands — Confirmation prompts for destructive commands](../reference/slash-commands.md#confirmation-prompts-for-destructive-commands).
 
 ### Hardline Blocklist (Always-On Floor)
 
@@ -422,7 +435,7 @@ terminal:
     - my_custom_oauth_token.json
 ```
 
-Paths are relative to `~/.hermes/`. Files are mounted to `/root/.hermes/` inside the container.
+Paths are relative to `~/.hermes/`. Files are mounted to `/root/.hermes/` inside the container. This list is read by `tools/credential_files.py` (`terminal.credential_files`) — it lives under the `terminal:` block but is loaded by the credential-files module, not the core terminal backend, so it isn't part of the bundled `DEFAULT_CONFIG` snapshot.
 
 ### What Each Sandbox Filters
 
@@ -573,7 +586,7 @@ Blocked files show a warning:
 4. **Store secrets securely** — keep API keys in `~/.hermes/.env` with proper file permissions
 5. **Enable DM pairing** — use pairing codes instead of hardcoding user IDs when possible
 6. **Review command allowlist** — periodically audit `command_allowlist` in config.yaml
-7. **Set `MESSAGING_CWD`** — don't let the agent operate from sensitive directories
+7. **Set `terminal.cwd`** — don't let the agent operate from sensitive directories
 8. **Run as non-root** — never run the gateway as root
 9. **Monitor logs** — check `~/.hermes/logs/` for unauthorized access attempts
 10. **Keep updated** — run `hermes update` regularly for security patches
diff --git a/website/docs/user-guide/sessions.md b/website/docs/user-guide/sessions.md
index 6b051d0d79b..fa55080c2e3 100644
--- a/website/docs/user-guide/sessions.md
+++ b/website/docs/user-guide/sessions.md
@@ -4,6 +4,8 @@ title: "Sessions"
 description: "Session persistence, resume, search, management, and per-platform session tracking"
 ---
 
+import useBaseUrl from '@docusaurus/useBaseUrl';
+
 # Sessions
 
 Hermes Agent automatically saves every conversation as a session. Sessions enable conversation resume, cross-session search, and full conversation history management.
@@ -144,7 +146,7 @@ Session IDs are shown when you exit a CLI session, and can be found with `hermes
 
 When you resume a session, Hermes displays a compact recap of the previous conversation in a styled panel before the input prompt:
 
-<img className="docs-terminal-figure" src="/img/docs/session-recap.svg" alt="Stylized preview of the Previous Conversation recap panel shown when resuming a Hermes session." />
+<img className="docs-terminal-figure" src={useBaseUrl('/img/docs/session-recap.svg')} alt="Stylized preview of the Previous Conversation recap panel shown when resuming a Hermes session." />
 <p className="docs-figure-caption">Resume mode shows a compact recap panel with recent user and assistant turns before returning you to the live prompt.</p>
 
 The recap:
diff --git a/website/docs/user-guide/skills/bundled/apple/apple-apple-notes.md b/website/docs/user-guide/skills/bundled/apple/apple-apple-notes.md
index edad8b671af..637d56a3267 100644
--- a/website/docs/user-guide/skills/bundled/apple/apple-apple-notes.md
+++ b/website/docs/user-guide/skills/bundled/apple/apple-apple-notes.md
@@ -21,7 +21,7 @@ Manage Apple Notes via memo CLI: create, search, edit.
 | License | MIT |
 | Platforms | macos |
 | Tags | `Notes`, `Apple`, `macOS`, `note-taking` |
-| Related skills | [`obsidian`](/user-guide/skills/bundled/note-taking/note-taking-obsidian) |
+| Related skills | [`obsidian`](/docs/user-guide/skills/bundled/note-taking/note-taking-obsidian) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/bundled/apple/apple-apple-reminders.md b/website/docs/user-guide/skills/bundled/apple/apple-apple-reminders.md
index 49549d369ef..9ab15d6b547 100644
--- a/website/docs/user-guide/skills/bundled/apple/apple-apple-reminders.md
+++ b/website/docs/user-guide/skills/bundled/apple/apple-apple-reminders.md
@@ -84,6 +84,38 @@ remindctl add --title "Call mom" --list Personal --due tomorrow
 remindctl add --title "Meeting prep" --due "2026-02-15 09:00"
 ```
 
+### Due Time vs Alarm / Early Nudge
+
+`--due` and `--alarm` are different fields:
+
+- `--due` sets the reminder's due date/time.
+- `--alarm` sets the EventKit alarm/notification trigger. Timed due reminders may default to an alarm at the due time, but pass `--alarm` explicitly when the user asks for an earlier nudge.
+
+For a reminder due at 2:00 PM with a notification 30 minutes earlier:
+
+```bash
+remindctl add --title "Hairdresser" --due "2026-05-15 14:00" --alarm "2026-05-15 13:30"
+```
+
+To edit an existing reminder:
+
+```bash
+remindctl edit 87354 --due "2026-05-15 14:00" --alarm "2026-05-15 13:30"
+```
+
+The Reminders UI may show or group the item by the alarm time because that is when the notification fires. Verify with JSON instead of assuming the due time moved:
+
+```bash
+remindctl today --json
+```
+
+Expected shape:
+
+- `dueDate`: actual due time
+- `alarmDate`: notification / early nudge time
+
+Apple's public `EKReminder` docs list only reminder-specific properties. Alarm support comes from inherited `EKCalendarItem` behavior exposed by remindctl's `--alarm` flag.
+
 ### Complete / Delete
 
 ```bash
diff --git a/website/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-claude-code.md b/website/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-claude-code.md
index c56fca7ec55..6d537901861 100644
--- a/website/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-claude-code.md
+++ b/website/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-claude-code.md
@@ -21,7 +21,7 @@ Delegate coding to Claude Code CLI (features, PRs).
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `Coding-Agent`, `Claude`, `Anthropic`, `Code-Review`, `Refactoring`, `PTY`, `Automation` |
-| Related skills | [`codex`](/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-codex), [`hermes-agent`](/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-hermes-agent), [`opencode`](/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-opencode) |
+| Related skills | [`codex`](/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-codex), [`hermes-agent`](/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-hermes-agent), [`opencode`](/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-opencode) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-codex.md b/website/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-codex.md
index 1e142db15db..3482f2303c1 100644
--- a/website/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-codex.md
+++ b/website/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-codex.md
@@ -21,7 +21,7 @@ Delegate coding to OpenAI Codex CLI (features, PRs).
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `Coding-Agent`, `Codex`, `OpenAI`, `Code-Review`, `Refactoring` |
-| Related skills | [`claude-code`](/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-claude-code), [`hermes-agent`](/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-hermes-agent) |
+| Related skills | [`claude-code`](/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-claude-code), [`hermes-agent`](/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-hermes-agent) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-hermes-agent.md b/website/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-hermes-agent.md
index f8b3a2bed34..57579e0f135 100644
--- a/website/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-hermes-agent.md
+++ b/website/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-hermes-agent.md
@@ -463,15 +463,15 @@ Common "why is Hermes doing X to my output / tool calls / commands?" toggles —
 
 ### Secret redaction in tool output
 
-Secret redaction is **off by default** — tool output (terminal stdout, `read_file`, web content, subagent summaries, etc.) passes through unmodified. If the user wants Hermes to auto-mask strings that look like API keys, tokens, and secrets before they enter the conversation context and logs:
+Secret redaction is **on by default** — tool output (terminal stdout, `read_file`, web content, subagent summaries, etc.) is scanned for strings that look like API keys, tokens, and secrets before it enters the conversation context and logs. Leave it enabled for normal use:
 
 ```bash
-hermes config set security.redact_secrets true       # enable globally
+hermes config set security.redact_secrets true       # keep enabled globally
 ```
 
-**Restart required.** `security.redact_secrets` is snapshotted at import time — toggling it mid-session (e.g. via `export HERMES_REDACT_SECRETS=true` from a tool call) will NOT take effect for the running process. Tell the user to run `hermes config set security.redact_secrets true` in a terminal, then start a new session. This is deliberate — it prevents an LLM from flipping the toggle on itself mid-task.
+**Restart required.** `security.redact_secrets` is snapshotted at import time — toggling it mid-session (e.g. via `export HERMES_REDACT_SECRETS=false` from a tool call) will NOT take effect for the running process. Tell the user to change it in config from a terminal, then start a new session. This is deliberate — it prevents an LLM from flipping the toggle on itself mid-task.
 
-Disable again with:
+Disable only when you deliberately need raw credential-like strings for debugging or redactor development:
 ```bash
 hermes config set security.redact_secrets false
 ```
diff --git a/website/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-kanban-codex-lane.md b/website/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-kanban-codex-lane.md
new file mode 100644
index 00000000000..aac59a16d04
--- /dev/null
+++ b/website/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-kanban-codex-lane.md
@@ -0,0 +1,295 @@
+---
+title: "Kanban Codex Lane"
+sidebar_label: "Kanban Codex Lane"
+description: "Use when a Hermes Kanban worker wants to run Codex CLI as an isolated implementation lane while Hermes keeps ownership of task lifecycle, reconciliation, tes..."
+---
+
+{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */}
+
+# Kanban Codex Lane
+
+Use when a Hermes Kanban worker wants to run Codex CLI as an isolated implementation lane while Hermes keeps ownership of task lifecycle, reconciliation, testing, and handoff.
+
+## Skill metadata
+
+| | |
+|---|---|
+| Source | Bundled (installed by default) |
+| Path | `skills/autonomous-ai-agents/kanban-codex-lane` |
+| Version | `1.0.0` |
+| Author | Hermes Agent |
+| License | MIT |
+| Tags | `kanban`, `codex`, `worktrees`, `autonomous-agents`, `prediction-market-bot` |
+| Related skills | [`kanban-worker`](/docs/user-guide/skills/bundled/devops/devops-kanban-worker), [`codex`](/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-codex), [`hermes-agent`](/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-hermes-agent) |
+
+## Reference: full SKILL.md
+
+:::info
+The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active.
+:::
+
+# Kanban Codex Lane
+
+## Overview
+
+This skill defines the lightweight Hermes+Codex dual-lane convention for Kanban workers. Hermes is always the task owner: it calls `kanban_show`, decides whether Codex is appropriate, creates or selects an isolated workspace, starts and monitors Codex, reconciles any diff, runs verification, and writes the final `kanban_complete` or `kanban_block` handoff. Codex is an input lane only. Codex output is not a task completion signal, not a trusted reviewer, and not allowed to write durable Kanban state directly.
+
+The convention exists so a Hermes worker can use Codex for bounded implementation help without changing the dispatcher. The dispatcher must still spawn Hermes workers. A worker may optionally spawn Codex inside its own run, then accept, partially accept, or reject the lane after independent review and tests.
+
+## When to Use
+
+Use the Codex lane when all of these are true:
+
+- The Kanban task is a coding, refactor, documentation, test, or mechanical migration task with clear acceptance criteria.
+- A bounded diff can be evaluated by Hermes in one run.
+- The repo can be copied or checked out in an isolated git worktree/branch.
+- Hermes can run the relevant tests itself after Codex exits.
+- The prompt can state all safety constraints and files that must not change.
+
+Do not use the Codex lane when any of these are true:
+
+- The task requires human judgment that is not already captured in the Kanban body.
+- The worker lacks repo access, Codex auth, or time to reconcile the result.
+- The change touches secrets, credential stores, private user data, or production order-entry systems.
+- A small direct edit is faster and safer than spawning another agent.
+- The task is research-only and should produce a written handoff rather than a diff.
+- The worker would be tempted to mark Done based only on Codex self-report.
+
+## Ownership Rules
+
+1. Hermes owns the Kanban lifecycle. Codex must never call `kanban_complete`, `kanban_block`, `kanban_create`, gateway messaging, or any Hermes board CLI as a substitute for the worker.
+2. Hermes owns final acceptance. Treat Codex commits/diffs as untrusted patches until reviewed and verified.
+3. Hermes owns test execution. Codex may run tests, but those runs are advisory; repeat required verification from Hermes with the repo's canonical wrapper.
+4. Hermes owns safety. If Codex changes safety boundaries, risk gates, live trading behavior, or secrets handling, reject the lane even if tests pass.
+5. Hermes owns cleanup. Kill stuck Codex processes and remove temporary worktrees when they are no longer needed.
+
+## Required Worktree and Branch Pattern
+
+Never run Codex directly in a shared dirty checkout. Use a branch/worktree name that ties the lane to the Kanban task and keeps untrusted edits isolated.
+
+Recommended variables:
+
+```bash
+TASK_ID="${HERMES_KANBAN_TASK:-t_manual}"
+REPO="/path/to/repo"
+BASE="$(git -C "$REPO" rev-parse --abbrev-ref HEAD)"
+SAFE_TASK="$(printf '%s' "$TASK_ID" | tr -cd '[:alnum:]_-')"
+BRANCH="codex/${SAFE_TASK}/$(date -u +%Y%m%d%H%M%S)"
+WORKTREE="/tmp/${SAFE_TASK}-codex-lane"
+```
+
+Create the isolated lane:
+
+```bash
+git -C "$REPO" fetch --all --prune
+git -C "$REPO" worktree add -b "$BRANCH" "$WORKTREE" "$BASE"
+git -C "$WORKTREE" status --short --branch
+```
+
+If the current Kanban workspace is already an isolated git worktree created for this task, you may create a sibling Codex branch inside it only if `git status --short` is clean except for intentional Hermes edits. Otherwise create a separate temporary worktree and cherry-pick or copy accepted commits back after reconciliation.
+
+Cleanup after reconciliation:
+
+```bash
+git -C "$REPO" worktree remove "$WORKTREE"
+git -C "$REPO" branch -D "$BRANCH"  # only after accepted commits were copied/cherry-picked or intentionally rejected
+```
+
+Keep the worktree if it is needed as an artifact for review; record it in `codex_lane.artifacts` and mention it in the handoff.
+
+## Codex Capability Checks
+
+Run these before spawning Codex. Missing Codex is a normal reason to skip the lane, not a task blocker if Hermes can do the task directly.
+
+```bash
+command -v codex
+codex --version
+codex features list | grep -i goals || true
+```
+
+If `/goal` support is required, enable or launch with the feature flag only after checking availability:
+
+```bash
+codex features enable goals || true
+codex --enable goals --version
+```
+
+Authentication can be via `OPENAI_API_KEY` or the Codex CLI OAuth state (often `~/.codex/auth.json`). Do not print token files. A missing `OPENAI_API_KEY` is not proof that auth is unavailable.
+
+## Mode Selection
+
+Use `codex exec` for bounded one-shot edits where Codex should exit on its own:
+
+```python
+terminal(
+    command="codex exec --full-auto '$(cat /tmp/codex_prompt.md)'",
+    workdir=WORKTREE,
+    background=True,
+    pty=True,
+    notify_on_complete=True,
+)
+```
+
+Use Codex `/goal` only for broader multi-step work that benefits from durable objective tracking. Launch interactively in a PTY/tmux session or with `codex --enable goals` if the feature is disabled by default. Keep the goal objective self-contained: repo path, task id, safety constraints, allowed scope, acceptance criteria, tests, and commit expectations.
+
+Example `/goal` objective text to paste into Codex:
+
+```text
+/goal Work in this repository only: <WORKTREE>. Task: <TASK_ID> <TITLE>.
+Hermes owns the Kanban lifecycle; do not call Hermes kanban tools or messaging.
+Create small commits on branch <BRANCH>. Follow the PMB safety constraints in the prompt.
+Run the requested verification commands and report exact outputs. Stop after producing a diff and summary.
+```
+
+Do not use `--yolo` for prediction-market-bot or safety-sensitive repos. Prefer `--full-auto` inside the isolated worktree, then rely on Hermes reconciliation.
+
+## Prompt Construction
+
+Use the linked template at `templates/pmb-codex-lane-prompt.md` for prediction-market-bot work. For other repos, keep the same structure and replace the PMB-specific safety block with repo-specific invariants.
+
+Every Codex prompt must include:
+
+- `task_id`, title, and full Kanban acceptance criteria.
+- Repo path, worktree path, branch name, and allowed file scope.
+- Explicit statement: Hermes owns Kanban lifecycle; Codex is an input lane only.
+- Required output: concise summary, files changed, commits, tests run, and known risks.
+- Prohibited actions: secrets access, external messaging, board mutation, unrelated refactors, dependency upgrades unless required.
+- Verification commands Codex may run and commands Hermes will run afterward.
+
+For PMB, include these mandatory safety constraints verbatim:
+
+```text
+PMB safety constraints:
+- live-SIM is paper-only; do not add or enable live REST order entry.
+- Never use market orders.
+- Do not add execution crossing or bypass price/risk checks.
+- Do not fake passive fills, fills, PnL, order states, or reconciliation evidence.
+- Do not weaken risk gates, limits, kill switches, or fail-closed behavior.
+- Keep research/selection outside the C++ hot path unless explicitly requested.
+- Do not read, print, write, or require secrets/tokens/credentials.
+```
+
+## Monitoring, Timeout, and Kill Behavior
+
+Start long Codex lanes in the background with PTY and completion notification:
+
+```python
+result = terminal(
+    command="codex exec --full-auto '$(cat /tmp/codex_prompt.md)'",
+    workdir=WORKTREE,
+    background=True,
+    pty=True,
+    notify_on_complete=True,
+)
+session_id = result["session_id"]
+```
+
+Monitor without interfering:
+
+```python
+process(action="poll", session_id=session_id)
+process(action="log", session_id=session_id, limit=200)
+process(action="wait", session_id=session_id, timeout=300)
+```
+
+Send a Kanban heartbeat every few minutes for lanes longer than two minutes, e.g. `kanban_heartbeat(note="Codex lane running in <WORKTREE>; waiting for tests/diff")`.
+
+Kill conditions:
+
+- No useful output for the task's remaining runtime budget.
+- Codex requests secrets, production credentials, or external permissions.
+- Codex attempts to modify files outside the worktree.
+- Codex starts unrelated rewrites or dependency churn.
+- Codex is still running near the worker timeout and no safe partial artifact exists.
+
+Kill command:
+
+```python
+process(action="kill", session_id=session_id)
+```
+
+After kill, inspect `git status --short`, preserve useful patches only if safe, and record `codex_lane.result: timed_out` or `rejected` with a concrete `rejected_reason`.
+
+## Reconciliation Checklist
+
+Hermes must perform this checklist before accepting any Codex lane result:
+
+- [ ] `git -C <WORKTREE> status --short --branch` shows only expected files.
+- [ ] `git -C <WORKTREE> diff --stat` and `git diff` were reviewed by Hermes.
+- [ ] No secrets, credentials, generated caches, unrelated data, or local artifacts are included.
+- [ ] PMB safety constraints were preserved: no live REST order entry, no market orders, no execution crossing, no fake passive fills/PnL, no risk-gate weakening, no secrets.
+- [ ] Codex commits are small enough to cherry-pick or squash cleanly.
+- [ ] Hermes ran the canonical tests itself, using `scripts/run_tests.sh` for Hermes Agent or the repo's documented wrapper for other repos.
+- [ ] Any Codex-run tests are listed separately from Hermes-run tests.
+- [ ] Accepted commits/diffs were applied to the Hermes-owned workspace/branch.
+- [ ] Rejected or partial work has a concrete reason and artifact path if useful.
+
+Acceptance outcomes:
+
+- `accepted`: Codex diff/commits were reviewed, applied, and verified.
+- `partial`: Some Codex work was accepted after edits or cherry-picks; rejected parts are documented.
+- `rejected`: No Codex changes were accepted; reason is documented.
+- `timed_out`: Codex exceeded the lane budget; useful artifacts may or may not exist.
+
+## kanban_complete Metadata Schema
+
+Include this object under `metadata.codex_lane` for every task where the lane was considered. If Codex was not used, set `used: false` and explain why in `rejected_reason` or a sibling `notes` field.
+
+```json
+{
+  "codex_lane": {
+    "used": true,
+    "mode": "exec | goal | skipped",
+    "worktree": "/absolute/path/to/codex/worktree",
+    "branch": "codex/t_caa69668/20260508100000",
+    "command": "codex exec --full-auto ...",
+    "result": "accepted | rejected | partial | timed_out",
+    "accepted_commits": ["<sha1>", "<sha2>"],
+    "rejected_reason": "empty when fully accepted; otherwise concrete reason",
+    "tests_run": [
+      {"command": "scripts/run_tests.sh tests/tools/test_x.py", "exit_code": 0, "owner": "hermes"},
+      {"command": "codex-reported: npm test", "exit_code": 0, "owner": "codex"}
+    ],
+    "artifacts": ["/absolute/path/to/log-or-patch"]
+  }
+}
+```
+
+For tasks that intentionally skip Codex:
+
+```json
+{
+  "codex_lane": {
+    "used": false,
+    "mode": "skipped",
+    "worktree": null,
+    "branch": null,
+    "command": null,
+    "result": "rejected",
+    "accepted_commits": [],
+    "rejected_reason": "Direct Hermes edit was smaller and safer than spawning Codex.",
+    "tests_run": [],
+    "artifacts": []
+  }
+}
+```
+
+## Common Pitfalls
+
+1. Treating Codex self-report as verification. Always inspect the diff and rerun tests from Hermes.
+2. Running Codex in the user's dirty main checkout. Always isolate in a worktree/branch.
+3. Letting Codex own Kanban. Codex may summarize progress, but Hermes writes board state.
+4. Forgetting PMB safety invariants in the prompt. Missing safety text is a lane setup failure.
+5. Using `/goal` for quick edits. Prefer `codex exec` unless durable multi-step continuation is needed.
+6. Killing a stuck lane without recording why. `rejected_reason` must explain the decision.
+7. Accepting broad unrelated cleanup because tests pass. Reject or cherry-pick only the scoped changes.
+
+## Verification Checklist
+
+- [ ] Codex was skipped or started only after `command -v codex`, `codex --version`, and optional goals feature checks.
+- [ ] Codex ran only in an isolated worktree/branch.
+- [ ] Prompt included task scope, ownership rules, PMB safety constraints when applicable, and verification commands.
+- [ ] Hermes reviewed `git diff` and safety-sensitive files.
+- [ ] Hermes ran canonical tests independently.
+- [ ] `kanban_complete.metadata.codex_lane` follows the schema above.
+- [ ] Temporary processes and unnecessary worktrees were cleaned up.
diff --git a/website/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-opencode.md b/website/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-opencode.md
index 848ecfa5b96..37c6c1d15dc 100644
--- a/website/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-opencode.md
+++ b/website/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-opencode.md
@@ -21,7 +21,7 @@ Delegate coding to OpenCode CLI (features, PR review).
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `Coding-Agent`, `OpenCode`, `Autonomous`, `Refactoring`, `Code-Review` |
-| Related skills | [`claude-code`](/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-claude-code), [`codex`](/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-codex), [`hermes-agent`](/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-hermes-agent) |
+| Related skills | [`claude-code`](/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-claude-code), [`codex`](/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-codex), [`hermes-agent`](/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-hermes-agent) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/bundled/creative/creative-architecture-diagram.md b/website/docs/user-guide/skills/bundled/creative/creative-architecture-diagram.md
index c8802c6faf2..ad816a370ad 100644
--- a/website/docs/user-guide/skills/bundled/creative/creative-architecture-diagram.md
+++ b/website/docs/user-guide/skills/bundled/creative/creative-architecture-diagram.md
@@ -21,7 +21,7 @@ Dark-themed SVG architecture/cloud/infra diagrams as HTML.
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `architecture`, `diagrams`, `SVG`, `HTML`, `visualization`, `infrastructure`, `cloud` |
-| Related skills | [`concept-diagrams`](/user-guide/skills/optional/creative/creative-concept-diagrams), [`excalidraw`](/user-guide/skills/bundled/creative/creative-excalidraw) |
+| Related skills | [`concept-diagrams`](/docs/user-guide/skills/optional/creative/creative-concept-diagrams), [`excalidraw`](/docs/user-guide/skills/bundled/creative/creative-excalidraw) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/bundled/creative/creative-ascii-art.md b/website/docs/user-guide/skills/bundled/creative/creative-ascii-art.md
index 17737e20dd7..ba08d77c059 100644
--- a/website/docs/user-guide/skills/bundled/creative/creative-ascii-art.md
+++ b/website/docs/user-guide/skills/bundled/creative/creative-ascii-art.md
@@ -21,7 +21,7 @@ ASCII art: pyfiglet, cowsay, boxes, image-to-ascii.
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `ASCII`, `Art`, `Banners`, `Creative`, `Unicode`, `Text-Art`, `pyfiglet`, `figlet`, `cowsay`, `boxes` |
-| Related skills | [`excalidraw`](/user-guide/skills/bundled/creative/creative-excalidraw) |
+| Related skills | [`excalidraw`](/docs/user-guide/skills/bundled/creative/creative-excalidraw) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/bundled/creative/creative-claude-design.md b/website/docs/user-guide/skills/bundled/creative/creative-claude-design.md
index 331db0fa687..bf6f4eafaa3 100644
--- a/website/docs/user-guide/skills/bundled/creative/creative-claude-design.md
+++ b/website/docs/user-guide/skills/bundled/creative/creative-claude-design.md
@@ -21,7 +21,7 @@ Design one-off HTML artifacts (landing, deck, prototype).
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `design`, `html`, `prototype`, `ux`, `ui`, `creative`, `artifact`, `deck`, `motion`, `design-system` |
-| Related skills | [`design-md`](/user-guide/skills/bundled/creative/creative-design-md), [`popular-web-designs`](/user-guide/skills/bundled/creative/creative-popular-web-designs), [`excalidraw`](/user-guide/skills/bundled/creative/creative-excalidraw), [`architecture-diagram`](/user-guide/skills/bundled/creative/creative-architecture-diagram) |
+| Related skills | [`design-md`](/docs/user-guide/skills/bundled/creative/creative-design-md), [`popular-web-designs`](/docs/user-guide/skills/bundled/creative/creative-popular-web-designs), [`excalidraw`](/docs/user-guide/skills/bundled/creative/creative-excalidraw), [`architecture-diagram`](/docs/user-guide/skills/bundled/creative/creative-architecture-diagram) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/bundled/creative/creative-comfyui.md b/website/docs/user-guide/skills/bundled/creative/creative-comfyui.md
index c2f93b89919..38610be8b83 100644
--- a/website/docs/user-guide/skills/bundled/creative/creative-comfyui.md
+++ b/website/docs/user-guide/skills/bundled/creative/creative-comfyui.md
@@ -21,7 +21,7 @@ Generate images, video, and audio with ComfyUI — install, launch, manage nodes
 | License | MIT |
 | Platforms | macos, linux, windows |
 | Tags | `comfyui`, `image-generation`, `stable-diffusion`, `flux`, `sd3`, `wan-video`, `hunyuan-video`, `creative`, `generative-ai`, `video-generation` |
-| Related skills | [`stable-diffusion-image-generation`](/user-guide/skills/optional/mlops/mlops-stable-diffusion), `image_gen` |
+| Related skills | [`stable-diffusion-image-generation`](/docs/user-guide/skills/optional/mlops/mlops-stable-diffusion), `image_gen` |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/bundled/creative/creative-design-md.md b/website/docs/user-guide/skills/bundled/creative/creative-design-md.md
index 8ee856676ff..a96723ddb7f 100644
--- a/website/docs/user-guide/skills/bundled/creative/creative-design-md.md
+++ b/website/docs/user-guide/skills/bundled/creative/creative-design-md.md
@@ -21,7 +21,7 @@ Author/validate/export Google's DESIGN.md token spec files.
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `design`, `design-system`, `tokens`, `ui`, `accessibility`, `wcag`, `tailwind`, `dtcg`, `google` |
-| Related skills | [`popular-web-designs`](/user-guide/skills/bundled/creative/creative-popular-web-designs), [`claude-design`](/user-guide/skills/bundled/creative/creative-claude-design), [`excalidraw`](/user-guide/skills/bundled/creative/creative-excalidraw), [`architecture-diagram`](/user-guide/skills/bundled/creative/creative-architecture-diagram) |
+| Related skills | [`popular-web-designs`](/docs/user-guide/skills/bundled/creative/creative-popular-web-designs), [`claude-design`](/docs/user-guide/skills/bundled/creative/creative-claude-design), [`excalidraw`](/docs/user-guide/skills/bundled/creative/creative-excalidraw), [`architecture-diagram`](/docs/user-guide/skills/bundled/creative/creative-architecture-diagram) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/bundled/creative/creative-humanizer.md b/website/docs/user-guide/skills/bundled/creative/creative-humanizer.md
index 2f7dea08152..178c2502b47 100644
--- a/website/docs/user-guide/skills/bundled/creative/creative-humanizer.md
+++ b/website/docs/user-guide/skills/bundled/creative/creative-humanizer.md
@@ -21,7 +21,7 @@ Humanize text: strip AI-isms and add real voice.
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `writing`, `editing`, `humanize`, `anti-ai-slop`, `voice`, `prose`, `text` |
-| Related skills | [`songwriting-and-ai-music`](/user-guide/skills/bundled/creative/creative-songwriting-and-ai-music) |
+| Related skills | [`songwriting-and-ai-music`](/docs/user-guide/skills/bundled/creative/creative-songwriting-and-ai-music) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/bundled/creative/creative-p5js.md b/website/docs/user-guide/skills/bundled/creative/creative-p5js.md
index 75643a1ec56..cb175f61801 100644
--- a/website/docs/user-guide/skills/bundled/creative/creative-p5js.md
+++ b/website/docs/user-guide/skills/bundled/creative/creative-p5js.md
@@ -19,7 +19,7 @@ p5.js sketches: gen art, shaders, interactive, 3D.
 | Version | `1.0.0` |
 | Platforms | linux, macos, windows |
 | Tags | `creative-coding`, `generative-art`, `p5js`, `canvas`, `interactive`, `visualization`, `webgl`, `shaders`, `animation` |
-| Related skills | [`ascii-video`](/user-guide/skills/bundled/creative/creative-ascii-video), [`manim-video`](/user-guide/skills/bundled/creative/creative-manim-video), [`excalidraw`](/user-guide/skills/bundled/creative/creative-excalidraw) |
+| Related skills | [`ascii-video`](/docs/user-guide/skills/bundled/creative/creative-ascii-video), [`manim-video`](/docs/user-guide/skills/bundled/creative/creative-manim-video), [`excalidraw`](/docs/user-guide/skills/bundled/creative/creative-excalidraw) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/bundled/creative/creative-pretext.md b/website/docs/user-guide/skills/bundled/creative/creative-pretext.md
index 32ccdd89ba4..78ed86c8e61 100644
--- a/website/docs/user-guide/skills/bundled/creative/creative-pretext.md
+++ b/website/docs/user-guide/skills/bundled/creative/creative-pretext.md
@@ -21,7 +21,7 @@ Use when building creative browser demos with @chenglou/pretext — DOM-free tex
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `creative-coding`, `typography`, `pretext`, `ascii-art`, `canvas`, `generative`, `text-layout`, `kinetic-typography` |
-| Related skills | [`p5js`](/user-guide/skills/bundled/creative/creative-p5js), [`claude-design`](/user-guide/skills/bundled/creative/creative-claude-design), [`excalidraw`](/user-guide/skills/bundled/creative/creative-excalidraw), [`architecture-diagram`](/user-guide/skills/bundled/creative/creative-architecture-diagram) |
+| Related skills | [`p5js`](/docs/user-guide/skills/bundled/creative/creative-p5js), [`claude-design`](/docs/user-guide/skills/bundled/creative/creative-claude-design), [`excalidraw`](/docs/user-guide/skills/bundled/creative/creative-excalidraw), [`architecture-diagram`](/docs/user-guide/skills/bundled/creative/creative-architecture-diagram) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/bundled/creative/creative-sketch.md b/website/docs/user-guide/skills/bundled/creative/creative-sketch.md
index 25c3e9fe8d8..05ee5d343e6 100644
--- a/website/docs/user-guide/skills/bundled/creative/creative-sketch.md
+++ b/website/docs/user-guide/skills/bundled/creative/creative-sketch.md
@@ -21,7 +21,7 @@ Throwaway HTML mockups: 2-3 design variants to compare.
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `sketch`, `mockup`, `design`, `ui`, `prototype`, `html`, `variants`, `exploration`, `wireframe`, `comparison` |
-| Related skills | [`spike`](/user-guide/skills/bundled/software-development/software-development-spike), [`claude-design`](/user-guide/skills/bundled/creative/creative-claude-design), [`popular-web-designs`](/user-guide/skills/bundled/creative/creative-popular-web-designs), [`excalidraw`](/user-guide/skills/bundled/creative/creative-excalidraw) |
+| Related skills | [`spike`](/docs/user-guide/skills/bundled/software-development/software-development-spike), [`claude-design`](/docs/user-guide/skills/bundled/creative/creative-claude-design), [`popular-web-designs`](/docs/user-guide/skills/bundled/creative/creative-popular-web-designs), [`excalidraw`](/docs/user-guide/skills/bundled/creative/creative-excalidraw) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/bundled/creative/creative-touchdesigner-mcp.md b/website/docs/user-guide/skills/bundled/creative/creative-touchdesigner-mcp.md
index dac3c7a37b2..2577f1f741c 100644
--- a/website/docs/user-guide/skills/bundled/creative/creative-touchdesigner-mcp.md
+++ b/website/docs/user-guide/skills/bundled/creative/creative-touchdesigner-mcp.md
@@ -21,7 +21,7 @@ Control a running TouchDesigner instance via twozero MCP — create operators, s
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `TouchDesigner`, `MCP`, `twozero`, `creative-coding`, `real-time-visuals`, `generative-art`, `audio-reactive`, `VJ`, `installation`, `GLSL` |
-| Related skills | [`native-mcp`](/user-guide/skills/bundled/mcp/mcp-native-mcp), [`ascii-video`](/user-guide/skills/bundled/creative/creative-ascii-video), [`manim-video`](/user-guide/skills/bundled/creative/creative-manim-video), `hermes-video` |
+| Related skills | [`native-mcp`](/docs/user-guide/skills/bundled/mcp/mcp-native-mcp), [`ascii-video`](/docs/user-guide/skills/bundled/creative/creative-ascii-video), [`manim-video`](/docs/user-guide/skills/bundled/creative/creative-manim-video), `hermes-video` |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/bundled/devops/devops-kanban-orchestrator.md b/website/docs/user-guide/skills/bundled/devops/devops-kanban-orchestrator.md
index 0af138f8cca..be60ff79733 100644
--- a/website/docs/user-guide/skills/bundled/devops/devops-kanban-orchestrator.md
+++ b/website/docs/user-guide/skills/bundled/devops/devops-kanban-orchestrator.md
@@ -19,7 +19,7 @@ Decomposition playbook + anti-temptation rules for an orchestrator profile routi
 | Version | `3.0.0` |
 | Platforms | linux, macos, windows |
 | Tags | `kanban`, `multi-agent`, `orchestration`, `routing` |
-| Related skills | [`kanban-worker`](/user-guide/skills/bundled/devops/devops-kanban-worker) |
+| Related skills | [`kanban-worker`](/docs/user-guide/skills/bundled/devops/devops-kanban-worker) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/bundled/devops/devops-kanban-worker.md b/website/docs/user-guide/skills/bundled/devops/devops-kanban-worker.md
index b38db49eab7..6312dafbbae 100644
--- a/website/docs/user-guide/skills/bundled/devops/devops-kanban-worker.md
+++ b/website/docs/user-guide/skills/bundled/devops/devops-kanban-worker.md
@@ -19,7 +19,7 @@ Pitfalls, examples, and edge cases for Hermes Kanban workers. The lifecycle itse
 | Version | `2.0.0` |
 | Platforms | linux, macos, windows |
 | Tags | `kanban`, `multi-agent`, `collaboration`, `workflow`, `pitfalls` |
-| Related skills | [`kanban-orchestrator`](/user-guide/skills/bundled/devops/devops-kanban-orchestrator) |
+| Related skills | [`kanban-orchestrator`](/docs/user-guide/skills/bundled/devops/devops-kanban-orchestrator) |
 
 ## Reference: full SKILL.md
 
@@ -39,7 +39,7 @@ Your workspace kind determines how you should behave inside `$HERMES_KANBAN_WORK
 |---|---|---|
 | `scratch` | Fresh tmp dir, yours alone | Read/write freely; it gets GC'd when the task is archived. |
 | `dir:<path>` | Shared persistent directory | Other runs will read what you write. Treat it like long-lived state. Path is guaranteed absolute (the kernel rejects relative paths). |
-| `worktree` | Git worktree at the resolved path | If `.git` doesn't exist, run `git worktree add <path> <branch>` from the main repo first, then cd and work normally. Commit work here. |
+| `worktree` | Git worktree at the resolved path | If `.git` doesn't exist, run `git worktree add <path> ${HERMES_KANBAN_BRANCH:-wt/$HERMES_KANBAN_TASK}` from the main repo first, then cd and work normally. Commit work here. |
 
 ## Tenant isolation
 
@@ -175,6 +175,13 @@ If you open the task and `kanban_show` returns `runs: [...]` with one or more cl
 - `outcome: "reclaimed"` + `summary: "task archived..."` — operator archived the task out from under the previous run; you probably shouldn't be running at all, check status carefully.
 - `outcome: "blocked"` — a previous attempt blocked; the unblock comment should be in the thread by now.
 
+## Notification routing
+
+You can configure the gateway to receive cross-profile Kanban task notifications by adding `notification_sources` to `~/.hermes/config.yaml`.
+- `notification_sources: ['*']` accepts subscriptions from all profiles.
+- `notification_sources: ['default', 'zilor-ppt']` or `"default,zilor-ppt"` restricts subscriptions to specified profiles.
+- Omitting the key keeps the default behavior (profile isolation).
+
 ## Do NOT
 
 - Call `delegate_task` as a substitute for `kanban_create`. `delegate_task` is for short reasoning subtasks inside YOUR run; `kanban_create` is for cross-agent handoffs that outlive one API loop.
diff --git a/website/docs/user-guide/skills/bundled/github/github-codebase-inspection.md b/website/docs/user-guide/skills/bundled/github/github-codebase-inspection.md
index f039f9578c7..f727c1cd311 100644
--- a/website/docs/user-guide/skills/bundled/github/github-codebase-inspection.md
+++ b/website/docs/user-guide/skills/bundled/github/github-codebase-inspection.md
@@ -21,7 +21,7 @@ Inspect codebases w/ pygount: LOC, languages, ratios.
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `LOC`, `Code Analysis`, `pygount`, `Codebase`, `Metrics`, `Repository` |
-| Related skills | [`github-repo-management`](/user-guide/skills/bundled/github/github-github-repo-management) |
+| Related skills | [`github-repo-management`](/docs/user-guide/skills/bundled/github/github-github-repo-management) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/bundled/github/github-github-auth.md b/website/docs/user-guide/skills/bundled/github/github-github-auth.md
index ef38b9ba45d..92b9d9f6690 100644
--- a/website/docs/user-guide/skills/bundled/github/github-github-auth.md
+++ b/website/docs/user-guide/skills/bundled/github/github-github-auth.md
@@ -21,7 +21,7 @@ GitHub auth setup: HTTPS tokens, SSH keys, gh CLI login.
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `GitHub`, `Authentication`, `Git`, `gh-cli`, `SSH`, `Setup` |
-| Related skills | [`github-pr-workflow`](/user-guide/skills/bundled/github/github-github-pr-workflow), [`github-code-review`](/user-guide/skills/bundled/github/github-github-code-review), [`github-issues`](/user-guide/skills/bundled/github/github-github-issues), [`github-repo-management`](/user-guide/skills/bundled/github/github-github-repo-management) |
+| Related skills | [`github-pr-workflow`](/docs/user-guide/skills/bundled/github/github-github-pr-workflow), [`github-code-review`](/docs/user-guide/skills/bundled/github/github-github-code-review), [`github-issues`](/docs/user-guide/skills/bundled/github/github-github-issues), [`github-repo-management`](/docs/user-guide/skills/bundled/github/github-github-repo-management) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/bundled/github/github-github-code-review.md b/website/docs/user-guide/skills/bundled/github/github-github-code-review.md
index b16e2a7aa5d..56e8fa97ad2 100644
--- a/website/docs/user-guide/skills/bundled/github/github-github-code-review.md
+++ b/website/docs/user-guide/skills/bundled/github/github-github-code-review.md
@@ -21,7 +21,7 @@ Review PRs: diffs, inline comments via gh or REST.
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `GitHub`, `Code-Review`, `Pull-Requests`, `Git`, `Quality` |
-| Related skills | [`github-auth`](/user-guide/skills/bundled/github/github-github-auth), [`github-pr-workflow`](/user-guide/skills/bundled/github/github-github-pr-workflow) |
+| Related skills | [`github-auth`](/docs/user-guide/skills/bundled/github/github-github-auth), [`github-pr-workflow`](/docs/user-guide/skills/bundled/github/github-github-pr-workflow) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/bundled/github/github-github-issues.md b/website/docs/user-guide/skills/bundled/github/github-github-issues.md
index bd8af680af3..6f99685d71a 100644
--- a/website/docs/user-guide/skills/bundled/github/github-github-issues.md
+++ b/website/docs/user-guide/skills/bundled/github/github-github-issues.md
@@ -21,7 +21,7 @@ Create, triage, label, assign GitHub issues via gh or REST.
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `GitHub`, `Issues`, `Project-Management`, `Bug-Tracking`, `Triage` |
-| Related skills | [`github-auth`](/user-guide/skills/bundled/github/github-github-auth), [`github-pr-workflow`](/user-guide/skills/bundled/github/github-github-pr-workflow) |
+| Related skills | [`github-auth`](/docs/user-guide/skills/bundled/github/github-github-auth), [`github-pr-workflow`](/docs/user-guide/skills/bundled/github/github-github-pr-workflow) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/bundled/github/github-github-pr-workflow.md b/website/docs/user-guide/skills/bundled/github/github-github-pr-workflow.md
index 2341829c326..48aa4ea9fff 100644
--- a/website/docs/user-guide/skills/bundled/github/github-github-pr-workflow.md
+++ b/website/docs/user-guide/skills/bundled/github/github-github-pr-workflow.md
@@ -21,7 +21,7 @@ GitHub PR lifecycle: branch, commit, open, CI, merge.
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `GitHub`, `Pull-Requests`, `CI/CD`, `Git`, `Automation`, `Merge` |
-| Related skills | [`github-auth`](/user-guide/skills/bundled/github/github-github-auth), [`github-code-review`](/user-guide/skills/bundled/github/github-github-code-review) |
+| Related skills | [`github-auth`](/docs/user-guide/skills/bundled/github/github-github-auth), [`github-code-review`](/docs/user-guide/skills/bundled/github/github-github-code-review) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/bundled/github/github-github-repo-management.md b/website/docs/user-guide/skills/bundled/github/github-github-repo-management.md
index abdd6f4c913..0921e3dbccc 100644
--- a/website/docs/user-guide/skills/bundled/github/github-github-repo-management.md
+++ b/website/docs/user-guide/skills/bundled/github/github-github-repo-management.md
@@ -21,7 +21,7 @@ Clone/create/fork repos; manage remotes, releases.
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `GitHub`, `Repositories`, `Git`, `Releases`, `Secrets`, `Configuration` |
-| Related skills | [`github-auth`](/user-guide/skills/bundled/github/github-github-auth), [`github-pr-workflow`](/user-guide/skills/bundled/github/github-github-pr-workflow), [`github-issues`](/user-guide/skills/bundled/github/github-github-issues) |
+| Related skills | [`github-auth`](/docs/user-guide/skills/bundled/github/github-github-auth), [`github-pr-workflow`](/docs/user-guide/skills/bundled/github/github-github-pr-workflow), [`github-issues`](/docs/user-guide/skills/bundled/github/github-github-issues) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/bundled/mcp/mcp-native-mcp.md b/website/docs/user-guide/skills/bundled/mcp/mcp-native-mcp.md
index 843529acf6e..eeeb44d6a4d 100644
--- a/website/docs/user-guide/skills/bundled/mcp/mcp-native-mcp.md
+++ b/website/docs/user-guide/skills/bundled/mcp/mcp-native-mcp.md
@@ -21,7 +21,7 @@ MCP client: connect servers, register tools (stdio/HTTP).
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `MCP`, `Tools`, `Integrations` |
-| Related skills | [`mcporter`](/user-guide/skills/optional/mcp/mcp-mcporter) |
+| Related skills | [`mcporter`](/docs/user-guide/skills/optional/mcp/mcp-mcporter) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/bundled/media/media-spotify.md b/website/docs/user-guide/skills/bundled/media/media-spotify.md
index e0b67cc4f67..7df9764f080 100644
--- a/website/docs/user-guide/skills/bundled/media/media-spotify.md
+++ b/website/docs/user-guide/skills/bundled/media/media-spotify.md
@@ -21,7 +21,7 @@ Spotify: play, search, queue, manage playlists and devices.
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `spotify`, `music`, `playback`, `playlists`, `media` |
-| Related skills | [`gif-search`](/user-guide/skills/bundled/media/media-gif-search) |
+| Related skills | [`gif-search`](/docs/user-guide/skills/bundled/media/media-gif-search) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/bundled/mlops/mlops-inference-obliteratus.md b/website/docs/user-guide/skills/bundled/mlops/mlops-inference-obliteratus.md
index 5ab8cd7b2de..3ac4e0ff7ad 100644
--- a/website/docs/user-guide/skills/bundled/mlops/mlops-inference-obliteratus.md
+++ b/website/docs/user-guide/skills/bundled/mlops/mlops-inference-obliteratus.md
@@ -22,7 +22,7 @@ OBLITERATUS: abliterate LLM refusals (diff-in-means).
 | Dependencies | `obliteratus`, `torch`, `transformers`, `bitsandbytes`, `accelerate`, `safetensors` |
 | Platforms | linux, macos |
 | Tags | `Abliteration`, `Uncensoring`, `Refusal-Removal`, `LLM`, `Weight-Projection`, `SVD`, `Mechanistic-Interpretability`, `HuggingFace`, `Model-Surgery` |
-| Related skills | `vllm`, `gguf`, [`huggingface-tokenizers`](/user-guide/skills/optional/mlops/mlops-huggingface-tokenizers) |
+| Related skills | `vllm`, `gguf`, [`huggingface-tokenizers`](/docs/user-guide/skills/optional/mlops/mlops-huggingface-tokenizers) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/bundled/productivity/productivity-google-workspace.md b/website/docs/user-guide/skills/bundled/productivity/productivity-google-workspace.md
index fc8e85742b5..9fc82ced642 100644
--- a/website/docs/user-guide/skills/bundled/productivity/productivity-google-workspace.md
+++ b/website/docs/user-guide/skills/bundled/productivity/productivity-google-workspace.md
@@ -21,7 +21,7 @@ Gmail, Calendar, Drive, Docs, Sheets via gws CLI or Python.
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `Google`, `Gmail`, `Calendar`, `Drive`, `Sheets`, `Docs`, `Contacts`, `Email`, `OAuth` |
-| Related skills | [`himalaya`](/user-guide/skills/bundled/email/email-himalaya) |
+| Related skills | [`himalaya`](/docs/user-guide/skills/bundled/email/email-himalaya) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/bundled/productivity/productivity-ocr-and-documents.md b/website/docs/user-guide/skills/bundled/productivity/productivity-ocr-and-documents.md
index 93525e63f32..b41c8601022 100644
--- a/website/docs/user-guide/skills/bundled/productivity/productivity-ocr-and-documents.md
+++ b/website/docs/user-guide/skills/bundled/productivity/productivity-ocr-and-documents.md
@@ -21,7 +21,7 @@ Extract text from PDFs/scans (pymupdf, marker-pdf).
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `PDF`, `Documents`, `Research`, `Arxiv`, `Text-Extraction`, `OCR` |
-| Related skills | [`powerpoint`](/user-guide/skills/bundled/productivity/productivity-powerpoint) |
+| Related skills | [`powerpoint`](/docs/user-guide/skills/bundled/productivity/productivity-powerpoint) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/bundled/red-teaming/red-teaming-godmode.md b/website/docs/user-guide/skills/bundled/red-teaming/red-teaming-godmode.md
index 95544c67b74..cdd34ca3946 100644
--- a/website/docs/user-guide/skills/bundled/red-teaming/red-teaming-godmode.md
+++ b/website/docs/user-guide/skills/bundled/red-teaming/red-teaming-godmode.md
@@ -21,7 +21,7 @@ Jailbreak LLMs: Parseltongue, GODMODE, ULTRAPLINIAN.
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `jailbreak`, `red-teaming`, `G0DM0D3`, `Parseltongue`, `GODMODE`, `uncensoring`, `safety-bypass`, `prompt-engineering`, `L1B3RT4S` |
-| Related skills | [`obliteratus`](/user-guide/skills/bundled/mlops/mlops-inference-obliteratus) |
+| Related skills | [`obliteratus`](/docs/user-guide/skills/bundled/mlops/mlops-inference-obliteratus) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/bundled/research/research-arxiv.md b/website/docs/user-guide/skills/bundled/research/research-arxiv.md
index 0532089c144..4425858d747 100644
--- a/website/docs/user-guide/skills/bundled/research/research-arxiv.md
+++ b/website/docs/user-guide/skills/bundled/research/research-arxiv.md
@@ -21,7 +21,7 @@ Search arXiv papers by keyword, author, category, or ID.
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `Research`, `Arxiv`, `Papers`, `Academic`, `Science`, `API` |
-| Related skills | [`ocr-and-documents`](/user-guide/skills/bundled/productivity/productivity-ocr-and-documents) |
+| Related skills | [`ocr-and-documents`](/docs/user-guide/skills/bundled/productivity/productivity-ocr-and-documents) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/bundled/research/research-llm-wiki.md b/website/docs/user-guide/skills/bundled/research/research-llm-wiki.md
index 793d3438901..419c7cd7cb2 100644
--- a/website/docs/user-guide/skills/bundled/research/research-llm-wiki.md
+++ b/website/docs/user-guide/skills/bundled/research/research-llm-wiki.md
@@ -21,7 +21,7 @@ Karpathy's LLM Wiki: build/query interlinked markdown KB.
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `wiki`, `knowledge-base`, `research`, `notes`, `markdown`, `rag-alternative` |
-| Related skills | [`obsidian`](/user-guide/skills/bundled/note-taking/note-taking-obsidian), [`arxiv`](/user-guide/skills/bundled/research/research-arxiv) |
+| Related skills | [`obsidian`](/docs/user-guide/skills/bundled/note-taking/note-taking-obsidian), [`arxiv`](/docs/user-guide/skills/bundled/research/research-arxiv) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/bundled/research/research-research-paper-writing.md b/website/docs/user-guide/skills/bundled/research/research-research-paper-writing.md
index 1ecefcce1d2..9dc216ebac7 100644
--- a/website/docs/user-guide/skills/bundled/research/research-research-paper-writing.md
+++ b/website/docs/user-guide/skills/bundled/research/research-research-paper-writing.md
@@ -22,7 +22,7 @@ Write ML papers for NeurIPS/ICML/ICLR: design→submit.
 | Dependencies | `semanticscholar`, `arxiv`, `habanero`, `requests`, `scipy`, `numpy`, `matplotlib`, `SciencePlots` |
 | Platforms | linux, macos |
 | Tags | `Research`, `Paper Writing`, `Experiments`, `ML`, `AI`, `NeurIPS`, `ICML`, `ICLR`, `ACL`, `AAAI`, `COLM`, `LaTeX`, `Citations`, `Statistical Analysis` |
-| Related skills | [`arxiv`](/user-guide/skills/bundled/research/research-arxiv), `ml-paper-writing`, [`subagent-driven-development`](/user-guide/skills/bundled/software-development/software-development-subagent-driven-development), [`plan`](/user-guide/skills/bundled/software-development/software-development-plan) |
+| Related skills | [`arxiv`](/docs/user-guide/skills/bundled/research/research-arxiv), `ml-paper-writing`, [`subagent-driven-development`](/docs/user-guide/skills/bundled/software-development/software-development-subagent-driven-development), [`plan`](/docs/user-guide/skills/bundled/software-development/software-development-plan) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/bundled/software-development/software-development-debugging-hermes-tui-commands.md b/website/docs/user-guide/skills/bundled/software-development/software-development-debugging-hermes-tui-commands.md
index 86ebd065fa9..00c3388e3a4 100644
--- a/website/docs/user-guide/skills/bundled/software-development/software-development-debugging-hermes-tui-commands.md
+++ b/website/docs/user-guide/skills/bundled/software-development/software-development-debugging-hermes-tui-commands.md
@@ -21,7 +21,7 @@ Debug Hermes TUI slash commands: Python, gateway, Ink UI.
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `debugging`, `hermes-agent`, `tui`, `slash-commands`, `typescript`, `python` |
-| Related skills | [`python-debugpy`](/user-guide/skills/bundled/software-development/software-development-python-debugpy), [`node-inspect-debugger`](/user-guide/skills/bundled/software-development/software-development-node-inspect-debugger), [`systematic-debugging`](/user-guide/skills/bundled/software-development/software-development-systematic-debugging) |
+| Related skills | [`python-debugpy`](/docs/user-guide/skills/bundled/software-development/software-development-python-debugpy), [`node-inspect-debugger`](/docs/user-guide/skills/bundled/software-development/software-development-node-inspect-debugger), [`systematic-debugging`](/docs/user-guide/skills/bundled/software-development/software-development-systematic-debugging) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/bundled/software-development/software-development-hermes-agent-skill-authoring.md b/website/docs/user-guide/skills/bundled/software-development/software-development-hermes-agent-skill-authoring.md
index 82653d1535f..dcca5752b1a 100644
--- a/website/docs/user-guide/skills/bundled/software-development/software-development-hermes-agent-skill-authoring.md
+++ b/website/docs/user-guide/skills/bundled/software-development/software-development-hermes-agent-skill-authoring.md
@@ -21,7 +21,7 @@ Author in-repo SKILL.md: frontmatter, validator, structure.
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `skills`, `authoring`, `hermes-agent`, `conventions`, `skill-md` |
-| Related skills | [`writing-plans`](/user-guide/skills/bundled/software-development/software-development-writing-plans), [`requesting-code-review`](/user-guide/skills/bundled/software-development/software-development-requesting-code-review) |
+| Related skills | [`writing-plans`](/docs/user-guide/skills/bundled/software-development/software-development-writing-plans), [`requesting-code-review`](/docs/user-guide/skills/bundled/software-development/software-development-requesting-code-review) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/bundled/software-development/software-development-hermes-s6-container-supervision.md b/website/docs/user-guide/skills/bundled/software-development/software-development-hermes-s6-container-supervision.md
new file mode 100644
index 00000000000..4f35a9a38fc
--- /dev/null
+++ b/website/docs/user-guide/skills/bundled/software-development/software-development-hermes-s6-container-supervision.md
@@ -0,0 +1,196 @@
+---
+title: "Hermes S6 Container Supervision"
+sidebar_label: "Hermes S6 Container Supervision"
+description: "Modify, debug, or extend the s6-overlay supervision tree inside the Hermes Agent Docker image — adding new services, debugging profile gateways, understandin..."
+---
+
+{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */}
+
+# Hermes S6 Container Supervision
+
+Modify, debug, or extend the s6-overlay supervision tree inside the Hermes Agent Docker image — adding new services, debugging profile gateways, understanding the Architecture B main-program pattern.
+
+## Skill metadata
+
+| | |
+|---|---|
+| Source | Bundled (installed by default) |
+| Path | `skills/software-development/hermes-s6-container-supervision` |
+| Version | `1.0.0` |
+| Author | Hermes Agent |
+| License | MIT |
+| Tags | `docker`, `s6`, `supervision`, `gateway`, `profiles` |
+| Related skills | [`hermes-agent`](/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-hermes-agent), `hermes-agent-dev` |
+
+## Reference: full SKILL.md
+
+:::info
+The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active.
+:::
+
+# Hermes s6-overlay Container Supervision
+
+## When to use this skill
+
+Load this skill when you're working on:
+- Adding or removing a static service in the Hermes Docker image (something that should be supervised at every container start, like the dashboard)
+- Diagnosing why a per-profile gateway isn't starting, restarting, or surviving `docker restart`
+- Understanding why the container's CMD is `/opt/hermes/docker/main-wrapper.sh` and how leading-dash args reach the user's program
+- Modifying `cont-init.d` boot scripts (UID remap, volume seeding, profile reconciliation)
+- Changing the rendered run-script for per-profile gateways (Phase 4)
+
+If you're just running the Hermes Agent and want to use Docker, see `website/docs/user-guide/docker.md` instead.
+
+## Architecture at a glance
+
+<!-- ascii-guard-ignore -->
+```
+/init                                  ← PID 1 (s6-overlay v3.2.3.0)
+├── cont-init.d                        ← oneshot setup, runs as root
+│   ├── 01-hermes-setup                ← docker/stage2-hook.sh
+│   │   ├── UID/GID remap
+│   │   ├── chown /opt/data
+│   │   ├── chown /opt/data/profiles (every boot)
+│   │   ├── seed .env / config.yaml / SOUL.md
+│   │   └── skills_sync.py
+│   └── 02-reconcile-profiles          ← hermes_cli.container_boot
+│       ├── chown /run/service (hermes-writable for runtime register)
+│       └── walk $HERMES_HOME/profiles/<name>/gateway_state.json
+│           → recreate /run/service/gateway-<name>/
+│           → auto-start only those with prior_state == "running"
+│
+├── s6-rc.d (static services, in /etc/s6-overlay/s6-rc.d/)
+│   ├── main-hermes/run                ← exec sleep infinity (no-op slot)
+│   └── dashboard/run                  ← if HERMES_DASHBOARD=1, runs `hermes dashboard`
+│
+├── /run/service (s6-svscan watches; tmpfs)
+│   ├── gateway-coder/                 ← runtime-registered per-profile
+│   │   ├── type        ("longrun")
+│   │   ├── run         ("#!/command/with-contenv sh ... exec s6-setuidgid hermes hermes -p coder gateway run")
+│   │   ├── down        (marker — present means "registered but don't auto-start")
+│   │   └── log/run     (s6-log → $HERMES_HOME/logs/gateways/coder/current)
+│   └── ...
+│
+└── CMD ("main program")               ← /opt/hermes/docker/main-wrapper.sh
+    └── routes user args: bare exec | hermes subcommand | hermes (no args)
+        — exec'd by /init with stdin/stdout/stderr inherited (TTY for --tui)
+```
+<!-- ascii-guard-ignore-end -->
+
+## Key files
+
+| Path | Role |
+|---|---|
+| `Dockerfile` | s6-overlay install + cont-init.d wiring + `ENTRYPOINT ["/init", "/opt/hermes/docker/main-wrapper.sh"]` |
+| `docker/stage2-hook.sh` | The "old entrypoint logic" — UID remap, chown, seed, skills sync. Runs as cont-init.d/01-hermes-setup. |
+| `docker/cont-init.d/02-reconcile-profiles` | Calls `hermes_cli.container_boot` on every boot to restore profile gateway slots from the persistent volume. |
+| `docker/main-wrapper.sh` | The container's CMD. Routes user args, drops to hermes via `s6-setuidgid`, exec's the chosen program. |
+| `docker/s6-rc.d/main-hermes/run` | No-op `sleep infinity` — slot exists so the s6-rc user bundle is valid; main hermes runs as the CMD, not as a supervised service. |
+| `docker/s6-rc.d/dashboard/run` | Conditional service — `exec sleep infinity` unless `HERMES_DASHBOARD` is truthy. |
+| `docker/entrypoint.sh` | Back-compat shim that `exec`s the stage2 hook. External scripts that hard-coded the old entrypoint path still work. |
+| `hermes_cli/service_manager.py` | `S6ServiceManager`: `register_profile_gateway`, `unregister_profile_gateway`, `start/stop/restart/is_running`, `list_profile_gateways`. |
+| `hermes_cli/container_boot.py` | `reconcile_profile_gateways()` — walks persistent profiles, regenerates s6 slots, emits `container-boot.log`. |
+| `hermes_cli/gateway.py::_dispatch_via_service_manager_if_s6` | Intercepts `hermes gateway start/stop/restart` and routes to s6 when running in a container. |
+
+## Why Architecture B (CMD as main program, not s6-supervised)
+
+The original plan (v1–v3) called for main hermes to run as a supervised s6-rc service. Two real s6-overlay v3 mechanics blocked that:
+
+1. **cont-init.d scripts receive no CMD args** — so the stage2 hook can't parse `docker run <image> chat -q "hi"` to set `HERMES_ARGS` for a service `run` script to consume.
+2. **`/run/s6/basedir/bin/halt` does NOT propagate the exit code** written to `/run/s6-linux-init-container-results/exitcode`. Containers always exit 143 (SIGTERM) regardless. Confirmed by skarnet (s6 author) in [issue #477](https://github.com/just-containers/s6-overlay/issues/477): _"if you want a container shutdown, you need to either have your CMD exit, or, if you have no CMD, write the container exit code you want then call halt"_.
+
+So we use the s6-overlay-native CMD pattern: `ENTRYPOINT ["/init", "/opt/hermes/docker/main-wrapper.sh"]`. /init prepends the wrapper to user args automatically — so `docker run <image> --version` becomes `/init main-wrapper.sh --version`, and `--version` doesn't get intercepted by /init's POSIX shell. The wrapper drops to hermes via `s6-setuidgid`, then exec's the chosen program. The program's exit code becomes the container exit code, exactly matching the pre-s6 tini contract.
+
+Trade-off: main hermes is unsupervised under s6. That exactly matches its behavior under tini (the pre-s6 image). Dashboard supervision is the only **new** guarantee — and per-profile gateways under `/run/service/` get full supervision.
+
+## Quick recipes
+
+### Verify s6 is PID 1 in a running container
+
+```sh
+docker exec <c> sh -c 'cat /proc/1/comm; readlink /proc/1/exe'
+# Expect: s6-svscan or init / /package/admin/s6/.../s6-svscan
+```
+
+### Inspect a profile gateway service
+
+```sh
+# /command/ isn't on docker-exec PATH — use absolute path
+docker exec <c> /command/s6-svstat /run/service/gateway-<name>
+# "up (pid …) … seconds"            → running
+# "down (exitcode N) … seconds, normally up, want up, …" → s6 wants it up but the process keeps exiting (crash loop)
+# "down … normally up, ready …"     → user stopped it
+```
+
+### Bring a service up/down manually
+
+```sh
+docker exec <c> /command/s6-svc -u /run/service/gateway-<name>   # up
+docker exec <c> /command/s6-svc -d /run/service/gateway-<name>   # down
+docker exec <c> /command/s6-svc -t /run/service/gateway-<name>   # SIGTERM (restart)
+```
+
+### Watch the cont-init reconciler log
+
+```sh
+docker exec <c> tail -n 50 /opt/data/logs/container-boot.log
+# 2026-05-21T06:18:05+0000 profile=coder prior_state=running action=started
+# 2026-05-21T06:18:05+0000 profile=writer prior_state=stopped action=registered
+```
+
+### Add a new static service
+
+1. Create `docker/s6-rc.d/<name>/type` with `longrun\n` and `docker/s6-rc.d/<name>/run` (use `#!/command/with-contenv sh` + `# shellcheck shell=sh`).
+2. Drop to hermes via `s6-setuidgid hermes` at the top of run (unless you specifically need root).
+3. Create empty `docker/s6-rc.d/<name>/dependencies.d/base` so it waits for the base bundle.
+4. Create empty `docker/s6-rc.d/user/contents.d/<name>` so it joins the user bundle.
+5. The `COPY docker/s6-rc.d/` in the Dockerfile picks it up automatically — no other changes.
+
+### Change the per-profile gateway run command
+
+Edit `S6ServiceManager._render_run_script` in `hermes_cli/service_manager.py`. The function is also called by `hermes_cli/container_boot.py::_register_service` during boot reconciliation, so it's the single source of truth. Update the corresponding assertion in `tests/hermes_cli/test_service_manager.py::test_s6_register_creates_service_dir_and_triggers_scan`.
+
+### Run the docker test harness
+
+```sh
+docker build -t hermes-agent-harness:latest .
+HERMES_TEST_IMAGE=hermes-agent-harness:latest scripts/run_tests.sh tests/docker/ -v
+# Expect 19 passed, 0 xfailed against the s6 image
+```
+
+The harness lives in `tests/docker/` and skips when Docker isn't available. The per-test timeout is bumped to 180s (see `tests/docker/conftest.py`).
+
+## Common pitfalls
+
+### "command not found" via `docker exec`
+
+`/command/` (where s6-overlay puts its binaries) is on PATH only for processes spawned by the supervision tree — services, cont-init.d, main-wrapper.sh. `docker exec <c> s6-svstat …` will fail with "command not found"; always use the absolute path `/command/s6-svstat`. The `hermes` binary works because the Dockerfile adds `/opt/hermes/.venv/bin` to the runtime `ENV PATH`.
+
+### Profile directory ownership
+
+The cont-init reconciler runs as hermes (`s6-setuidgid hermes` in `02-reconcile-profiles`). If a profile dir ends up root-owned (e.g. because `docker exec <c> hermes profile create …` ran as root by default), the reconciler can't read SOUL.md and fails with `PermissionError`. Mitigation: `stage2-hook.sh` chowns `$HERMES_HOME/profiles` to hermes on **every** boot, idempotently. Don't remove that block.
+
+### Files written by `docker exec` are root-owned
+
+`docker exec` defaults to root. Either pass `--user hermes` or rely on the stage2 chown sweep next reboot. Don't write files under `$HERMES_HOME/profiles/<name>/` as root manually — the next reconcile pass will sweep them but in-flight operations may hit perm errors.
+
+### Service slot exists but s6-svstat says "s6-supervise not running"
+
+The service directory is on tmpfs and was wiped on container restart. Either the cont-init reconciler hasn't run yet (give it a moment after `docker restart`) or it failed. Check `docker logs <c> | grep '02-reconcile'`.
+
+### Gateway starts then immediately exits (`down (exitcode 1)` in svstat)
+
+Most likely the profile has no model or auth configured. The service slot is correct — the gateway itself is unconfigured. Run `hermes -p <profile> setup` first. The s6 supervisor will keep restarting it; that's the desired behavior (when you fix the config, the next attempt succeeds and stays up).
+
+### Reconciler skipped a profile
+
+The reconciler keys on the **presence of `SOUL.md`** as the "real profile" marker. `hermes profile create` always seeds it. If a profile dir is missing SOUL.md (stray directory, partial restore, backup-in-progress), the reconciler skips it intentionally. Add a `SOUL.md` (even empty) to opt back in.
+
+### "Help, the container exits 143!"
+
+Check whether something is invoking `s6-svscanctl -t` or `/run/s6/basedir/bin/halt` — both cause /init to begin stage 3 shutdown but return 143 (SIGTERM) rather than the desired exit code. This was the Phase 2 architecture pivot from A to B. For container shutdown with a real exit code, you must let the CMD (main-wrapper.sh) exit normally; do **not** try to control exit from a finish script.
+
+## Related skills
+
+- `hermes-agent-dev`: General hermes-agent codebase navigation
+- `hermes-tool-quirks`: Specific Hermes-tool workarounds (sed/grep/etc.) — load when debugging the s6 stack's interaction with hermes built-in tools.
diff --git a/website/docs/user-guide/skills/bundled/software-development/software-development-node-inspect-debugger.md b/website/docs/user-guide/skills/bundled/software-development/software-development-node-inspect-debugger.md
index 273ac492353..deddf5dafdb 100644
--- a/website/docs/user-guide/skills/bundled/software-development/software-development-node-inspect-debugger.md
+++ b/website/docs/user-guide/skills/bundled/software-development/software-development-node-inspect-debugger.md
@@ -21,7 +21,7 @@ Debug Node.js via --inspect + Chrome DevTools Protocol CLI.
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `debugging`, `nodejs`, `node-inspect`, `cdp`, `breakpoints`, `ui-tui` |
-| Related skills | [`systematic-debugging`](/user-guide/skills/bundled/software-development/software-development-systematic-debugging), [`python-debugpy`](/user-guide/skills/bundled/software-development/software-development-python-debugpy), [`debugging-hermes-tui-commands`](/user-guide/skills/bundled/software-development/software-development-debugging-hermes-tui-commands) |
+| Related skills | [`systematic-debugging`](/docs/user-guide/skills/bundled/software-development/software-development-systematic-debugging), [`python-debugpy`](/docs/user-guide/skills/bundled/software-development/software-development-python-debugpy), [`debugging-hermes-tui-commands`](/docs/user-guide/skills/bundled/software-development/software-development-debugging-hermes-tui-commands) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/bundled/software-development/software-development-plan.md b/website/docs/user-guide/skills/bundled/software-development/software-development-plan.md
index 96c18627a5e..254f7bc4f30 100644
--- a/website/docs/user-guide/skills/bundled/software-development/software-development-plan.md
+++ b/website/docs/user-guide/skills/bundled/software-development/software-development-plan.md
@@ -21,7 +21,7 @@ Plan mode: write markdown plan to .hermes/plans/, no exec.
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `planning`, `plan-mode`, `implementation`, `workflow` |
-| Related skills | [`writing-plans`](/user-guide/skills/bundled/software-development/software-development-writing-plans), [`subagent-driven-development`](/user-guide/skills/bundled/software-development/software-development-subagent-driven-development) |
+| Related skills | [`writing-plans`](/docs/user-guide/skills/bundled/software-development/software-development-writing-plans), [`subagent-driven-development`](/docs/user-guide/skills/bundled/software-development/software-development-subagent-driven-development) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/bundled/software-development/software-development-python-debugpy.md b/website/docs/user-guide/skills/bundled/software-development/software-development-python-debugpy.md
index 5826404a120..0524b1f3ab9 100644
--- a/website/docs/user-guide/skills/bundled/software-development/software-development-python-debugpy.md
+++ b/website/docs/user-guide/skills/bundled/software-development/software-development-python-debugpy.md
@@ -21,7 +21,7 @@ Debug Python: pdb REPL + debugpy remote (DAP).
 | License | MIT |
 | Platforms | linux, macos |
 | Tags | `debugging`, `python`, `pdb`, `debugpy`, `breakpoints`, `dap`, `post-mortem` |
-| Related skills | [`systematic-debugging`](/user-guide/skills/bundled/software-development/software-development-systematic-debugging), [`node-inspect-debugger`](/user-guide/skills/bundled/software-development/software-development-node-inspect-debugger), [`debugging-hermes-tui-commands`](/user-guide/skills/bundled/software-development/software-development-debugging-hermes-tui-commands) |
+| Related skills | [`systematic-debugging`](/docs/user-guide/skills/bundled/software-development/software-development-systematic-debugging), [`node-inspect-debugger`](/docs/user-guide/skills/bundled/software-development/software-development-node-inspect-debugger), [`debugging-hermes-tui-commands`](/docs/user-guide/skills/bundled/software-development/software-development-debugging-hermes-tui-commands) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/bundled/software-development/software-development-requesting-code-review.md b/website/docs/user-guide/skills/bundled/software-development/software-development-requesting-code-review.md
index f01bb9a0277..30a0be6613d 100644
--- a/website/docs/user-guide/skills/bundled/software-development/software-development-requesting-code-review.md
+++ b/website/docs/user-guide/skills/bundled/software-development/software-development-requesting-code-review.md
@@ -21,7 +21,7 @@ Pre-commit review: security scan, quality gates, auto-fix.
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `code-review`, `security`, `verification`, `quality`, `pre-commit`, `auto-fix` |
-| Related skills | [`subagent-driven-development`](/user-guide/skills/bundled/software-development/software-development-subagent-driven-development), [`writing-plans`](/user-guide/skills/bundled/software-development/software-development-writing-plans), [`test-driven-development`](/user-guide/skills/bundled/software-development/software-development-test-driven-development), [`github-code-review`](/user-guide/skills/bundled/github/github-github-code-review) |
+| Related skills | [`subagent-driven-development`](/docs/user-guide/skills/bundled/software-development/software-development-subagent-driven-development), [`writing-plans`](/docs/user-guide/skills/bundled/software-development/software-development-writing-plans), [`test-driven-development`](/docs/user-guide/skills/bundled/software-development/software-development-test-driven-development), [`github-code-review`](/docs/user-guide/skills/bundled/github/github-github-code-review) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/bundled/software-development/software-development-spike.md b/website/docs/user-guide/skills/bundled/software-development/software-development-spike.md
index 05ca2396f02..695a6cbde00 100644
--- a/website/docs/user-guide/skills/bundled/software-development/software-development-spike.md
+++ b/website/docs/user-guide/skills/bundled/software-development/software-development-spike.md
@@ -21,7 +21,7 @@ Throwaway experiments to validate an idea before build.
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `spike`, `prototype`, `experiment`, `feasibility`, `throwaway`, `exploration`, `research`, `planning`, `mvp`, `proof-of-concept` |
-| Related skills | [`sketch`](/user-guide/skills/bundled/creative/creative-sketch), [`writing-plans`](/user-guide/skills/bundled/software-development/software-development-writing-plans), [`subagent-driven-development`](/user-guide/skills/bundled/software-development/software-development-subagent-driven-development), [`plan`](/user-guide/skills/bundled/software-development/software-development-plan) |
+| Related skills | [`sketch`](/docs/user-guide/skills/bundled/creative/creative-sketch), [`writing-plans`](/docs/user-guide/skills/bundled/software-development/software-development-writing-plans), [`subagent-driven-development`](/docs/user-guide/skills/bundled/software-development/software-development-subagent-driven-development), [`plan`](/docs/user-guide/skills/bundled/software-development/software-development-plan) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/bundled/software-development/software-development-subagent-driven-development.md b/website/docs/user-guide/skills/bundled/software-development/software-development-subagent-driven-development.md
index 5ac70ba30a5..1ad7859918f 100644
--- a/website/docs/user-guide/skills/bundled/software-development/software-development-subagent-driven-development.md
+++ b/website/docs/user-guide/skills/bundled/software-development/software-development-subagent-driven-development.md
@@ -21,7 +21,7 @@ Execute plans via delegate_task subagents (2-stage review).
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `delegation`, `subagent`, `implementation`, `workflow`, `parallel` |
-| Related skills | [`writing-plans`](/user-guide/skills/bundled/software-development/software-development-writing-plans), [`requesting-code-review`](/user-guide/skills/bundled/software-development/software-development-requesting-code-review), [`test-driven-development`](/user-guide/skills/bundled/software-development/software-development-test-driven-development) |
+| Related skills | [`writing-plans`](/docs/user-guide/skills/bundled/software-development/software-development-writing-plans), [`requesting-code-review`](/docs/user-guide/skills/bundled/software-development/software-development-requesting-code-review), [`test-driven-development`](/docs/user-guide/skills/bundled/software-development/software-development-test-driven-development) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/bundled/software-development/software-development-systematic-debugging.md b/website/docs/user-guide/skills/bundled/software-development/software-development-systematic-debugging.md
index 8872bc0c366..e86f46c9ae7 100644
--- a/website/docs/user-guide/skills/bundled/software-development/software-development-systematic-debugging.md
+++ b/website/docs/user-guide/skills/bundled/software-development/software-development-systematic-debugging.md
@@ -21,7 +21,7 @@ description: "4-phase root cause debugging: understand bugs before fixing"
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `debugging`, `troubleshooting`, `problem-solving`, `root-cause`, `investigation` |
-| Related skills | [`test-driven-development`](/user-guide/skills/bundled/software-development/software-development-test-driven-development), [`writing-plans`](/user-guide/skills/bundled/software-development/software-development-writing-plans), [`subagent-driven-development`](/user-guide/skills/bundled/software-development/software-development-subagent-driven-development) |
+| Related skills | [`test-driven-development`](/docs/user-guide/skills/bundled/software-development/software-development-test-driven-development), [`writing-plans`](/docs/user-guide/skills/bundled/software-development/software-development-writing-plans), [`subagent-driven-development`](/docs/user-guide/skills/bundled/software-development/software-development-subagent-driven-development) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/bundled/software-development/software-development-test-driven-development.md b/website/docs/user-guide/skills/bundled/software-development/software-development-test-driven-development.md
index 3dffe264271..5b424f3adc7 100644
--- a/website/docs/user-guide/skills/bundled/software-development/software-development-test-driven-development.md
+++ b/website/docs/user-guide/skills/bundled/software-development/software-development-test-driven-development.md
@@ -21,7 +21,7 @@ TDD: enforce RED-GREEN-REFACTOR, tests before code.
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `testing`, `tdd`, `development`, `quality`, `red-green-refactor` |
-| Related skills | [`systematic-debugging`](/user-guide/skills/bundled/software-development/software-development-systematic-debugging), [`writing-plans`](/user-guide/skills/bundled/software-development/software-development-writing-plans), [`subagent-driven-development`](/user-guide/skills/bundled/software-development/software-development-subagent-driven-development) |
+| Related skills | [`systematic-debugging`](/docs/user-guide/skills/bundled/software-development/software-development-systematic-debugging), [`writing-plans`](/docs/user-guide/skills/bundled/software-development/software-development-writing-plans), [`subagent-driven-development`](/docs/user-guide/skills/bundled/software-development/software-development-subagent-driven-development) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/bundled/software-development/software-development-writing-plans.md b/website/docs/user-guide/skills/bundled/software-development/software-development-writing-plans.md
index a9a653b9346..6dc0a52988f 100644
--- a/website/docs/user-guide/skills/bundled/software-development/software-development-writing-plans.md
+++ b/website/docs/user-guide/skills/bundled/software-development/software-development-writing-plans.md
@@ -21,7 +21,7 @@ Write implementation plans: bite-sized tasks, paths, code.
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `planning`, `design`, `implementation`, `workflow`, `documentation` |
-| Related skills | [`subagent-driven-development`](/user-guide/skills/bundled/software-development/software-development-subagent-driven-development), [`test-driven-development`](/user-guide/skills/bundled/software-development/software-development-test-driven-development), [`requesting-code-review`](/user-guide/skills/bundled/software-development/software-development-requesting-code-review) |
+| Related skills | [`subagent-driven-development`](/docs/user-guide/skills/bundled/software-development/software-development-subagent-driven-development), [`test-driven-development`](/docs/user-guide/skills/bundled/software-development/software-development-test-driven-development), [`requesting-code-review`](/docs/user-guide/skills/bundled/software-development/software-development-requesting-code-review) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/google-workspace.md b/website/docs/user-guide/skills/google-workspace.md
index 920e6e26058..7248612c6bf 100644
--- a/website/docs/user-guide/skills/google-workspace.md
+++ b/website/docs/user-guide/skills/google-workspace.md
@@ -7,7 +7,7 @@ description: "Send email, manage calendar events, search Drive, read/write Sheet
 
 # Google Workspace Skill
 
-Gmail, Calendar, Drive, Contacts, Sheets, and Docs integration for Hermes. Uses OAuth2 with automatic token refresh. Prefers the [Google Workspace CLI (`gws`)](https://github.com/nicholasgasior/gws) when available for broader coverage, and falls back to Google's Python client libraries otherwise.
+Gmail, Calendar, Drive, Contacts, Sheets, and Docs integration for Hermes. Uses OAuth2 with automatic token refresh. Prefers the [Google Workspace CLI (`gws`)](https://github.com/googleworkspace/cli) when available for broader coverage, and falls back to Google's Python client libraries otherwise.
 
 **Skill path:** `skills/productivity/google-workspace/`
 
diff --git a/website/docs/user-guide/skills/optional/autonomous-ai-agents/autonomous-ai-agents-antigravity-cli.md b/website/docs/user-guide/skills/optional/autonomous-ai-agents/autonomous-ai-agents-antigravity-cli.md
new file mode 100644
index 00000000000..f40f837a92a
--- /dev/null
+++ b/website/docs/user-guide/skills/optional/autonomous-ai-agents/autonomous-ai-agents-antigravity-cli.md
@@ -0,0 +1,195 @@
+---
+title: "Antigravity Cli — Operate the Antigravity CLI (agy): plugins, auth, sandbox"
+sidebar_label: "Antigravity Cli"
+description: "Operate the Antigravity CLI (agy): plugins, auth, sandbox"
+---
+
+{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */}
+
+# Antigravity Cli
+
+Operate the Antigravity CLI (agy): plugins, auth, sandbox.
+
+## Skill metadata
+
+| | |
+|---|---|
+| Source | Optional — install with `hermes skills install official/autonomous-ai-agents/antigravity-cli` |
+| Path | `optional-skills/autonomous-ai-agents/antigravity-cli` |
+| Version | `0.1.0` |
+| Author | Tony Simons (asimons81), Hermes Agent |
+| License | MIT |
+| Platforms | linux, macos, windows |
+| Tags | `Coding-Agent`, `Antigravity`, `CLI`, `Auth`, `Plugins`, `Sandbox` |
+| Related skills | [`grok`](/docs/user-guide/skills/optional/autonomous-ai-agents/autonomous-ai-agents-grok), [`codex`](/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-codex), [`claude-code`](/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-claude-code), [`hermes-agent`](/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-hermes-agent) |
+
+## Reference: full SKILL.md
+
+:::info
+The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active.
+:::
+
+# Antigravity CLI (`agy`)
+
+Operator guide for the Antigravity CLI, invoked as `agy`. Run all `agy`
+commands through the Hermes `terminal` tool; inspect its config and logs with
+`read_file`. This skill is reference + procedure — it does not wrap a network
+API, so there is nothing to authenticate from Hermes itself.
+
+## When to Use
+
+- Installing, updating, or smoke-testing the `agy` binary
+- Driving non-interactive `agy --print` / `agy -p` one-shots
+- Debugging Antigravity auth, sandbox, permissions, or plugin state
+- Reading Antigravity settings, keybindings, conversations, or logs
+
+## Mental model
+
+Antigravity has two layers — keep them distinct or the guidance will be wrong:
+
+1. **Shell wrapper commands** — `agy help`, `agy install`, `agy plugin`,
+   `agy update`, `agy changelog`. Run these through the `terminal` tool.
+2. **Interactive in-session slash commands** — `/config`, `/permissions`,
+   `/skills`, `/agents`, etc. These only exist inside a running `agy` TUI
+   session, not on the shell wrapper.
+
+`agy help` shows the shell wrapper surface, NOT the in-session slash commands.
+
+## Prerequisites
+
+- The `agy` binary on PATH. Verify through the `terminal` tool:
+  `command -v agy && agy --version`.
+- No env vars or API keys required by this skill — Antigravity manages its own
+  auth via the OS keyring / browser sign-in (see Authentication below).
+
+## How to Run
+
+Invoke every `agy` command through the `terminal` tool. Examples:
+
+```
+terminal(command="agy --version")
+terminal(command="agy help")
+terminal(command="agy plugin list")
+terminal(command="agy --print 'Summarize the repo in 3 bullets'", workdir="/path/to/project")
+```
+
+For an interactive multi-turn TUI session, launch `agy` with `pty=true` (and
+tmux for capture/monitoring), the same pattern the `codex` / `claude-code`
+skills use. For one-shot smoke tests and scripted prompts, prefer
+`agy --print` (non-interactive).
+
+To inspect Antigravity's own files, use `read_file` on the paths under Core
+paths below — do not `cat` them through the terminal.
+
+## Core paths
+
+- Binary / entrypoint: `agy`
+- App data dir: `~/.gemini/antigravity-cli/`
+- Settings file: `~/.gemini/antigravity-cli/settings.json`
+- Keybindings file: `~/.gemini/antigravity-cli/keybindings.json`
+- Logs: `~/.gemini/antigravity-cli/log/cli-*.log`
+- Conversations: `~/.gemini/antigravity-cli/conversations/`
+- Brain artifacts: `~/.gemini/antigravity-cli/brain/`
+- History: `~/.gemini/antigravity-cli/history.jsonl`
+- Plugin staging: `~/.gemini/antigravity-cli/plugins/<plugin_name>/`
+
+## Quick Reference
+
+### Wrapper commands
+- `agy changelog`
+- `agy help`
+- `agy install`
+- `agy plugin` / `agy plugins`
+- `agy update`
+
+### Useful flags
+- `--add-dir`
+- `--continue` / `-c`
+- `--conversation`
+- `--dangerously-skip-permissions`
+- `--print` / `-p`
+- `--print-timeout`
+- `--prompt`
+- `--prompt-interactive` / `-i`
+- `--sandbox`
+- `--log-file`
+- `--version`
+
+### Plugin subcommands (`agy plugin --help`)
+- `list`, `import [source]`, `install <target>`, `uninstall <name>`,
+  `enable <name>`, `disable <name>`, `validate [path]`, `link <mp> <target>`,
+  `help`
+
+### Install flags (`agy install --help`)
+- `--dir`, `--skip-aliases`, `--skip-path`
+
+### In-session slash commands
+- **Conversation control:** `/resume` (`/switch`), `/rewind` (`/undo`),
+  `/rename <name>`, `/clear`, `/fork`, `/reset`, `/new`
+- **Settings & tools:** `/config`, `/settings`, `/permissions`, `/model`,
+  `/keybindings`, `/statusline`, `/tasks`, `/skills`, `/mcp`, `/open <path>`,
+  `/usage`, `/logout`, `/agents`
+- **Prompt helpers:** `@` path autocomplete, `esc esc` clears the prompt (when
+  not streaming), `!` runs a terminal command directly, `?` opens help
+
+## Settings and permissions
+
+### Common settings keys (`settings.json`)
+- `allowNonWorkspaceAccess`
+- `colorScheme`
+- `permissions.allow`
+- `trustedWorkspaces`
+
+### Permission modes
+`request-review`, `always-proceed`, `strict`, `proceed-in-sandbox`.
+
+### Sandbox behavior
+- `enableTerminalSandbox` is a boolean in `settings.json`; default `false`.
+- Launch-time overrides (`--sandbox`, `--dangerously-skip-permissions`) can
+  supersede persistent settings for the current session.
+
+## Authentication behavior
+
+- The CLI tries the OS secure keyring first.
+- With no saved session, it falls back to browser-based Google sign-in.
+- Locally it opens the default browser; over SSH it prints an authorization URL
+  and expects the auth code pasted back.
+- `/logout` removes saved credentials.
+
+## Plugins
+
+- Plugins stage under `~/.gemini/antigravity-cli/plugins/<plugin_name>/`.
+- They can bundle skills, agents, rules, MCP servers, and hooks.
+- `agy plugin list` returning no imported plugins is a valid empty state.
+
+## Pitfalls
+
+- `agy help` shows wrapper commands, not interactive slash commands.
+- `agy --version` is the safe non-interactive version check; `agy version` is
+  interactive and can fail without a real TTY.
+- First place to look for failures: `~/.gemini/antigravity-cli/log/cli-*.log`
+  (read with `read_file`).
+- Don't confuse persistent JSON settings with launch-time overrides.
+- `~/.gemini/antigravity-cli/bin/agentapi` is a thin wrapper to `agy agentapi`.
+- On WSL, token storage is file-based, so auth issues are usually local-file /
+  session-state problems, not browser-only problems.
+- Workspace identity can depend on launch directory and the `.antigravitycli`
+  project marker.
+
+## Verification
+
+Confirm the install is real and usable, all through the `terminal` tool (read
+files with `read_file`):
+
+1. `terminal(command="command -v agy")`
+2. `terminal(command="agy --version")`
+3. `terminal(command="agy help")`
+4. `terminal(command="agy plugin list")`
+5. `read_file` on `~/.gemini/antigravity-cli/settings.json`
+6. `read_file` on the latest `~/.gemini/antigravity-cli/log/cli-*.log`
+7. If needed, `read_file` on `~/.gemini/antigravity-cli/keybindings.json`
+
+## Support files
+
+- `references/cli-docs.md` — condensed notes from the getting-started, usage,
+  and features docs.
diff --git a/website/docs/user-guide/skills/optional/autonomous-ai-agents/autonomous-ai-agents-blackbox.md b/website/docs/user-guide/skills/optional/autonomous-ai-agents/autonomous-ai-agents-blackbox.md
index fc2f686c249..737ae091a83 100644
--- a/website/docs/user-guide/skills/optional/autonomous-ai-agents/autonomous-ai-agents-blackbox.md
+++ b/website/docs/user-guide/skills/optional/autonomous-ai-agents/autonomous-ai-agents-blackbox.md
@@ -21,7 +21,7 @@ Delegate coding tasks to Blackbox AI CLI agent. Multi-model agent with built-in
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `Coding-Agent`, `Blackbox`, `Multi-Agent`, `Judge`, `Multi-Model` |
-| Related skills | [`claude-code`](/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-claude-code), [`codex`](/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-codex), [`hermes-agent`](/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-hermes-agent) |
+| Related skills | [`claude-code`](/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-claude-code), [`codex`](/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-codex), [`hermes-agent`](/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-hermes-agent) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/optional/autonomous-ai-agents/autonomous-ai-agents-grok.md b/website/docs/user-guide/skills/optional/autonomous-ai-agents/autonomous-ai-agents-grok.md
new file mode 100644
index 00000000000..7e60560726e
--- /dev/null
+++ b/website/docs/user-guide/skills/optional/autonomous-ai-agents/autonomous-ai-agents-grok.md
@@ -0,0 +1,319 @@
+---
+title: "Grok — Delegate coding to xAI Grok Build CLI (features, PRs)"
+sidebar_label: "Grok"
+description: "Delegate coding to xAI Grok Build CLI (features, PRs)"
+---
+
+{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */}
+
+# Grok
+
+Delegate coding to xAI Grok Build CLI (features, PRs).
+
+## Skill metadata
+
+| | |
+|---|---|
+| Source | Optional — install with `hermes skills install official/autonomous-ai-agents/grok` |
+| Path | `optional-skills/autonomous-ai-agents/grok` |
+| Version | `0.1.0` |
+| Author | Matt Maximo (MattMaximo), Hermes Agent |
+| License | MIT |
+| Platforms | linux, macos, windows |
+| Tags | `Coding-Agent`, `Grok`, `xAI`, `Code-Review`, `Refactoring`, `Automation` |
+| Related skills | [`codex`](/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-codex), [`claude-code`](/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-claude-code), [`hermes-agent`](/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-hermes-agent) |
+
+## Reference: full SKILL.md
+
+:::info
+The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active.
+:::
+
+# Grok Build CLI — Hermes Orchestration Guide
+
+Delegate coding tasks to [Grok Build](https://docs.x.ai/build/overview) (xAI's
+autonomous coding agent CLI, the `grok` command) via the Hermes terminal. Grok
+can read files, write code, run shell commands, spawn subagents, and manage git
+workflows. It runs three ways: an interactive TUI, **headless** (`-p`), and as
+an **ACP agent** over JSON-RPC.
+
+This is the third sibling to `codex` and `claude-code`. The orchestration
+pattern is nearly identical — **prefer headless `-p` for one-shots**, use a PTY
+for interactive sessions.
+
+## When to use
+
+- Building features
+- Refactoring
+- PR reviews
+- Batch issue fixing
+- Any task where you'd otherwise reach for Codex / Claude Code but want Grok
+
+## Prerequisites
+
+- **Install (preferred):** `npm install -g @xai-official/grok`
+  - The official installer `curl -fsSL https://x.ai/cli/install.sh | bash` also
+    works, but the `x.ai` host is Cloudflare-walled in some environments. The
+    npm path avoids that dependency entirely.
+- **Auth — SuperGrok / X Premium+ subscription (primary path):**
+  - Run `grok login` once → opens a browser for OAuth → token cached in
+    `~/.grok/auth.json`. This uses your **SuperGrok or X Premium+** subscription
+    (no per-token API billing).
+  - Check sign-in state by looking for `~/.grok/auth.json`, or run a cheap
+    headless smoke test: `grok --no-auto-update -p "Say ok."`
+  - In the TUI, `/logout` signs out and `/login` (or relaunching) signs back in.
+- **No git repo required** — unlike Codex, Grok runs fine outside a git
+  directory (good for scratch/throwaway tasks).
+- **Claude Code / AGENTS.md compatible with zero config** — Grok auto-reads
+  `CLAUDE.md`, `.claude/` (skills, agents, MCPs, hooks, rules), and the
+  `AGENTS.md` family. Existing project context just works.
+
+> **API-key fallback (not the default for this user):** Grok also supports
+> setting the `XAI_API_KEY` environment variable for pay-as-you-go billing
+> via `api.x.ai`. Only use
+> this if `grok login` / SuperGrok auth is unavailable. The subscription path
+> (`grok login`) is the intended setup here.
+
+## Two Orchestration Modes
+
+### Mode 1: Headless (`-p`) — Non-Interactive (PREFERRED)
+
+Runs a one-shot task, prints the result, and exits. No PTY, no interactive
+dialogs to navigate. This is the cleanest integration path — the analog of
+`claude -p` and `codex exec`.
+
+```
+terminal(command="grok --no-auto-update -p 'Add a dark mode toggle to settings'", workdir="/path/to/project", timeout=180)
+```
+
+Always pass `--no-auto-update` in automation to skip background update checks.
+
+**When to use headless:**
+- One-shot coding tasks (fix a bug, add a feature, refactor)
+- CI/CD automation and scripting
+- Structured output parsing with `--output-format json`
+- Any task that doesn't need multi-turn conversation
+
+### Mode 2: Interactive PTY — Multi-Turn TUI Sessions
+
+The TUI is a fullscreen, mouse-interactive app. Drive it with `pty=true`. For
+robust monitoring/input use tmux (same pattern as the `claude-code` skill).
+
+```
+# Launch in a tmux session for capture-pane monitoring
+terminal(command="tmux new-session -d -s grok-work -x 140 -y 40")
+terminal(command="tmux send-keys -t grok-work 'cd /path/to/project && grok' Enter")
+
+# Wait for startup, then send a task
+terminal(command="sleep 5 && tmux send-keys -t grok-work 'Refactor the auth module to use JWT' Enter")
+
+# Monitor progress
+terminal(command="sleep 15 && tmux capture-pane -t grok-work -p -S -50")
+
+# Exit when done
+terminal(command="tmux send-keys -t grok-work '/quit' Enter && sleep 1 && tmux kill-session -t grok-work")
+```
+
+**Tip for headless-but-inline output:** if you want TUI-style output without the
+fullscreen alt-screen takeover (e.g. for cleaner logs), add `--no-alt-screen`.
+For pure automation, headless `-p` is still cleaner than the TUI.
+
+## Headless Deep Dive
+
+### Common Flags
+
+| Flag | Effect |
+|------|--------|
+| `-p, --single <PROMPT>` | Send one prompt, run headless, exit |
+| `-m, --model <MODEL>` | Choose a model |
+| `-s, --session-id <ID>` | Create or resume a named headless session |
+| `-r, --resume <ID>` | Resume an existing session |
+| `-c, --continue` | Continue the most recent session in the current directory |
+| `--cwd <PATH>` | Set the working directory |
+| `--output-format <FMT>` | `plain` (default), `json`, or `streaming-json` |
+| `--always-approve` | Auto-approve all tool executions (the `--full-auto` / `--yolo` equivalent) |
+| `--no-alt-screen` | Run inline, no fullscreen TUI takeover |
+| `--no-auto-update` | Skip background update checks (use in all automation) |
+
+### Output Formats
+
+- `plain` — human-readable text (default)
+- `json` — one JSON object at the end of the run (parse the result cleanly)
+- `streaming-json` — newline-delimited JSON events as they arrive
+
+```
+# Structured result for parsing
+terminal(command="grok --no-auto-update -p 'List all TODO comments in src/' --output-format json", workdir="/project", timeout=120)
+
+# Auto-approve for autonomous building
+terminal(command="grok --no-auto-update --always-approve -p 'Refactor the database layer and run the tests'", workdir="/project", timeout=300)
+```
+
+### Background Mode (Long Tasks)
+
+```
+# Start headless in background
+terminal(command="grok --no-auto-update --always-approve -p 'Refactor the auth module'", workdir="/project", background=true, notify_on_complete=true)
+# Returns session_id
+
+# Monitor
+process(action="poll", session_id="<id>")
+process(action="log", session_id="<id>")
+
+# Kill if needed
+process(action="kill", session_id="<id>")
+```
+
+For an interactive (TUI) background session, use `pty=true` + tmux and monitor
+with `tmux capture-pane`, exactly like the `claude-code` / `codex` skills.
+
+### Session Continuation
+
+```
+# Start a named session
+terminal(command="grok --no-auto-update -s refactor-db -p 'Start refactoring the database layer' --always-approve", workdir="/project", timeout=240)
+
+# Resume it later
+terminal(command="grok --no-auto-update -r refactor-db -p 'Now add connection pooling' --always-approve", workdir="/project", timeout=180)
+
+# Or continue the most recent session in this directory
+terminal(command="grok --no-auto-update -c -p 'What did you change last time?'", workdir="/project", timeout=60)
+```
+
+## Read-Only Audit → Markdown Note Pattern
+
+To have Grok review local artifacts and return a clean markdown note (for
+Obsidian or a repo) without mutating anything:
+
+1. Prepare stable input files first with Hermes tools (`read_file`,
+   `write_file`). Snapshot only the relevant context into a temp file rather
+   than dumping raw paths.
+2. Run Grok headless **without** `--always-approve` so it cannot auto-write, and
+   demand `markdown only, no preamble`.
+3. Save Grok's stdout straight into the destination note with `write_file()`.
+
+```
+grok --no-auto-update -p "Read /tmp/current.md and /tmp/inventory.md. Produce markdown only, no preamble. Output a clean note titled 'Cleanup Review'." --output-format plain
+```
+
+**Pitfall (same as Claude Code):** for document rewrites, a loose "rewrite this"
+prompt may return a change summary instead of the full file. Instead: pipe the
+file in, and demand `Return ONLY the full revised markdown document. No intro,
+no explanation, no code fences. Start immediately with '# Title'.` Verify the
+first lines with `read_file()` before overwriting the destination.
+
+## PR Review Patterns
+
+### Quick Review (Headless)
+
+```
+terminal(command="cd /path/to/repo && git diff main...feature-branch | grok --no-auto-update -p 'Review this diff for bugs, security issues, and style problems. Be thorough.'", timeout=120)
+```
+
+### Clone-to-temp Review (safe, no repo mutation)
+
+```
+terminal(command="REVIEW=$(mktemp -d) && git clone https://github.com/user/repo.git $REVIEW && cd $REVIEW && gh pr checkout 42 && grok --no-auto-update -p 'Review the changes vs origin/main. Check bugs, security, race conditions, missing tests.'", pty=true, timeout=300)
+```
+
+### Post the review
+
+```
+terminal(command="gh pr comment 42 --body '<review text>'", workdir="/path/to/repo")
+```
+
+## Parallel Issue Fixing with Worktrees
+
+```
+# Create worktrees
+terminal(command="git worktree add -b fix/issue-78 /tmp/issue-78 main", workdir="~/project")
+terminal(command="git worktree add -b fix/issue-99 /tmp/issue-99 main", workdir="~/project")
+
+# Launch Grok headless in each (background)
+terminal(command="grok --no-auto-update --always-approve -p 'Fix issue #78: <description>. Commit when done.'", workdir="/tmp/issue-78", background=true, notify_on_complete=true)
+terminal(command="grok --no-auto-update --always-approve -p 'Fix issue #99: <description>. Commit when done.'", workdir="/tmp/issue-99", background=true, notify_on_complete=true)
+
+# Monitor
+process(action="list")
+
+# After completion: push and open PRs
+terminal(command="cd /tmp/issue-78 && git push -u origin fix/issue-78")
+terminal(command="gh pr create --repo user/repo --head fix/issue-78 --title 'fix: ...' --body '...'")
+
+# Cleanup
+terminal(command="git worktree remove /tmp/issue-78", workdir="~/project")
+```
+
+## Useful Subcommands & TUI Commands
+
+| Command | Purpose |
+|---------|---------|
+| `grok` | Start the interactive TUI |
+| `grok -p "query"` | Headless one-shot |
+| `grok login` / `grok logout` | Sign in / out (SuperGrok / X Premium+ OAuth) |
+| `grok inspect` | Show what Grok discovered in cwd: config sources, instructions, skills, plugins, hooks, MCP servers |
+| `grok agent stdio` | Run as an ACP agent over JSON-RPC (for IDE/tool integration) |
+| `grok update` | Update the CLI (needs the `x.ai` host; skip in automation) |
+
+TUI slash commands (interactive only): `/model <name>`, `/always-approve`,
+`/plan`, `/context`, `/compact`, `/resume`, `/sessions`, `/fork`, `/usage`,
+`/quit`. `Shift+Tab` cycles session modes (including Plan mode, which blocks
+write tools except the session plan file).
+
+## Config (`~/.grok/config.toml`)
+
+```toml
+[cli]
+auto_update = false          # skip background update checks persistently
+
+[ui]
+permission_mode = "ask"      # or "always-approve" to skip tool prompts by default
+
+[models]
+default = "grok-build-0.1"
+```
+
+Put global preferences in `~/.grok/config.toml` (not project-scoped
+`.grok/config.toml`). `permission_mode` supersedes the legacy `approval_mode` /
+`yolo = true` keys.
+
+## Pitfalls & Gotchas
+
+1. **Auth is subscription-gated.** `grok login` requires a SuperGrok or X
+   Premium+ subscription. If login fails or there's no `~/.grok/auth.json`,
+   confirm the subscription is active before falling back to `XAI_API_KEY`.
+2. **Don't conflate Hermes' xAI auth with the `grok` CLI's auth.** Hermes'
+   `x_search` runs on its own xAI OAuth; the standalone `grok` CLI has a
+   separate token in `~/.grok/auth.json`. A working `x_search` does NOT mean
+   `grok` is logged in.
+3. **Always pass `--no-auto-update` in automation** — otherwise Grok phones home
+   for update checks (and `x.ai`/`storage.googleapis.com` may be unreachable).
+4. **Prefer npm install over the curl installer** — `npm install -g
+   @xai-official/grok` avoids the Cloudflare-walled `x.ai` host.
+5. **`--always-approve` is the autonomous-build switch.** Without it, headless
+   runs may stall waiting on tool-approval prompts. Omit it deliberately for
+   read-only review/audit work so Grok can't mutate files.
+6. **Headless `-p` skips TUI dialogs**; the TUI needs `pty=true` (+ tmux for
+   monitoring), just like Claude Code.
+7. **Use `--no-alt-screen`** if you run the TUI inline and the fullscreen
+   alt-screen takeover garbles captured output.
+8. **No git repo needed**, but for PR/commit workflows you still want one — use
+   `mktemp -d && git init` for scratch commit tasks.
+9. **Clean up tmux sessions** with `tmux kill-session -t <name>` when done.
+
+## Rules for Hermes Agents
+
+1. **Prefer headless `-p`** for single tasks — cleanest integration, structured
+   output via `--output-format json`.
+2. **Always set `workdir`** (or `--cwd`) so Grok targets the right project.
+3. **Pass `--no-auto-update`** in every automated invocation.
+4. **Use `--always-approve` only when Grok should write autonomously**; omit it
+   for read-only reviews and audits.
+5. **Background long tasks** with `background=true, notify_on_complete=true` and
+   monitor via the `process` tool.
+6. **Use tmux for multi-turn interactive work** and monitor with
+   `tmux capture-pane -t <session> -p -S -50`.
+7. **Verify auth before relying on it** — check `~/.grok/auth.json` or run a
+   cheap `grok -p "Say ok."` smoke test; don't assume Hermes' xAI auth carries
+   over.
+8. **Report results to the user** — summarize what Grok changed and what's left.
diff --git a/website/docs/user-guide/skills/optional/autonomous-ai-agents/autonomous-ai-agents-honcho.md b/website/docs/user-guide/skills/optional/autonomous-ai-agents/autonomous-ai-agents-honcho.md
index e0451f7d4df..1b989116636 100644
--- a/website/docs/user-guide/skills/optional/autonomous-ai-agents/autonomous-ai-agents-honcho.md
+++ b/website/docs/user-guide/skills/optional/autonomous-ai-agents/autonomous-ai-agents-honcho.md
@@ -21,7 +21,7 @@ Configure and use Honcho memory with Hermes -- cross-session user modeling, mult
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `Honcho`, `Memory`, `Profiles`, `Observation`, `Dialectic`, `User-Modeling`, `Session-Summary` |
-| Related skills | [`hermes-agent`](/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-hermes-agent) |
+| Related skills | [`hermes-agent`](/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-hermes-agent) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/optional/autonomous-ai-agents/autonomous-ai-agents-openhands.md b/website/docs/user-guide/skills/optional/autonomous-ai-agents/autonomous-ai-agents-openhands.md
index 0e74f2573aa..9774fe25b02 100644
--- a/website/docs/user-guide/skills/optional/autonomous-ai-agents/autonomous-ai-agents-openhands.md
+++ b/website/docs/user-guide/skills/optional/autonomous-ai-agents/autonomous-ai-agents-openhands.md
@@ -21,7 +21,7 @@ Delegate coding to OpenHands CLI (model-agnostic, LiteLLM).
 | License | MIT |
 | Platforms | linux, macos |
 | Tags | `Coding-Agent`, `OpenHands`, `Model-Agnostic`, `LiteLLM` |
-| Related skills | [`claude-code`](/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-claude-code), [`codex`](/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-codex), [`opencode`](/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-opencode), [`hermes-agent`](/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-hermes-agent) |
+| Related skills | [`claude-code`](/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-claude-code), [`codex`](/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-codex), [`opencode`](/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-opencode), [`hermes-agent`](/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-hermes-agent) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/optional/blockchain/blockchain-evm.md b/website/docs/user-guide/skills/optional/blockchain/blockchain-evm.md
index 92aa14ffa54..01006870ee4 100644
--- a/website/docs/user-guide/skills/optional/blockchain/blockchain-evm.md
+++ b/website/docs/user-guide/skills/optional/blockchain/blockchain-evm.md
@@ -21,7 +21,7 @@ Read-only EVM client: wallets, tokens, gas across 8 chains.
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `EVM`, `Ethereum`, `BNB`, `BSC`, `Base`, `Arbitrum`, `Polygon`, `Optimism`, `Avalanche`, `zkSync`, `Blockchain`, `Crypto`, `Web3`, `DeFi`, `NFT`, `ENS`, `Whale`, `Security` |
-| Related skills | [`solana`](/user-guide/skills/optional/blockchain/blockchain-solana) |
+| Related skills | [`solana`](/docs/user-guide/skills/optional/blockchain/blockchain-solana) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/optional/creative/creative-concept-diagrams.md b/website/docs/user-guide/skills/optional/creative/creative-concept-diagrams.md
index 7870e466b4c..9b3ba92b3bd 100644
--- a/website/docs/user-guide/skills/optional/creative/creative-concept-diagrams.md
+++ b/website/docs/user-guide/skills/optional/creative/creative-concept-diagrams.md
@@ -21,7 +21,7 @@ Generate flat, minimal light/dark-aware SVG diagrams as standalone HTML files, u
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `diagrams`, `svg`, `visualization`, `education`, `physics`, `chemistry`, `engineering` |
-| Related skills | [`architecture-diagram`](/user-guide/skills/bundled/creative/creative-architecture-diagram), [`excalidraw`](/user-guide/skills/bundled/creative/creative-excalidraw), `generative-widgets` |
+| Related skills | [`architecture-diagram`](/docs/user-guide/skills/bundled/creative/creative-architecture-diagram), [`excalidraw`](/docs/user-guide/skills/bundled/creative/creative-excalidraw), `generative-widgets` |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/optional/creative/creative-hyperframes.md b/website/docs/user-guide/skills/optional/creative/creative-hyperframes.md
index 4d39dede69b..fc27d61d579 100644
--- a/website/docs/user-guide/skills/optional/creative/creative-hyperframes.md
+++ b/website/docs/user-guide/skills/optional/creative/creative-hyperframes.md
@@ -21,7 +21,7 @@ Create HTML-based video compositions, animated title cards, social overlays, cap
 | License | Apache-2.0 |
 | Platforms | linux, macos, windows |
 | Tags | `creative`, `video`, `animation`, `html`, `gsap`, `motion-graphics` |
-| Related skills | [`manim-video`](/user-guide/skills/bundled/creative/creative-manim-video), [`meme-generation`](/user-guide/skills/optional/creative/creative-meme-generation) |
+| Related skills | [`manim-video`](/docs/user-guide/skills/bundled/creative/creative-manim-video), [`meme-generation`](/docs/user-guide/skills/optional/creative/creative-meme-generation) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/optional/creative/creative-kanban-video-orchestrator.md b/website/docs/user-guide/skills/optional/creative/creative-kanban-video-orchestrator.md
index bac154b34da..8fa3cdf127f 100644
--- a/website/docs/user-guide/skills/optional/creative/creative-kanban-video-orchestrator.md
+++ b/website/docs/user-guide/skills/optional/creative/creative-kanban-video-orchestrator.md
@@ -21,7 +21,7 @@ Plan, set up, and monitor a multi-agent video production pipeline backed by Herm
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `video`, `kanban`, `multi-agent`, `orchestration`, `production-pipeline` |
-| Related skills | [`kanban-orchestrator`](/user-guide/skills/bundled/devops/devops-kanban-orchestrator), [`kanban-worker`](/user-guide/skills/bundled/devops/devops-kanban-worker), [`ascii-video`](/user-guide/skills/bundled/creative/creative-ascii-video), [`manim-video`](/user-guide/skills/bundled/creative/creative-manim-video), [`p5js`](/user-guide/skills/bundled/creative/creative-p5js), [`comfyui`](/user-guide/skills/bundled/creative/creative-comfyui), [`touchdesigner-mcp`](/user-guide/skills/bundled/creative/creative-touchdesigner-mcp), [`blender-mcp`](/user-guide/skills/optional/creative/creative-blender-mcp), [`pixel-art`](/user-guide/skills/bundled/creative/creative-pixel-art), [`ascii-art`](/user-guide/skills/bundled/creative/creative-ascii-art), [`songwriting-and-ai-music`](/user-guide/skills/bundled/creative/creative-songwriting-and-ai-music), [`heartmula`](/user-guide/skills/bundled/media/media-heartmula), [`songsee`](/user-guide/skills/bundled/media/media-songsee), [`spotify`](/user-guide/skills/bundled/media/media-spotify), [`youtube-content`](/user-guide/skills/bundled/media/media-youtube-content), [`claude-design`](/user-guide/skills/bundled/creative/creative-claude-design), [`excalidraw`](/user-guide/skills/bundled/creative/creative-excalidraw), [`architecture-diagram`](/user-guide/skills/bundled/creative/creative-architecture-diagram), [`concept-diagrams`](/user-guide/skills/optional/creative/creative-concept-diagrams), [`baoyu-comic`](/user-guide/skills/bundled/creative/creative-baoyu-comic), [`baoyu-infographic`](/user-guide/skills/bundled/creative/creative-baoyu-infographic), [`humanizer`](/user-guide/skills/bundled/creative/creative-humanizer), [`gif-search`](/user-guide/skills/bundled/media/media-gif-search), [`meme-generation`](/user-guide/skills/optional/creative/creative-meme-generation) |
+| Related skills | [`kanban-orchestrator`](/docs/user-guide/skills/bundled/devops/devops-kanban-orchestrator), [`kanban-worker`](/docs/user-guide/skills/bundled/devops/devops-kanban-worker), [`ascii-video`](/docs/user-guide/skills/bundled/creative/creative-ascii-video), [`manim-video`](/docs/user-guide/skills/bundled/creative/creative-manim-video), [`p5js`](/docs/user-guide/skills/bundled/creative/creative-p5js), [`comfyui`](/docs/user-guide/skills/bundled/creative/creative-comfyui), [`touchdesigner-mcp`](/docs/user-guide/skills/bundled/creative/creative-touchdesigner-mcp), [`blender-mcp`](/docs/user-guide/skills/optional/creative/creative-blender-mcp), [`pixel-art`](/docs/user-guide/skills/bundled/creative/creative-pixel-art), [`ascii-art`](/docs/user-guide/skills/bundled/creative/creative-ascii-art), [`songwriting-and-ai-music`](/docs/user-guide/skills/bundled/creative/creative-songwriting-and-ai-music), [`heartmula`](/docs/user-guide/skills/bundled/media/media-heartmula), [`songsee`](/docs/user-guide/skills/bundled/media/media-songsee), [`spotify`](/docs/user-guide/skills/bundled/media/media-spotify), [`youtube-content`](/docs/user-guide/skills/bundled/media/media-youtube-content), [`claude-design`](/docs/user-guide/skills/bundled/creative/creative-claude-design), [`excalidraw`](/docs/user-guide/skills/bundled/creative/creative-excalidraw), [`architecture-diagram`](/docs/user-guide/skills/bundled/creative/creative-architecture-diagram), [`concept-diagrams`](/docs/user-guide/skills/optional/creative/creative-concept-diagrams), [`baoyu-comic`](/docs/user-guide/skills/bundled/creative/creative-baoyu-comic), [`baoyu-infographic`](/docs/user-guide/skills/bundled/creative/creative-baoyu-infographic), [`humanizer`](/docs/user-guide/skills/bundled/creative/creative-humanizer), [`gif-search`](/docs/user-guide/skills/bundled/media/media-gif-search), [`meme-generation`](/docs/user-guide/skills/optional/creative/creative-meme-generation) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/optional/creative/creative-meme-generation.md b/website/docs/user-guide/skills/optional/creative/creative-meme-generation.md
index b7342d47967..836780c678d 100644
--- a/website/docs/user-guide/skills/optional/creative/creative-meme-generation.md
+++ b/website/docs/user-guide/skills/optional/creative/creative-meme-generation.md
@@ -21,7 +21,7 @@ Generate real meme images by picking a template and overlaying text with Pillow.
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `creative`, `memes`, `humor`, `images` |
-| Related skills | [`ascii-art`](/user-guide/skills/bundled/creative/creative-ascii-art), `generative-widgets` |
+| Related skills | [`ascii-art`](/docs/user-guide/skills/bundled/creative/creative-ascii-art), `generative-widgets` |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/optional/devops/devops-pinggy-tunnel.md b/website/docs/user-guide/skills/optional/devops/devops-pinggy-tunnel.md
index 43931442d0a..19f431f1967 100644
--- a/website/docs/user-guide/skills/optional/devops/devops-pinggy-tunnel.md
+++ b/website/docs/user-guide/skills/optional/devops/devops-pinggy-tunnel.md
@@ -21,7 +21,7 @@ Zero-install localhost tunnels over SSH via Pinggy.
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `Pinggy`, `Tunnel`, `Networking`, `SSH`, `Webhook`, `Localhost` |
-| Related skills | `cloudflared-quick-tunnel`, [`webhook-subscriptions`](/user-guide/skills/bundled/devops/devops-webhook-subscriptions) |
+| Related skills | `cloudflared-quick-tunnel`, [`webhook-subscriptions`](/docs/user-guide/skills/bundled/devops/devops-webhook-subscriptions) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/optional/dogfood/dogfood-adversarial-ux-test.md b/website/docs/user-guide/skills/optional/dogfood/dogfood-adversarial-ux-test.md
index 5214608fed5..159f3631d1b 100644
--- a/website/docs/user-guide/skills/optional/dogfood/dogfood-adversarial-ux-test.md
+++ b/website/docs/user-guide/skills/optional/dogfood/dogfood-adversarial-ux-test.md
@@ -21,7 +21,7 @@ Roleplay the most difficult, tech-resistant user for your product. Browse the ap
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `qa`, `ux`, `testing`, `adversarial`, `dogfood`, `personas`, `user-testing` |
-| Related skills | [`dogfood`](/user-guide/skills/bundled/dogfood/dogfood-dogfood) |
+| Related skills | [`dogfood`](/docs/user-guide/skills/bundled/dogfood/dogfood-dogfood) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/optional/finance/finance-3-statement-model.md b/website/docs/user-guide/skills/optional/finance/finance-3-statement-model.md
index 75dd5161aff..886f4f0f7a1 100644
--- a/website/docs/user-guide/skills/optional/finance/finance-3-statement-model.md
+++ b/website/docs/user-guide/skills/optional/finance/finance-3-statement-model.md
@@ -21,7 +21,7 @@ Build fully-integrated 3-statement models (IS, BS, CF) in Excel with working cap
 | License | Apache-2.0 |
 | Platforms | linux, macos, windows |
 | Tags | `finance`, `three-statement`, `income-statement`, `balance-sheet`, `cash-flow`, `excel`, `openpyxl`, `modeling` |
-| Related skills | [`excel-author`](/user-guide/skills/optional/finance/finance-excel-author), [`pptx-author`](/user-guide/skills/optional/finance/finance-pptx-author), [`dcf-model`](/user-guide/skills/optional/finance/finance-dcf-model), [`lbo-model`](/user-guide/skills/optional/finance/finance-lbo-model) |
+| Related skills | [`excel-author`](/docs/user-guide/skills/optional/finance/finance-excel-author), [`pptx-author`](/docs/user-guide/skills/optional/finance/finance-pptx-author), [`dcf-model`](/docs/user-guide/skills/optional/finance/finance-dcf-model), [`lbo-model`](/docs/user-guide/skills/optional/finance/finance-lbo-model) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/optional/finance/finance-comps-analysis.md b/website/docs/user-guide/skills/optional/finance/finance-comps-analysis.md
index 8e2a81d3bcd..952f030567c 100644
--- a/website/docs/user-guide/skills/optional/finance/finance-comps-analysis.md
+++ b/website/docs/user-guide/skills/optional/finance/finance-comps-analysis.md
@@ -21,7 +21,7 @@ Build comparable company analysis in Excel — operating metrics, valuation mult
 | License | Apache-2.0 |
 | Platforms | linux, macos, windows |
 | Tags | `finance`, `valuation`, `comps`, `excel`, `openpyxl`, `modeling`, `investment-banking` |
-| Related skills | [`excel-author`](/user-guide/skills/optional/finance/finance-excel-author), [`pptx-author`](/user-guide/skills/optional/finance/finance-pptx-author), [`dcf-model`](/user-guide/skills/optional/finance/finance-dcf-model), [`lbo-model`](/user-guide/skills/optional/finance/finance-lbo-model) |
+| Related skills | [`excel-author`](/docs/user-guide/skills/optional/finance/finance-excel-author), [`pptx-author`](/docs/user-guide/skills/optional/finance/finance-pptx-author), [`dcf-model`](/docs/user-guide/skills/optional/finance/finance-dcf-model), [`lbo-model`](/docs/user-guide/skills/optional/finance/finance-lbo-model) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/optional/finance/finance-dcf-model.md b/website/docs/user-guide/skills/optional/finance/finance-dcf-model.md
index 0d2426f3607..36d491657b5 100644
--- a/website/docs/user-guide/skills/optional/finance/finance-dcf-model.md
+++ b/website/docs/user-guide/skills/optional/finance/finance-dcf-model.md
@@ -21,7 +21,7 @@ Build institutional-quality DCF valuation models in Excel — revenue projection
 | License | Apache-2.0 |
 | Platforms | linux, macos, windows |
 | Tags | `finance`, `valuation`, `dcf`, `excel`, `openpyxl`, `modeling`, `investment-banking` |
-| Related skills | [`excel-author`](/user-guide/skills/optional/finance/finance-excel-author), [`pptx-author`](/user-guide/skills/optional/finance/finance-pptx-author), [`comps-analysis`](/user-guide/skills/optional/finance/finance-comps-analysis), [`lbo-model`](/user-guide/skills/optional/finance/finance-lbo-model), [`3-statement-model`](/user-guide/skills/optional/finance/finance-3-statement-model) |
+| Related skills | [`excel-author`](/docs/user-guide/skills/optional/finance/finance-excel-author), [`pptx-author`](/docs/user-guide/skills/optional/finance/finance-pptx-author), [`comps-analysis`](/docs/user-guide/skills/optional/finance/finance-comps-analysis), [`lbo-model`](/docs/user-guide/skills/optional/finance/finance-lbo-model), [`3-statement-model`](/docs/user-guide/skills/optional/finance/finance-3-statement-model) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/optional/finance/finance-excel-author.md b/website/docs/user-guide/skills/optional/finance/finance-excel-author.md
index 9c32c7fdc5e..e5d202fa81f 100644
--- a/website/docs/user-guide/skills/optional/finance/finance-excel-author.md
+++ b/website/docs/user-guide/skills/optional/finance/finance-excel-author.md
@@ -21,7 +21,7 @@ Build auditable Excel workbooks headless with openpyxl — blue/black/green cell
 | License | Apache-2.0 |
 | Platforms | linux, macos, windows |
 | Tags | `excel`, `openpyxl`, `finance`, `spreadsheet`, `modeling` |
-| Related skills | [`pptx-author`](/user-guide/skills/optional/finance/finance-pptx-author), [`dcf-model`](/user-guide/skills/optional/finance/finance-dcf-model), [`comps-analysis`](/user-guide/skills/optional/finance/finance-comps-analysis), [`lbo-model`](/user-guide/skills/optional/finance/finance-lbo-model), [`3-statement-model`](/user-guide/skills/optional/finance/finance-3-statement-model) |
+| Related skills | [`pptx-author`](/docs/user-guide/skills/optional/finance/finance-pptx-author), [`dcf-model`](/docs/user-guide/skills/optional/finance/finance-dcf-model), [`comps-analysis`](/docs/user-guide/skills/optional/finance/finance-comps-analysis), [`lbo-model`](/docs/user-guide/skills/optional/finance/finance-lbo-model), [`3-statement-model`](/docs/user-guide/skills/optional/finance/finance-3-statement-model) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/optional/finance/finance-lbo-model.md b/website/docs/user-guide/skills/optional/finance/finance-lbo-model.md
index b32d71ea54b..82a76c67dbf 100644
--- a/website/docs/user-guide/skills/optional/finance/finance-lbo-model.md
+++ b/website/docs/user-guide/skills/optional/finance/finance-lbo-model.md
@@ -21,7 +21,7 @@ Build leveraged buyout models in Excel — sources & uses, debt schedule, cash s
 | License | Apache-2.0 |
 | Platforms | linux, macos, windows |
 | Tags | `finance`, `valuation`, `lbo`, `private-equity`, `excel`, `openpyxl`, `modeling` |
-| Related skills | [`excel-author`](/user-guide/skills/optional/finance/finance-excel-author), [`pptx-author`](/user-guide/skills/optional/finance/finance-pptx-author), [`dcf-model`](/user-guide/skills/optional/finance/finance-dcf-model), [`3-statement-model`](/user-guide/skills/optional/finance/finance-3-statement-model) |
+| Related skills | [`excel-author`](/docs/user-guide/skills/optional/finance/finance-excel-author), [`pptx-author`](/docs/user-guide/skills/optional/finance/finance-pptx-author), [`dcf-model`](/docs/user-guide/skills/optional/finance/finance-dcf-model), [`3-statement-model`](/docs/user-guide/skills/optional/finance/finance-3-statement-model) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/optional/finance/finance-merger-model.md b/website/docs/user-guide/skills/optional/finance/finance-merger-model.md
index cbb6b6b0bdd..30e8ffcd5be 100644
--- a/website/docs/user-guide/skills/optional/finance/finance-merger-model.md
+++ b/website/docs/user-guide/skills/optional/finance/finance-merger-model.md
@@ -21,7 +21,7 @@ Build accretion/dilution (merger) models in Excel — pro-forma P&L, synergies,
 | License | Apache-2.0 |
 | Platforms | linux, macos, windows |
 | Tags | `finance`, `m-and-a`, `merger`, `accretion-dilution`, `excel`, `openpyxl`, `modeling`, `investment-banking` |
-| Related skills | [`excel-author`](/user-guide/skills/optional/finance/finance-excel-author), [`pptx-author`](/user-guide/skills/optional/finance/finance-pptx-author), [`dcf-model`](/user-guide/skills/optional/finance/finance-dcf-model), [`3-statement-model`](/user-guide/skills/optional/finance/finance-3-statement-model) |
+| Related skills | [`excel-author`](/docs/user-guide/skills/optional/finance/finance-excel-author), [`pptx-author`](/docs/user-guide/skills/optional/finance/finance-pptx-author), [`dcf-model`](/docs/user-guide/skills/optional/finance/finance-dcf-model), [`3-statement-model`](/docs/user-guide/skills/optional/finance/finance-3-statement-model) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/optional/finance/finance-pptx-author.md b/website/docs/user-guide/skills/optional/finance/finance-pptx-author.md
index 55f32457dea..a7f863289d4 100644
--- a/website/docs/user-guide/skills/optional/finance/finance-pptx-author.md
+++ b/website/docs/user-guide/skills/optional/finance/finance-pptx-author.md
@@ -21,7 +21,7 @@ Build PowerPoint decks headless with python-pptx. Pairs with excel-author for mo
 | License | Apache-2.0 |
 | Platforms | linux, macos, windows |
 | Tags | `powerpoint`, `pptx`, `python-pptx`, `presentation`, `finance` |
-| Related skills | [`excel-author`](/user-guide/skills/optional/finance/finance-excel-author), [`powerpoint`](/user-guide/skills/bundled/productivity/productivity-powerpoint) |
+| Related skills | [`excel-author`](/docs/user-guide/skills/optional/finance/finance-excel-author), [`powerpoint`](/docs/user-guide/skills/bundled/productivity/productivity-powerpoint) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/optional/finance/finance-stocks.md b/website/docs/user-guide/skills/optional/finance/finance-stocks.md
index d050ada152c..7c43dea3065 100644
--- a/website/docs/user-guide/skills/optional/finance/finance-stocks.md
+++ b/website/docs/user-guide/skills/optional/finance/finance-stocks.md
@@ -21,7 +21,7 @@ Stock quotes, history, search, compare, crypto via Yahoo.
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `Stocks`, `Finance`, `Market`, `Crypto`, `Investing` |
-| Related skills | [`dcf-model`](/user-guide/skills/optional/finance/finance-dcf-model), [`comps-analysis`](/user-guide/skills/optional/finance/finance-comps-analysis), [`lbo-model`](/user-guide/skills/optional/finance/finance-lbo-model) |
+| Related skills | [`dcf-model`](/docs/user-guide/skills/optional/finance/finance-dcf-model), [`comps-analysis`](/docs/user-guide/skills/optional/finance/finance-comps-analysis), [`lbo-model`](/docs/user-guide/skills/optional/finance/finance-lbo-model) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/optional/mcp/mcp-fastmcp.md b/website/docs/user-guide/skills/optional/mcp/mcp-fastmcp.md
index 1cfa9b063c1..2defe89d4eb 100644
--- a/website/docs/user-guide/skills/optional/mcp/mcp-fastmcp.md
+++ b/website/docs/user-guide/skills/optional/mcp/mcp-fastmcp.md
@@ -21,7 +21,7 @@ Build, test, inspect, install, and deploy MCP servers with FastMCP in Python. Us
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `MCP`, `FastMCP`, `Python`, `Tools`, `Resources`, `Prompts`, `Deployment` |
-| Related skills | [`native-mcp`](/user-guide/skills/bundled/mcp/mcp-native-mcp), [`mcporter`](/user-guide/skills/optional/mcp/mcp-mcporter) |
+| Related skills | [`native-mcp`](/docs/user-guide/skills/bundled/mcp/mcp-native-mcp), [`mcporter`](/docs/user-guide/skills/optional/mcp/mcp-mcporter) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/optional/migration/migration-openclaw-migration.md b/website/docs/user-guide/skills/optional/migration/migration-openclaw-migration.md
index 57928a55d99..5837b10511d 100644
--- a/website/docs/user-guide/skills/optional/migration/migration-openclaw-migration.md
+++ b/website/docs/user-guide/skills/optional/migration/migration-openclaw-migration.md
@@ -21,7 +21,7 @@ Migrate a user's OpenClaw customization footprint into Hermes Agent. Imports Her
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `Migration`, `OpenClaw`, `Hermes`, `Memory`, `Persona`, `Import` |
-| Related skills | [`hermes-agent`](/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-hermes-agent) |
+| Related skills | [`hermes-agent`](/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-hermes-agent) |
 
 ## Reference: full SKILL.md
 
@@ -56,7 +56,7 @@ It uses `scripts/openclaw_to_hermes.py` to:
 - import `SOUL.md` into the Hermes home directory as `SOUL.md`
 - transform OpenClaw `MEMORY.md` and `USER.md` into Hermes memory entries
 - merge OpenClaw command approval patterns into Hermes `command_allowlist`
-- migrate Hermes-compatible messaging settings such as `TELEGRAM_ALLOWED_USERS` and `MESSAGING_CWD`
+- migrate Hermes-compatible messaging settings such as `TELEGRAM_ALLOWED_USERS`, and map OpenClaw workspace settings to Hermes working-directory configuration
 - copy OpenClaw skills into `~/.hermes/skills/openclaw-imports/`
 - optionally copy the OpenClaw workspace instructions file into a chosen Hermes workspace
 - mirror compatible workspace assets such as `workspace/tts/` into `~/.hermes/tts/`
diff --git a/website/docs/user-guide/skills/optional/productivity/productivity-shop-app.md b/website/docs/user-guide/skills/optional/productivity/productivity-shop-app.md
index b5f219e29aa..814b686c639 100644
--- a/website/docs/user-guide/skills/optional/productivity/productivity-shop-app.md
+++ b/website/docs/user-guide/skills/optional/productivity/productivity-shop-app.md
@@ -21,7 +21,7 @@ Shop.app: product search, order tracking, returns, reorder.
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `Shopping`, `E-commerce`, `Shop.app`, `Products`, `Orders`, `Returns` |
-| Related skills | [`shopify`](/user-guide/skills/optional/productivity/productivity-shopify), [`maps`](/user-guide/skills/bundled/productivity/productivity-maps) |
+| Related skills | [`shopify`](/docs/user-guide/skills/optional/productivity/productivity-shopify), [`maps`](/docs/user-guide/skills/bundled/productivity/productivity-maps) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/optional/productivity/productivity-shopify.md b/website/docs/user-guide/skills/optional/productivity/productivity-shopify.md
index 3c36be70d93..61bc95cfa66 100644
--- a/website/docs/user-guide/skills/optional/productivity/productivity-shopify.md
+++ b/website/docs/user-guide/skills/optional/productivity/productivity-shopify.md
@@ -21,7 +21,7 @@ Shopify Admin & Storefront GraphQL APIs via curl. Products, orders, customers, i
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `Shopify`, `E-commerce`, `Commerce`, `API`, `GraphQL` |
-| Related skills | [`airtable`](/user-guide/skills/bundled/productivity/productivity-airtable), [`xurl`](/user-guide/skills/bundled/social-media/social-media-xurl) |
+| Related skills | [`airtable`](/docs/user-guide/skills/bundled/productivity/productivity-airtable), [`xurl`](/docs/user-guide/skills/bundled/social-media/social-media-xurl) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/optional/productivity/productivity-siyuan.md b/website/docs/user-guide/skills/optional/productivity/productivity-siyuan.md
index 2f88f113fa6..58263053fdd 100644
--- a/website/docs/user-guide/skills/optional/productivity/productivity-siyuan.md
+++ b/website/docs/user-guide/skills/optional/productivity/productivity-siyuan.md
@@ -21,7 +21,7 @@ SiYuan Note API for searching, reading, creating, and managing blocks and docume
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `SiYuan`, `Notes`, `Knowledge Base`, `PKM`, `API` |
-| Related skills | [`obsidian`](/user-guide/skills/bundled/note-taking/note-taking-obsidian), [`notion`](/user-guide/skills/bundled/productivity/productivity-notion) |
+| Related skills | [`obsidian`](/docs/user-guide/skills/bundled/note-taking/note-taking-obsidian), [`notion`](/docs/user-guide/skills/bundled/productivity/productivity-notion) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/optional/productivity/productivity-telephony.md b/website/docs/user-guide/skills/optional/productivity/productivity-telephony.md
index af6f3855b26..f6c15444cbb 100644
--- a/website/docs/user-guide/skills/optional/productivity/productivity-telephony.md
+++ b/website/docs/user-guide/skills/optional/productivity/productivity-telephony.md
@@ -21,7 +21,7 @@ Give Hermes phone capabilities without core tool changes. Provision and persist
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `telephony`, `phone`, `sms`, `mms`, `voice`, `twilio`, `bland.ai`, `vapi`, `calling`, `texting` |
-| Related skills | [`maps`](/user-guide/skills/bundled/productivity/productivity-maps), [`google-workspace`](/user-guide/skills/bundled/productivity/productivity-google-workspace), [`agentmail`](/user-guide/skills/optional/email/email-agentmail) |
+| Related skills | [`maps`](/docs/user-guide/skills/bundled/productivity/productivity-maps), [`google-workspace`](/docs/user-guide/skills/bundled/productivity/productivity-google-workspace), [`agentmail`](/docs/user-guide/skills/optional/email/email-agentmail) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/optional/research/research-darwinian-evolver.md b/website/docs/user-guide/skills/optional/research/research-darwinian-evolver.md
index 123088c8a7d..121b2dde160 100644
--- a/website/docs/user-guide/skills/optional/research/research-darwinian-evolver.md
+++ b/website/docs/user-guide/skills/optional/research/research-darwinian-evolver.md
@@ -21,7 +21,7 @@ Evolve prompts/regex/SQL/code with Imbue's evolution loop.
 | License | MIT |
 | Platforms | linux, macos |
 | Tags | `evolution`, `optimization`, `prompt-engineering`, `research` |
-| Related skills | [`arxiv`](/user-guide/skills/bundled/research/research-arxiv), [`jupyter-live-kernel`](/user-guide/skills/bundled/data-science/data-science-jupyter-live-kernel) |
+| Related skills | [`arxiv`](/docs/user-guide/skills/bundled/research/research-arxiv), [`jupyter-live-kernel`](/docs/user-guide/skills/bundled/data-science/data-science-jupyter-live-kernel) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/optional/research/research-duckduckgo-search.md b/website/docs/user-guide/skills/optional/research/research-duckduckgo-search.md
index ffa1bd64905..bd08395e24f 100644
--- a/website/docs/user-guide/skills/optional/research/research-duckduckgo-search.md
+++ b/website/docs/user-guide/skills/optional/research/research-duckduckgo-search.md
@@ -21,7 +21,7 @@ Free web search via DuckDuckGo — text, news, images, videos. No API key needed
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `search`, `duckduckgo`, `web-search`, `free`, `fallback` |
-| Related skills | [`arxiv`](/user-guide/skills/bundled/research/research-arxiv) |
+| Related skills | [`arxiv`](/docs/user-guide/skills/bundled/research/research-arxiv) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/optional/research/research-gitnexus-explorer.md b/website/docs/user-guide/skills/optional/research/research-gitnexus-explorer.md
index 808789d81de..5b1f62458d1 100644
--- a/website/docs/user-guide/skills/optional/research/research-gitnexus-explorer.md
+++ b/website/docs/user-guide/skills/optional/research/research-gitnexus-explorer.md
@@ -21,7 +21,7 @@ Index a codebase with GitNexus and serve an interactive knowledge graph via web
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `gitnexus`, `code-intelligence`, `knowledge-graph`, `visualization` |
-| Related skills | [`native-mcp`](/user-guide/skills/bundled/mcp/mcp-native-mcp), [`codebase-inspection`](/user-guide/skills/bundled/github/github-codebase-inspection) |
+| Related skills | [`native-mcp`](/docs/user-guide/skills/bundled/mcp/mcp-native-mcp), [`codebase-inspection`](/docs/user-guide/skills/bundled/github/github-codebase-inspection) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/optional/research/research-osint-investigation.md b/website/docs/user-guide/skills/optional/research/research-osint-investigation.md
index e363ef77fd5..7428c3022b2 100644
--- a/website/docs/user-guide/skills/optional/research/research-osint-investigation.md
+++ b/website/docs/user-guide/skills/optional/research/research-osint-investigation.md
@@ -20,7 +20,7 @@ Public-records OSINT investigation framework — SEC EDGAR filings, USAspending
 | Author | Hermes Agent (adapted from ShinMegamiBoson/OpenPlanter, MIT) |
 | Platforms | linux, macos, windows |
 | Tags | `osint`, `investigation`, `public-records`, `sec`, `sanctions`, `corporate-registry`, `property`, `courts`, `due-diligence`, `journalism` |
-| Related skills | [`domain-intel`](/user-guide/skills/optional/research/research-domain-intel), [`arxiv`](/user-guide/skills/bundled/research/research-arxiv) |
+| Related skills | [`domain-intel`](/docs/user-guide/skills/optional/research/research-domain-intel), [`arxiv`](/docs/user-guide/skills/bundled/research/research-arxiv) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/optional/research/research-parallel-cli.md b/website/docs/user-guide/skills/optional/research/research-parallel-cli.md
index 619ece67fb9..6532ae33c89 100644
--- a/website/docs/user-guide/skills/optional/research/research-parallel-cli.md
+++ b/website/docs/user-guide/skills/optional/research/research-parallel-cli.md
@@ -21,7 +21,7 @@ Optional vendor skill for Parallel CLI — agent-native web search, extraction,
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `Research`, `Web`, `Search`, `Deep-Research`, `Enrichment`, `CLI` |
-| Related skills | [`duckduckgo-search`](/user-guide/skills/optional/research/research-duckduckgo-search), [`mcporter`](/user-guide/skills/optional/mcp/mcp-mcporter) |
+| Related skills | [`duckduckgo-search`](/docs/user-guide/skills/optional/research/research-duckduckgo-search), [`mcporter`](/docs/user-guide/skills/optional/mcp/mcp-mcporter) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/optional/research/research-qmd.md b/website/docs/user-guide/skills/optional/research/research-qmd.md
index 6a711f793b6..47cf81634b8 100644
--- a/website/docs/user-guide/skills/optional/research/research-qmd.md
+++ b/website/docs/user-guide/skills/optional/research/research-qmd.md
@@ -21,7 +21,7 @@ Search personal knowledge bases, notes, docs, and meeting transcripts locally us
 | License | MIT |
 | Platforms | macos, linux |
 | Tags | `Search`, `Knowledge-Base`, `RAG`, `Notes`, `MCP`, `Local-AI` |
-| Related skills | [`obsidian`](/user-guide/skills/bundled/note-taking/note-taking-obsidian), [`native-mcp`](/user-guide/skills/bundled/mcp/mcp-native-mcp), [`arxiv`](/user-guide/skills/bundled/research/research-arxiv) |
+| Related skills | [`obsidian`](/docs/user-guide/skills/bundled/note-taking/note-taking-obsidian), [`native-mcp`](/docs/user-guide/skills/bundled/mcp/mcp-native-mcp), [`arxiv`](/docs/user-guide/skills/bundled/research/research-arxiv) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/optional/research/research-scrapling.md b/website/docs/user-guide/skills/optional/research/research-scrapling.md
index 7623f153326..dd1ba8865db 100644
--- a/website/docs/user-guide/skills/optional/research/research-scrapling.md
+++ b/website/docs/user-guide/skills/optional/research/research-scrapling.md
@@ -21,7 +21,7 @@ Web scraping with Scrapling - HTTP fetching, stealth browser automation, Cloudfl
 | License | MIT |
 | Platforms | linux, macos, windows |
 | Tags | `Web Scraping`, `Browser`, `Cloudflare`, `Stealth`, `Crawling`, `Spider` |
-| Related skills | [`duckduckgo-search`](/user-guide/skills/optional/research/research-duckduckgo-search), [`domain-intel`](/user-guide/skills/optional/research/research-domain-intel) |
+| Related skills | [`duckduckgo-search`](/docs/user-guide/skills/optional/research/research-duckduckgo-search), [`domain-intel`](/docs/user-guide/skills/optional/research/research-domain-intel) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/optional/research/research-searxng-search.md b/website/docs/user-guide/skills/optional/research/research-searxng-search.md
index f6de490fcba..90abfc91198 100644
--- a/website/docs/user-guide/skills/optional/research/research-searxng-search.md
+++ b/website/docs/user-guide/skills/optional/research/research-searxng-search.md
@@ -21,7 +21,7 @@ Free meta-search via SearXNG — aggregates results from 70+ search engines. Sel
 | License | MIT |
 | Platforms | linux, macos |
 | Tags | `search`, `searxng`, `meta-search`, `self-hosted`, `free`, `fallback` |
-| Related skills | [`duckduckgo-search`](/user-guide/skills/optional/research/research-duckduckgo-search), [`domain-intel`](/user-guide/skills/optional/research/research-domain-intel) |
+| Related skills | [`duckduckgo-search`](/docs/user-guide/skills/optional/research/research-duckduckgo-search), [`domain-intel`](/docs/user-guide/skills/optional/research/research-domain-intel) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/skills/optional/security/security-web-pentest.md b/website/docs/user-guide/skills/optional/security/security-web-pentest.md
new file mode 100644
index 00000000000..dcd9850814b
--- /dev/null
+++ b/website/docs/user-guide/skills/optional/security/security-web-pentest.md
@@ -0,0 +1,337 @@
+---
+title: "Web Pentest"
+sidebar_label: "Web Pentest"
+description: "Authorized web application penetration testing — reconnaissance, vulnerability analysis, proof-based exploitation, and professional reporting"
+---
+
+{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */}
+
+# Web Pentest
+
+Authorized web application penetration testing — reconnaissance, vulnerability
+analysis, proof-based exploitation, and professional reporting. Adapts
+Shannon's "No Exploit, No Report" methodology with hard guardrails for
+scope, authorization, and aux-client leakage. Active testing against running
+applications you own or have written authorization to test.
+
+## Skill metadata
+
+| | |
+|---|---|
+| Source | Optional — install with `hermes skills install official/security/web-pentest` |
+| Path | `optional-skills/security/web-pentest` |
+| Platforms | linux, macos |
+
+## Reference: full SKILL.md
+
+:::info
+The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active.
+:::
+
+# Web Application Penetration Testing
+
+A phased pentesting workflow for running web applications. Adapted from
+Shannon's pipeline (Keygraph, AGPL — concepts only, no code borrowed).
+Built around three rules:
+
+1. No exploit, no report — every finding requires reproducible evidence.
+2. Bounded scope — every active request goes against a target the operator
+   pre-declared. Off-scope hosts are refused.
+3. Bypass exhaustion before false-positive dismissal — a "blocked" payload
+   is not a clean bill of health until you've tried the bypass set.
+
+---
+
+## ⚠️ Hard Guardrails — Read Before Every Engagement
+
+Violating any of these invalidates the engagement and may be illegal.
+
+1. **Authorization gate.** Before the first active scan in a session, you
+   MUST confirm with the user, in writing, that they own or have written
+   authorization to test the target. Record the acknowledgement in
+   `engagement/authorization.md` (see template). No acknowledgement → no
+   active scanning. Reading public pages with `curl` is fine; sending
+   payloads is not.
+
+2. **Scope allowlist.** Maintain `engagement/scope.txt` — one hostname or
+   CIDR per line. Every `nmap`, `curl`, `whatweb`, browser navigation, or
+   payload-bearing request MUST be against an entry in scope. If a target
+   redirects you off-scope (3xx to a different host, a link in HTML),
+   STOP and confirm with the user before following.
+
+3. **No production systems without paper.** If the user hasn't told you
+   "yes, prod is in scope and I have written sign-off," assume not. Default
+   targets are staging, local docker, dedicated test instances.
+
+4. **Cloud metadata is off by default.** Do not probe `169.254.169.254`,
+   `metadata.google.internal`, `100.100.100.200`, `[fd00:ec2::254]`, or
+   equivalent unless the engagement explicitly includes SSRF-to-metadata
+   as a goal AND the target is one you control. The agent's browser tool
+   can reach these from inside your own infrastructure — don't.
+
+5. **Destructive payloads need approval.** SQLi payloads that DROP/DELETE,
+   filesystem-write SSTI, command injection with `rm`/`shutdown`/`mkfs`,
+   anything that mutates beyond a single test row → ASK FIRST. The
+   `approval.py` system catches some; don't rely on it alone.
+
+6. **Aux-client leakage risk (Hermes-specific).** This skill produces
+   sessions full of SQLi/XSS/RCE payloads, captured credentials, JWT
+   tokens. Hermes' compression and title-generation paths replay history
+   through the auxiliary client (often the main model). Anything sensitive
+   you write to the conversation can leave the box on the next compress.
+   Mitigation:
+   - Redact captured tokens/credentials to the LAST 6 CHARS before logging
+     them in any message. Full values go to `engagement/evidence/` files,
+     never into chat history.
+   - If the engagement is sensitive, set `auxiliary.title_generation.enabled: false`
+     in `~/.hermes/config.yaml` for the session.
+
+7. **Rate limit yourself.** Default 200ms between active requests against
+   any single host. The recon-scan.sh script enforces this. Don't bypass
+   it without operator approval.
+
+8. **Authority of the report.** This skill produces a security
+   assessment, not a "PASS." Even a clean run is "no exploitable issues
+   FOUND in scope X within time T using methods Y" — not "the application
+   is secure." Mirror that language in the report.
+
+---
+
+## Phase 0: Engagement Setup
+
+Before any scanning happens, create the engagement directory and
+authorization acknowledgement.
+
+```bash
+ENGAGEMENT=engagement-$(date +%Y%m%d-%H%M%S)
+mkdir -p "$ENGAGEMENT"/{evidence,findings,reports}
+cd "$ENGAGEMENT"
+```
+
+1. **Ask the user (verbatim):**
+   > "Confirm: (a) the target URL is [X], (b) you own this application
+   > or have written authorization to test it, and (c) the engagement
+   > may run for up to [N] hours starting now. Reply 'authorized' to
+   > proceed."
+
+2. **Wait for explicit `authorized` response.** Any other answer means STOP.
+
+3. **Record authorization** to `engagement/authorization.md` using the
+   template in `templates/authorization.md`. Include:
+   - Target URL(s) and IP(s)
+   - Authorization basis (ownership / written authz from $name)
+   - Engagement window
+   - Out-of-scope items (production, third-party services, etc.)
+   - Operator name (the user driving this session)
+
+4. **Build scope.txt:**
+   ```
+   localhost
+   127.0.0.1
+   staging.example.com
+   192.168.1.0/24    # internal lab only, with operator OK
+   ```
+
+5. **Read** `references/scope-enforcement.md` before issuing the first
+   active request — that doc has the host-extraction rules you apply
+   to every command/URL before it goes out.
+
+---
+
+## Phase 1: Pre-Recon (Code Analysis, optional)
+
+Skip if no source access (black-box engagement).
+
+If you have read access to the application source:
+
+1. **Map the architecture** — framework, routing, middleware stack
+2. **Inventory sinks** — every `execute(`, `os.system(`, `eval(`,
+   template render, file read/write, redirect target
+3. **Map auth** — session cookie vs JWT, OAuth flows, password reset,
+   privileged endpoints
+4. **Identify trust boundaries** — what's authenticated, what's not,
+   what comes from `request.*`
+5. **Backward taint** from each sink to a request source. Early-terminate
+   when proper sanitization is found (parameterized queries, allowlists,
+   `shlex.quote`, well-known escapers).
+
+Output: `evidence/pre-recon.md` — architecture map, sink inventory,
+suspected vulnerable code paths.
+
+This is OFFLINE work. No traffic to the target.
+
+---
+
+## Phase 2: Recon (Live, Read-Only)
+
+Maps the attack surface. All requests are GETs of public pages, no
+payloads yet. Still scope-bounded.
+
+1. **Verify scope.** Resolve every target hostname → IP. Confirm IPs are
+   in scope (avoids the "DNS points somewhere unexpected" trap).
+
+2. **Network surface** (only if scope permits port scanning):
+   ```bash
+   nmap -sT -T3 --top-ports 100 -oN evidence/nmap.txt $TARGET
+   ```
+   Use `-T3` (default), not `-T4/-T5`. Stealthier and avoids tripping
+   IDS/IPS in shared environments.
+
+3. **Tech fingerprint:**
+   ```bash
+   whatweb -v $TARGET_URL > evidence/whatweb.txt
+   curl -sIk $TARGET_URL > evidence/headers.txt
+   ```
+
+4. **Endpoint discovery:**
+   - Crawl the app with the browser tool (`browser_navigate`,
+     `browser_get_images`, follow links).
+   - Inspect `robots.txt`, `sitemap.xml`, `.well-known/*`.
+   - Use the developer tools network panel via browser tool to capture
+     XHR/fetch calls.
+
+5. **Auth surface:** Identify login, registration, password reset,
+   session cookie names, token formats. Do NOT send credentials yet —
+   just observe.
+
+6. **Correlate with pre-recon** (if you have source). For each
+   `evidence/pre-recon.md` finding, mark whether the live surface
+   confirms it's reachable.
+
+Output: `evidence/recon.md` — endpoints, technologies, auth model,
+input vectors.
+
+---
+
+## Phase 3: Vulnerability Analysis
+
+One delegate_task per vulnerability class. Each agent reads
+`evidence/recon.md` (+ `evidence/pre-recon.md` if present), produces
+`findings/<class>-queue.json` using `templates/exploitation-queue.json`.
+
+Use `delegate_task` with these focused subagents (parallel where possible):
+
+| Class | Goal | Reference |
+|-------|------|-----------|
+| `injection` | SQLi, command, path traversal, SSTI, LFI/RFI, deserialization | `references/vuln-taxonomy.md` (slot types) |
+| `xss` | Reflected, stored, DOM-based | `references/vuln-taxonomy.md` (render contexts) |
+| `auth` | Login bypass, JWT confusion, session fixation, OAuth flaws | `references/exploitation-techniques.md` |
+| `authz` | IDOR, vertical/horizontal escalation, business logic | `references/exploitation-techniques.md` |
+| `ssrf` | Internal reachability, metadata, protocol smuggling | Skip metadata unless explicitly authorized |
+| `infra` | Misconfig, info disclosure, default creds, exposed admin | `references/exploitation-techniques.md` |
+
+Each queue entry has: id, vuln class, source (file:line if known),
+endpoint, parameter, slot type, suspected defense, verdict
+(`identified` / `partial` / `confirmed` / `critical`), witness payload,
+confidence (0-1), notes.
+
+The analysis phase doesn't send malicious payloads yet — it stages them.
+The exploitation phase actually fires them.
+
+---
+
+## Phase 4: Exploitation (Proof-Based, Conditional)
+
+Only run a sub-agent per class where the analysis queue has actionable
+entries (`identified` or `partial`).
+
+For each candidate:
+
+1. **Pre-send check** — host in scope? auth gate satisfied? payload
+   approved if destructive?
+2. **Send the witness payload** — minimal proof. SQLi: `' AND 1=1--`
+   then `' AND 1=2--`. XSS: a benign marker like
+   `<svg/onload=console.log("HERMES-PENTEST-XSS")>`. Never `alert(1)` in
+   stored XSS — it'll fire for other users in shared environments.
+3. **Verify the witness fires** — for blind injection, use a sleep
+   probe (`SLEEP(5)`) and time the response. For SSRF, use a
+   tester-controlled callback host you own (NOT a public service like
+   webhook.site for sensitive engagements — exfil paths).
+4. **Promote level:**
+   - **L1 Identified** — pattern matched, no behavior change
+   - **L2 Partial** — sink reached, but defense in place
+   - **L3 Confirmed** — payload changed app behavior in observable way
+   - **L4 Critical** — data extracted, code executed, access escalated
+5. **Bypass exhaustion before classifying as FP.** For each candidate
+   that blocks: try at least the bypass set in
+   `references/bypass-techniques.md` for that class. Only after the set
+   is exhausted may you write `verdict: false_positive`.
+6. **Record evidence** for every L3/L4:
+   - Full request (method, URL, headers, body)
+   - Response (status, headers, relevant body excerpt)
+   - Reproducer command (curl one-liner)
+   - Impact statement
+
+Output: `findings/exploitation-evidence.md`
+
+**Redact in evidence files:**
+- Any captured credentials/tokens → last 6 chars only in chat;
+  full value to `findings/secrets-vault.md` (gitignored).
+- Other users' PII → redact.
+- Your test credentials → fine to keep.
+
+---
+
+## Phase 5: Reporting
+
+Generate the final report using `templates/pentest-report.md`. Sections:
+
+1. Executive summary
+2. Engagement scope (from `engagement/scope.txt`)
+3. Authorization (from `engagement/authorization.md`)
+4. Findings (L3/L4 only — proof-required). Per finding:
+   - Title, severity (CVSS 3.1), CWE
+   - Affected endpoint(s)
+   - Proof (request + response excerpt)
+   - Reproduction steps
+   - Impact
+   - Remediation
+5. Not-exploited candidates (L1/L2 with notes on what blocked them)
+6. Out-of-scope observations
+7. Methodology / tools used
+8. Limitations and what was NOT tested
+
+**Severity policy:** CVSS only for L3/L4. L1/L2 are "candidates pending
+verification" — don't assign CVSS to unverified findings.
+
+---
+
+## When to Stop
+
+- The user revokes authorization.
+- A candidate finding clearly impacts production data and you don't have
+  approval for destructive testing — STOP and ask.
+- The target starts returning 503/429 storms — back off, reconvene with
+  the operator.
+- You discover something *outside* the contracted scope (e.g. an exposed
+  customer database while testing an unrelated endpoint). STOP, document,
+  report to the operator. Do not pivot without explicit approval — that
+  pivot is what makes pentesting illegal.
+
+---
+
+## What This Skill Does NOT Cover
+
+- Network-layer pentesting beyond port scanning (no Metasploit,
+  Cobalt Strike, AD attacks, network protocol fuzzing).
+- Reverse engineering / binary analysis (see issue #383).
+- Source-only static analysis (see issue #382).
+- Active social engineering / phishing.
+- Anything against systems the operator hasn't pre-authorized.
+
+If the engagement needs any of these, escalate to a professional
+pentester. This skill complements professional pentesting; it does
+not replace it.
+
+---
+
+## Further Reading
+
+- `references/scope-enforcement.md` — how to bound every active request
+- `references/vuln-taxonomy.md` — slot types, render contexts, OWASP map
+- `references/exploitation-techniques.md` — per-class payload patterns
+- `references/bypass-techniques.md` — common WAF/filter bypasses
+- `templates/authorization.md` — engagement authorization template
+- `templates/pentest-report.md` — final report template
+- `templates/exploitation-queue.json` — per-class finding queue schema
+- `scripts/recon-scan.sh` — rate-limited nmap+whatweb+headers wrapper
diff --git a/website/docs/user-guide/skills/optional/software-development/software-development-rest-graphql-debug.md b/website/docs/user-guide/skills/optional/software-development/software-development-rest-graphql-debug.md
index 7b490962d9c..0698d855f5f 100644
--- a/website/docs/user-guide/skills/optional/software-development/software-development-rest-graphql-debug.md
+++ b/website/docs/user-guide/skills/optional/software-development/software-development-rest-graphql-debug.md
@@ -20,7 +20,7 @@ Debug REST/GraphQL APIs: status codes, auth, schemas, repro.
 | Author | eren-karakus0 |
 | License | MIT |
 | Tags | `api`, `rest`, `graphql`, `http`, `debugging`, `testing`, `curl`, `integration` |
-| Related skills | [`systematic-debugging`](/user-guide/skills/bundled/software-development/software-development-systematic-debugging), [`test-driven-development`](/user-guide/skills/bundled/software-development/software-development-test-driven-development) |
+| Related skills | [`systematic-debugging`](/docs/user-guide/skills/bundled/software-development/software-development-systematic-debugging), [`test-driven-development`](/docs/user-guide/skills/bundled/software-development/software-development-test-driven-development) |
 
 ## Reference: full SKILL.md
 
diff --git a/website/docs/user-guide/windows-wsl-quickstart.md b/website/docs/user-guide/windows-wsl-quickstart.md
index baf11f468db..937c643a4dc 100644
--- a/website/docs/user-guide/windows-wsl-quickstart.md
+++ b/website/docs/user-guide/windows-wsl-quickstart.md
@@ -260,6 +260,32 @@ For webhooks from cloud messaging providers (Telegram `setWebhook`, Slack events
 
 The Hermes [Tool Gateway](/user-guide/features/tool-gateway) and the API server are long-lived processes. In WSL2 you have a few options for keeping them up.
 
+### Desktop shortcut for opening Hermes quickly
+
+If you just want a double-click launcher for an interactive Hermes shell, create
+it on the Windows side and have it jump into WSL for you:
+
+1. Right-click the Windows desktop and choose **New -> Shortcut**.
+2. For the target, use your distro name (replace `Ubuntu` if needed):
+
+   ```text
+   wt.exe -w 0 -p "Ubuntu" wsl.exe -d Ubuntu --cd ~ -- bash -ic "hermes"
+   ```
+
+3. Name it something obvious like `Hermes`.
+
+That opens Windows Terminal, starts your WSL distro, drops you in your Linux
+home directory, and launches Hermes. If `hermes` is not on PATH yet, open WSL
+once manually and run `source ~/.bashrc`, or replace the command with
+`uv run hermes` inside your project checkout.
+
+Optional polish:
+
+- **Custom icon:** open **Properties -> Change Icon** and point it at an `.ico`
+  file, such as the Hermes favicon from the repo.
+- **Pinned launcher:** once the shortcut works, pin it to Start or Taskbar so
+  you do not have to browse for it again.
+
 ### Inside WSL with systemd (recommended)
 
 If you enabled systemd per the setup section above, `hermes gateway` and the API server work the way they do on any Linux machine. Use the gateway setup wizard:
diff --git a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/developer-guide/web-search-provider-plugin.md b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/developer-guide/web-search-provider-plugin.md
index 2c1f971dfcc..739501b0376 100644
--- a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/developer-guide/web-search-provider-plugin.md
+++ b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/developer-guide/web-search-provider-plugin.md
@@ -80,9 +80,6 @@ class MyBackendWebSearchProvider(WebSearchProvider):
     def supports_extract(self) -> bool:
         return False
 
-    def supports_crawl(self) -> bool:
-        return False
-
     def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
         import httpx
 
@@ -157,12 +154,10 @@ requires_env:
 | `is_available()` | ✅ | — | 轻量可用性检查——环境变量、可选依赖等 |
 | `supports_search()` | — | `True` | `web_search` 路由的能力标志 |
 | `supports_extract()` | — | `False` | `web_extract` 路由的能力标志 |
-| `supports_crawl()` | — | `False` | 深度爬取模式的能力标志 |
 | `search(query, limit)` | 条件必须 | 抛出异常 | 当 `supports_search()` 返回 `True` 时必须实现 |
 | `extract(urls, **kwargs)` | 条件必须 | 抛出异常 | 当 `supports_extract()` 返回 `True` 时必须实现 |
-| `crawl(url, **kwargs)` | 条件必须 | 抛出异常 | 当 `supports_crawl()` 返回 `True` 时必须实现 |
 
-提供商可以在单个类中声明多种能力——Firecrawl、Tavily、Exa 和 Parallel 均实现了搜索/提取/爬取三种能力。Brave Search 和 DDGS 仅支持搜索；SearXNG 也仅支持搜索，并有文档说明的"与提取提供商配对使用"工作流。
+提供商可以在单个类中声明多种能力——Firecrawl、Tavily、Exa 和 Parallel 均实现了搜索和提取两种能力。Brave Search 和 DDGS 仅支持搜索；SearXNG 也仅支持搜索，并有文档说明的"与提取提供商配对使用"工作流。
 
 ## 响应格式
 
diff --git a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/reference/cli-commands.md b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/reference/cli-commands.md
index aa114fbf0b9..7e3da1c4963 100644
--- a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/reference/cli-commands.md
+++ b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/reference/cli-commands.md
@@ -224,6 +224,7 @@ hermes gateway <subcommand>
 | 选项 | 说明 |
 |--------|-------------|
 | `--all` | 在 `start` / `restart` / `stop` 时：对**每个 profile** 的 gateway 执行操作，而不仅限于活跃的 `HERMES_HOME`。当你并行运行多个 profile 并希望在 `hermes update` 后全部重启时很有用。 |
+| `--no-supervise` | 在 `run` 时：在 s6-overlay Docker 镜像内部，跳过 s6 自动监管，退回到 pre-s6 前台语义——gateway 作为容器主进程运行，无自动重启。在 s6 镜像之外为空操作。等同于设置 `HERMES_GATEWAY_NO_SUPERVISE=1`。 |
 
 :::tip WSL 用户
 使用 `hermes gateway run` 而非 `hermes gateway start`——WSL 的 systemd 支持不稳定。用 tmux 包裹以保持持久运行：`tmux new -s hermes 'hermes gateway run'`。详见 [WSL FAQ](/reference/faq#wsl-gateway-keeps-disconnecting-or-hermes-gateway-start-fails)。
diff --git a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/reference/environment-variables.md b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/reference/environment-variables.md
index db5c0d3a3e3..52ed671891b 100644
--- a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/reference/environment-variables.md
+++ b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/reference/environment-variables.md
@@ -518,6 +518,7 @@ Graph 事件（Teams 会议、日历、聊天等）的入站变更通知监听
 | `HERMES_GATEWAY_PLATFORM_CONNECT_TIMEOUT` | gateway 启动期间每个平台的连接超时（秒）。 |
 | `HERMES_GATEWAY_BUSY_INPUT_MODE` | 默认 gateway 繁忙输入行为：`queue`、`steer` 或 `interrupt`。可通过 `/busy` 按聊天覆盖。 |
 | `HERMES_GATEWAY_BUSY_ACK_ENABLED` | gateway 是否在用户 agent 繁忙时发送确认消息（⚡/⏳/⏩）（默认：`true`）。设为 `false` 可完全抑制这些消息——输入仍会正常排队/引导/中断，只是聊天回复被静默。从 `config.yaml` 中的 `display.busy_ack_enabled` 桥接。 |
+| `HERMES_GATEWAY_NO_SUPERVISE` | 在 s6-overlay Docker 镜像内部运行 `hermes gateway run` 时跳过 s6 自动监管，退回到 pre-s6 前台语义（无自动重启，gateway 作为容器主进程）。真值：`1`、`true`、`yes`。等同于 `--no-supervise` CLI 标志。在 s6 镜像之外为空操作。 |
 | `HERMES_FILE_MUTATION_VERIFIER` | 启用每轮文件变更验证器页脚（默认：`true`）。启用后，Hermes 附加一个建议列表，列出本轮中失败且未被成功写入覆盖的 `write_file`/`patch` 调用。设为 `0`、`false`、`no` 或 `off` 可抑制。镜像 `config.yaml` 中的 `display.file_mutation_verifier`；设置时环境变量优先。 |
 | `HERMES_CRON_TIMEOUT` | cron 任务 agent 运行的不活动超时（秒，默认：`600`）。agent 在主动调用工具或接收流 token 时可无限运行——仅在空闲时触发。设为 `0` 表示无限制。 |
 | `HERMES_CRON_SCRIPT_TIMEOUT` | cron 任务附加的预运行脚本超时（秒，默认：`120`）。对需要更长执行时间的脚本（例如随机延迟的反机器人计时）可增大此值。也可通过 `config.yaml` 中的 `cron.script_timeout_seconds` 配置。 |
diff --git a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/user-guide/configuration.md b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/user-guide/configuration.md
index 441bad64619..f8a0f87b40a 100644
--- a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/user-guide/configuration.md
+++ b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/user-guide/configuration.md
@@ -1411,7 +1411,7 @@ code_execution:
 
 ## Web 搜索后端
 
-`web_search`、`web_extract` 和 `web_crawl` 工具支持五种后端 provider。在 `config.yaml` 中或通过 `hermes tools` 配置后端：
+`web_search` 和 `web_extract` 工具支持五种后端 provider。在 `config.yaml` 中或通过 `hermes tools` 配置后端：
 
 ```yaml
 web:
@@ -1422,17 +1422,17 @@ web:
   extract_backend: "firecrawl"
 ```
 
-| 后端 | 环境变量 | 搜索 | 提取 | 爬取 |
-|---------|---------|--------|---------|-------|
-| **Firecrawl**（默认） | `FIRECRAWL_API_KEY` | ✔ | ✔ | ✔ |
-| **SearXNG** | `SEARXNG_URL` | ✔ | — | — |
-| **Parallel** | `PARALLEL_API_KEY` | ✔ | ✔ | — |
-| **Tavily** | `TAVILY_API_KEY` | ✔ | ✔ | ✔ |
-| **Exa** | `EXA_API_KEY` | ✔ | ✔ | — |
+| 后端 | 环境变量 | 搜索 | 提取 |
+|---------|---------|--------|---------|
+| **Firecrawl**（默认） | `FIRECRAWL_API_KEY` | ✔ | ✔ |
+| **SearXNG** | `SEARXNG_URL` | ✔ | — |
+| **Parallel** | `PARALLEL_API_KEY` | ✔ | ✔ |
+| **Tavily** | `TAVILY_API_KEY` | ✔ | ✔ |
+| **Exa** | `EXA_API_KEY` | ✔ | ✔ |
 
 **后端选择：** 如果未设置 `web.backend`，后端从可用的 API 密钥自动检测。如果仅设置了 `SEARXNG_URL`，使用 SearXNG。如果仅设置了 `EXA_API_KEY`，使用 Exa。如果仅设置了 `TAVILY_API_KEY`，使用 Tavily。如果仅设置了 `PARALLEL_API_KEY`，使用 Parallel。否则 Firecrawl 是默认值。
 
-**SearXNG** 是一个免费、自托管、尊重隐私的元搜索引擎，查询 70+ 个搜索引擎。无需 API 密钥 —— 只需将 `SEARXNG_URL` 设置为您的实例（例如 `http://localhost:8080`）。SearXNG 仅限搜索；`web_extract` 和 `web_crawl` 需要单独的提取 provider（设置 `web.extract_backend`）。Docker 设置说明请参阅 [Web 搜索设置指南](/user-guide/features/web-search)。
+**SearXNG** 是一个免费、自托管、尊重隐私的元搜索引擎，查询 70+ 个搜索引擎。无需 API 密钥 —— 只需将 `SEARXNG_URL` 设置为您的实例（例如 `http://localhost:8080`）。SearXNG 仅限搜索；`web_extract` 需要单独的提取 provider（设置 `web.extract_backend`）。Docker 设置说明请参阅 [Web 搜索设置指南](/user-guide/features/web-search)。
 
 **自托管 Firecrawl：** 设置 `FIRECRAWL_API_URL` 指向您自己的实例。设置自定义 URL 后，API 密钥变为可选（在服务器上设置 `USE_DB_AUTHENTICATION=***` 以禁用认证）。
 
diff --git a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/user-guide/docker.md b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/user-guide/docker.md
index 0f3dde59dd2..b3714bc7922 100644
--- a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/user-guide/docker.md
+++ b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/user-guide/docker.md
@@ -80,8 +80,28 @@ docker run -d \
 | `HERMES_DASHBOARD_HOST` | dashboard HTTP 服务器的绑定地址 | `127.0.0.1` |
 | `HERMES_DASHBOARD_PORT` | dashboard HTTP 服务器的端口 | `9119` |
 | `HERMES_DASHBOARD_TUI` | 设为 `1` 以启用浏览器内 Chat 标签页（通过 PTY/WebSocket 嵌入 `hermes --tui`） | *（未设置）* |
+| `HERMES_DASHBOARD_INSECURE` | 设为 `1`（或 `true` / `yes`）以在不启用 OAuth 鉴权门控的情况下绑定。仅在可信网络（且通过没有 OAuth 契约的反向代理时）使用——dashboard 会暴露 API 密钥与会话数据 | *（未设置——当注册了 `DashboardAuthProvider` 时启用门控）* |
 
-默认情况下，dashboard 保持在回环地址，以避免将未经身份验证的 Web 界面暴露到网络。若要有意发布，请设置 `HERMES_DASHBOARD_HOST=0.0.0.0` 并配置你自己的可信网络边界/反向代理。在这种情况下，你必须通过命令路径中的 host/flags 显式添加 `--insecure` 行为（入口点不再自动启用不安全模式）。
+默认情况下，dashboard 保持在回环地址（`127.0.0.1`），以避免将
+Web 界面暴露到网络。若要有意发布，请设置
+`HERMES_DASHBOARD_HOST=0.0.0.0`。当以下两项同时满足时，
+dashboard 的 OAuth 鉴权门控会自动启用：
+
+1. 绑定地址为非回环地址，**且**
+2. 注册了一个 `DashboardAuthProvider` 插件。
+
+捆绑的 `dashboard_auth/nous` 提供者会在设置
+`HERMES_DASHBOARD_OAUTH_CLIENT_ID` 时自动激活（参见
+[Web Dashboard → 鉴权](features/web-dashboard.md)）。门控启用后，
+浏览器调用方会先被重定向到所配置门户的 OAuth 流，然后才能
+访问任何受保护路由。
+
+如果未注册提供者且绑定为非回环地址，dashboard **会在启动时
+失败关闭**，并给出指向缺失环境变量的具体错误信息。要显式
+退出门控——用于不使用 OAuth 契约、通过你自己的反向代理部署
+在可信局域网中的场景——请设置 `HERMES_DASHBOARD_INSECURE=1`。
+这会恢复旧的“无鉴权，但发出告警”模式，也是唯一可以禁用门控的
+路径；绑定地址不再隐式决定 `--insecure`。
 
 :::note
 dashboard 在容器内作为受监管的 s6 服务运行。如果
diff --git a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/user-guide/features/credential-pools.md b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/user-guide/features/credential-pools.md
index fe538fb9b40..d232f43506f 100644
--- a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/user-guide/features/credential-pools.md
+++ b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/user-guide/features/credential-pools.md
@@ -18,8 +18,11 @@ Your request
   → Pick key from pool (round_robin / least_used / fill_first / random)
   → Send to provider
   → 429 rate limit?
-      → Retry same key once (transient blip)
-      → Second 429 → rotate to next pool key
+      → Plan/usage limit reached (e.g. ChatGPT/Codex "usage limit reached")?
+          → Rotate to next pool key immediately (no retry — the cap won't clear on retry)
+      → Generic / transient 429?
+          → Retry same key once (transient blip)
+          → Second 429 → rotate to next pool key
       → All keys exhausted → fallback_model (different provider)
   → 402 billing error?
       → Immediately rotate to next pool key (24h cooldown)
diff --git a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/user-guide/features/skills.md b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/user-guide/features/skills.md
index 7a74b20b68f..5e71afd86fb 100644
--- a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/user-guide/features/skills.md
+++ b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/user-guide/features/skills.md
@@ -467,6 +467,7 @@ Hermes 可以直接从 GitHub 仓库和基于 GitHub 的 tap 安装。当你已
 - [openai/skills](https://github.com/openai/skills)
 - [anthropics/skills](https://github.com/anthropics/skills)
 - [huggingface/skills](https://github.com/huggingface/skills)
+- [NVIDIA/skills](https://github.com/NVIDIA/skills) — NVIDIA 官方验证的技能（带签名 `skill.oms.sig` 与治理用 `skill-card.md`）
 - [VoltAgent/awesome-agent-skills](https://github.com/VoltAgent/awesome-agent-skills)
 - [garrytan/gstack](https://github.com/garrytan/gstack)
 
@@ -578,7 +579,7 @@ hermes skills install skills-sh/anthropics/skills/pdf --force
 |-------|--------|--------|
 | `builtin` | 随 Hermes 附带 | 始终受信任 |
 | `official` | 仓库中的 `optional-skills/` | 内置信任，无第三方警告 |
-| `trusted` | 受信任的注册表/仓库，如 `openai/skills`、`anthropics/skills`、`huggingface/skills` | 比社区来源更宽松的策略 |
+| `trusted` | 受信任的注册表/仓库，如 `openai/skills`、`anthropics/skills`、`huggingface/skills`、`NVIDIA/skills` | 比社区来源更宽松的策略 |
 | `community` | 其他所有来源（`skills.sh`、well-known 端点、自定义 GitHub 仓库、大多数市场） | 非危险性发现可用 `--force` 覆盖；`dangerous` 结论保持阻止 |
 
 ### 更新生命周期
diff --git a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/user-guide/features/web-search.md b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/user-guide/features/web-search.md
index 3bb64b74dde..70b378bedd1 100644
--- a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/user-guide/features/web-search.md
+++ b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/user-guide/features/web-search.md
@@ -1,6 +1,6 @@
 ---
 title: 网页搜索与提取
-description: 通过多个后端提供商搜索网页、提取页面内容并爬取网站——包括免费的自托管 SearXNG。
+description: 通过多个后端提供商搜索网页并提取页面内容——包括免费的自托管 SearXNG。
 sidebar_label: Web Search
 sidebar_position: 6
 ---
@@ -10,22 +10,22 @@ sidebar_position: 6
 Hermes Agent 内置两个可供模型调用的网页工具，由多个提供商支持：
 
 - **`web_search`** — 搜索网页并返回排序结果
-- **`web_extract`** — 从一个或多个 URL 获取并提取可读内容（当后端支持时内置深度爬取功能）
+- **`web_extract`** — 从一个或多个 URL 获取并提取可读内容
 
-两者均通过单一后端选择进行配置。提供商可通过 `hermes tools` 选择，或直接在 `config.yaml` 中设置。递归爬取功能（Firecrawl/Tavily）通过 `web_extract` 暴露，而非作为独立的 `web_crawl` 工具。
+两者均通过单一后端选择进行配置。提供商可通过 `hermes tools` 选择，或直接在 `config.yaml` 中设置。
 
 ## 后端
 
-| 提供商 | 环境变量 | 搜索 | 提取 | 爬取 | 免费层级 |
-|----------|---------|--------|---------|-------|-----------|
-| **Firecrawl**（默认） | `FIRECRAWL_API_KEY` | ✔ | ✔ | ✔ | 500 积分/月 |
-| **SearXNG** | `SEARXNG_URL` | ✔ | — | — | ✔ 免费（自托管） |
-| **Brave Search（免费层级）** | `BRAVE_SEARCH_API_KEY` | ✔ | — | — | 2 000 次查询/月 |
-| **DDGS (DuckDuckGo)** | —（无需密钥） | ✔ | — | — | ✔ 免费 |
-| **Tavily** | `TAVILY_API_KEY` | ✔ | ✔ | ✔ | 1 000 次搜索/月 |
-| **Exa** | `EXA_API_KEY` | ✔ | ✔ | — | 1 000 次搜索/月 |
-| **Parallel** | `PARALLEL_API_KEY` | ✔ | ✔ | — | 付费 |
-| **xAI (Grok)** | `XAI_API_KEY` 或 `hermes auth login xai-oauth` | ✔ | — | — | 付费（SuperGrok 或按 token 计费） |
+| 提供商 | 环境变量 | 搜索 | 提取 | 免费层级 |
+|----------|---------|--------|---------|-----------|
+| **Firecrawl**（默认） | `FIRECRAWL_API_KEY` | ✔ | ✔ | 500 积分/月 |
+| **SearXNG** | `SEARXNG_URL` | ✔ | — | ✔ 免费（自托管） |
+| **Brave Search（免费层级）** | `BRAVE_SEARCH_API_KEY` | ✔ | — | 2 000 次查询/月 |
+| **DDGS (DuckDuckGo)** | —（无需密钥） | ✔ | — | ✔ 免费 |
+| **Tavily** | `TAVILY_API_KEY` | ✔ | ✔ | 1 000 次搜索/月 |
+| **Exa** | `EXA_API_KEY` | ✔ | ✔ | 1 000 次搜索/月 |
+| **Parallel** | `PARALLEL_API_KEY` | ✔ | ✔ | 付费 |
+| **xAI (Grok)** | `XAI_API_KEY` 或 `hermes auth login xai-oauth` | ✔ | — | 付费（SuperGrok 或按 token 计费） |
 
 Brave Search、DDGS 和 xAI 均为**仅搜索**——如果同时需要 `web_extract`，可将其中任意一个与 Firecrawl/Tavily/Exa/Parallel 配合使用。DDGS 底层使用 [`ddgs` Python 包](https://pypi.org/project/ddgs/)；若尚未安装，请运行 `pip install ddgs`（或让 Hermes 在首次使用时懒加载安装）。xAI 通过 Responses API 运行 Grok 服务端的 `web_search` 工具——结果由 LLM 生成而非基于索引，因此标题、描述和 URL 选择均为模型输出（参见下方[信任模型说明](#xai-grok)）。
 
@@ -46,7 +46,7 @@ Brave Search、DDGS 和 xAI 均为**仅搜索**——如果同时需要 `web_ext
 | 5 000 以下 | 原样返回——不调用 LLM，完整 markdown 直达 agent |
 | 5 000 – 500 000 | 通过 `web_extract` 辅助模型单次摘要，输出上限约 5 000 字符 |
 | 500 000 – 2 000 000 | 分块处理：拆分为 10 万字符的块，并行摘要每块，再合成最终摘要（约 5 000 字符） |
-| 超过 2 000 000 | 拒绝处理，并提示使用带有针对性提取指令的 `web_crawl` 或更具体的来源 |
+| 超过 2 000 000 | 拒绝处理，并提示使用更具体的来源 URL |
 
 摘要保留引用、代码块和关键事实的原始格式——它是内容压缩器，而非改写器。如果摘要失败或超时，Hermes 会回退到原始内容的前约 5 000 字符，而非返回无用的错误信息。
 
@@ -89,7 +89,7 @@ hermes tools
 
 ### Firecrawl（默认）
 
-功能完整的搜索、提取和爬取。推荐大多数用户使用。
+功能完整的搜索和提取。推荐大多数用户使用。
 
 ```bash
 # ~/.hermes/.env
@@ -113,7 +113,7 @@ FIRECRAWL_API_URL=http://localhost:3002
 
 SearXNG 是一个注重隐私的开源元搜索引擎，聚合来自 70 多个搜索引擎的结果。**无需 API 密钥**——只需将 Hermes 指向一个运行中的 SearXNG 实例。
 
-SearXNG 为**仅搜索**——`web_extract`（包括其爬取模式）需要单独的提取提供商。
+SearXNG 为**仅搜索**——`web_extract` 需要单独的提取提供商。
 
 #### 方案 A — 使用 Docker 自托管（推荐）
 
@@ -222,7 +222,7 @@ SEARXNG_URL=https://searx.example.com
 
 #### 将 SearXNG 与提取提供商配合使用
 
-SearXNG 负责搜索；`web_extract`（包括任何深度爬取模式）需要单独的提供商。使用按能力配置的键：
+SearXNG 负责搜索；`web_extract` 需要单独的提供商。使用按能力配置的键：
 
 ```yaml
 # ~/.hermes/config.yaml
@@ -237,7 +237,7 @@ web:
 
 ### Tavily
 
-针对 AI 优化的搜索、提取和爬取，免费层级慷慨。
+针对 AI 优化的搜索和提取，免费层级慷慨。
 
 ```bash
 # ~/.hermes/.env
@@ -341,7 +341,7 @@ web:
 # ~/.hermes/config.yaml
 web:
   search_backend: "searxng"     # 由 web_search 使用
-  extract_backend: "firecrawl"  # 由 web_extract（及其深度爬取模式）使用
+  extract_backend: "firecrawl"  # 由 web_extract 使用
 ```
 
 当按能力键为空时，两者均回退到 `web.backend`。当 `web.backend` 也为空时，后端根据存在的 API 密钥/URL 自动检测。
diff --git a/website/package-lock.json b/website/package-lock.json
index e566e842863..5ebeae77efd 100644
--- a/website/package-lock.json
+++ b/website/package-lock.json
@@ -12564,19 +12564,6 @@
         "uuid": "^11.1.0 || ^12 || ^13 || ^14.0.0"
       }
     },
-    "node_modules/mermaid/node_modules/uuid": {
-      "version": "11.1.0",
-      "resolved": "https://registry.npmjs.org/uuid/-/uuid-11.1.0.tgz",
-      "integrity": "sha512-0/A9rDy9P7cJ+8w1c9WD9V//9Wj15Ce2MPz8Ri6032usz+NfePxx5AcN3bN+r6ZL6jEo066/yNYB3tn4pQEx+A==",
-      "funding": [
-        "https://github.com/sponsors/broofa",
-        "https://github.com/sponsors/ctavan"
-      ],
-      "license": "MIT",
-      "bin": {
-        "uuid": "dist/esm/bin/uuid"
-      }
-    },
     "node_modules/methods": {
       "version": "1.1.2",
       "resolved": "https://registry.npmjs.org/methods/-/methods-1.1.2.tgz",
@@ -16895,15 +16882,6 @@
         "url": "https://github.com/sponsors/sindresorhus"
       }
     },
-    "node_modules/randombytes": {
-      "version": "2.1.0",
-      "resolved": "https://registry.npmjs.org/randombytes/-/randombytes-2.1.0.tgz",
-      "integrity": "sha512-vYl3iOX+4CKUWuxGi9Ukhie6fsqXqS9FE2Zaic4tNFD2N2QQaXOMFbuKK4QmDHC0JO6B1Zp41J0LpT0oR68amQ==",
-      "license": "MIT",
-      "dependencies": {
-        "safe-buffer": "^5.1.0"
-      }
-    },
     "node_modules/range-parser": {
       "version": "1.2.0",
       "resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.0.tgz",
@@ -17921,12 +17899,12 @@
       }
     },
     "node_modules/serialize-javascript": {
-      "version": "6.0.2",
-      "resolved": "https://registry.npmjs.org/serialize-javascript/-/serialize-javascript-6.0.2.tgz",
-      "integrity": "sha512-Saa1xPByTTq2gdeFZYLLo+RFE35NHZkAbqZeWNd3BpzppeVisAqpDjcp8dyf6uIvEqJRd46jemmyA4iFIeVk8g==",
+      "version": "7.0.5",
+      "resolved": "https://registry.npmjs.org/serialize-javascript/-/serialize-javascript-7.0.5.tgz",
+      "integrity": "sha512-F4LcB0UqUl1zErq+1nYEEzSHJnIwb3AF2XWB94b+afhrekOUijwooAYqFyRbjYkm2PAKBabx6oYv/xDxNi8IBw==",
       "license": "BSD-3-Clause",
-      "dependencies": {
-        "randombytes": "^2.1.0"
+      "engines": {
+        "node": ">=20.0.0"
       }
     },
     "node_modules/serve-handler": {
@@ -19405,12 +19383,16 @@
       }
     },
     "node_modules/uuid": {
-      "version": "8.3.2",
-      "resolved": "https://registry.npmjs.org/uuid/-/uuid-8.3.2.tgz",
-      "integrity": "sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg==",
+      "version": "14.0.0",
+      "resolved": "https://registry.npmjs.org/uuid/-/uuid-14.0.0.tgz",
+      "integrity": "sha512-Qo+uWgilfSmAhXCMav1uYFynlQO7fMFiMVZsQqZRMIXp0O7rR7qjkj+cPvBHLgBqi960QCoo/PH2/6ZtVqKvrg==",
+      "funding": [
+        "https://github.com/sponsors/broofa",
+        "https://github.com/sponsors/ctavan"
+      ],
       "license": "MIT",
       "bin": {
-        "uuid": "dist/bin/uuid"
+        "uuid": "dist-node/bin/uuid"
       }
     },
     "node_modules/value-equal": {
diff --git a/website/package.json b/website/package.json
index fc21cd60a75..92227c5d0c8 100644
--- a/website/package.json
+++ b/website/package.json
@@ -34,6 +34,10 @@
     "@docusaurus/types": "3.9.2",
     "typescript": "~5.6.2"
   },
+  "overrides": {
+    "serialize-javascript": "^7.0.5",
+    "uuid": "^14.0.0"
+  },
   "browserslist": {
     "production": [
       ">0.5%",
diff --git a/website/scripts/extract-skills.py b/website/scripts/extract-skills.py
index dd648589db8..c0aa362f0a8 100644
--- a/website/scripts/extract-skills.py
+++ b/website/scripts/extract-skills.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-"""Extract skill metadata into website/src/data/skills.json for the Skills Hub page.
+"""Extract skill metadata into website/static/api/skills.json for the Skills Hub page.
 
 Two data sources:
 
@@ -32,8 +32,12 @@ LOCAL_SKILL_DIRS = [
 ]
 UNIFIED_INDEX_PATH = os.path.join(REPO_ROOT, "website", "static", "api", "skills-index.json")
 LEGACY_INDEX_CACHE_DIR = os.path.join(REPO_ROOT, "skills", "index-cache")
-OUTPUT = os.path.join(REPO_ROOT, "website", "src", "data", "skills.json")
-META_OUTPUT = os.path.join(REPO_ROOT, "website", "src", "data", "skills-meta.json")
+# Output to static/api/ so the file is CDN-served at /api/skills.json
+# rather than bundled into the page's JS chunk. At 50k+ skills the
+# bundled payload was ~26 MB; lazy-fetch keeps the initial page load
+# fast and shrinks the JS chunk back to a few hundred KB.
+OUTPUT = os.path.join(REPO_ROOT, "website", "static", "api", "skills.json")
+META_OUTPUT = os.path.join(REPO_ROOT, "website", "static", "api", "skills-meta.json")
 
 CATEGORY_LABELS = {
     "apple": "Apple",
@@ -91,6 +95,7 @@ GITHUB_TAP_LABELS = {
     "openai/skills": "OpenAI",
     "anthropics/skills": "Anthropic",
     "huggingface/skills": "HuggingFace",
+    "NVIDIA/skills": "NVIDIA",
     "VoltAgent/awesome-agent-skills": "VoltAgent",
     "garrytan/gstack": "gstack",
     "MiniMax-AI/cli": "MiniMax",
@@ -338,6 +343,15 @@ def extract_unified_index_skills():
         category = _guess_category(tags)
         extra = entry.get("extra", {}) or {}
 
+        # A skills.sh.json grouping sidecar (if the tap ships one) gives us a
+        # real, human-readable category — prefer it over the tag heuristic.
+        # extra["category"] holds the grouping title, e.g. "Inference AI".
+        sidecar_category = extra.get("category") if isinstance(extra, dict) else None
+        category_label_override = ""
+        if isinstance(sidecar_category, str) and sidecar_category.strip():
+            category_label_override = sidecar_category.strip()
+            category = category_label_override.lower().replace(" ", "-")
+
         # Author hint from extras when available (skills.sh has installs;
         # clawhub doesn't expose author).
         author = ""
@@ -353,7 +367,8 @@ def extract_unified_index_skills():
             "description": description,
             "overview": "",
             "category": category,
-            "categoryLabel": "",  # filled in _consolidate_small_categories
+            "categoryLabel": category_label_override,  # set from sidecar, else filled in _consolidate_small_categories
+            "fixedCategory": bool(category_label_override),  # sidecar categories are exempt from small-cat collapse
             "source": source_label,
             "tags": tags,
             "platforms": [],
@@ -486,10 +501,17 @@ def _consolidate_small_categories(skills: list) -> list:
             s["category"] = "other"
             s["categoryLabel"] = "Other"
 
-    counts = Counter(s["category"] for s in skills)
+    # Skills with a sidecar-declared category (skills.sh.json grouping) keep
+    # their category even if it's the only skill in it — the tap explicitly
+    # chose that label, so it's not a heuristic guess to collapse away.
+    counts = Counter(
+        s["category"] for s in skills if not s.get("fixedCategory")
+    )
     small_cats = {cat for cat, n in counts.items() if n < MIN_CATEGORY_SIZE}
 
     for s in skills:
+        if s.get("fixedCategory"):
+            continue
         if s["category"] in small_cats:
             s["category"] = "other"
             s["categoryLabel"] = "Other"
@@ -531,7 +553,9 @@ def main():
 
     os.makedirs(os.path.dirname(OUTPUT), exist_ok=True)
     with open(OUTPUT, "w", encoding="utf-8") as f:
-        json.dump(all_skills, f, indent=2)
+        # Minified — file is served over the wire, not read by humans.
+        # At 50k+ skills the indented version was ~30% larger.
+        json.dump(all_skills, f, separators=(",", ":"), ensure_ascii=False)
 
     # Sidecar meta file so the page can render a "Last refreshed" badge
     # without changing the shape of skills.json.
@@ -547,7 +571,7 @@ def main():
     if index_meta:
         meta.update(index_meta)
     with open(META_OUTPUT, "w", encoding="utf-8") as f:
-        json.dump(meta, f, indent=2)
+        json.dump(meta, f, separators=(",", ":"), ensure_ascii=False)
 
     print(f"Extracted {len(all_skills)} skills to {OUTPUT}")
     print(f"  {len(local)} local ({sum(1 for s in local if s['source'] == 'built-in')} built-in, "
diff --git a/website/scripts/generate-skill-docs.py b/website/scripts/generate-skill-docs.py
index c932f01e1bc..2d2b19b1997 100755
--- a/website/scripts/generate-skill-docs.py
+++ b/website/scripts/generate-skill-docs.py
@@ -14,10 +14,8 @@ Sidebar is updated to nest all per-skill pages under Skills → Bundled / Option
 
 from __future__ import annotations
 import re
-import sys
 from collections import defaultdict
 from pathlib import Path
-from textwrap import dedent
 from typing import Any
 
 import yaml
diff --git a/website/scripts/prebuild.mjs b/website/scripts/prebuild.mjs
index 32e050bd933..11f5e07521e 100644
--- a/website/scripts/prebuild.mjs
+++ b/website/scripts/prebuild.mjs
@@ -1,7 +1,8 @@
 #!/usr/bin/env node
 // Runs website/scripts/extract-skills.py and generate-llms-txt.py before
 // docusaurus build/start so that:
-//   - website/src/data/skills.json (imported by src/pages/skills/index.tsx)
+//   - website/static/api/skills.json (lazy-fetched by src/pages/skills/index.tsx)
+//   - website/static/api/skills-meta.json (sidecar metadata for the Skills Hub)
 //   - website/static/llms.txt (agent-friendly short docs index)
 //   - website/static/llms-full.txt (full docs concat for LLM context)
 // all exist without contributors remembering to run Python scripts manually.
@@ -30,7 +31,7 @@ const scriptDir = dirname(fileURLToPath(import.meta.url));
 const websiteDir = resolve(scriptDir, "..");
 const extractScript = join(scriptDir, "extract-skills.py");
 const llmsScript = join(scriptDir, "generate-llms-txt.py");
-const outputFile = join(websiteDir, "src", "data", "skills.json");
+const outputFile = join(websiteDir, "static", "api", "skills.json");
 const unifiedIndexFile = join(websiteDir, "static", "api", "skills-index.json");
 const UNIFIED_INDEX_URL =
   "https://hermes-agent.nousresearch.com/docs/api/skills-index.json";
diff --git a/website/sidebars.ts b/website/sidebars.ts
index a994e4e7fee..713f9faf70a 100644
--- a/website/sidebars.ts
+++ b/website/sidebars.ts
@@ -39,6 +39,7 @@ const sidebars: SidebarsConfig = {
         'user-guide/sessions',
         'user-guide/profiles',
         'user-guide/profile-distributions',
+        'user-guide/multi-profile-gateways',
         'user-guide/git-worktrees',
         'user-guide/docker',
         'user-guide/security',
@@ -62,6 +63,7 @@ const sidebars: SidebarsConfig = {
             'user-guide/features/curator',
             'user-guide/features/memory',
             'user-guide/features/memory-providers',
+            'user-guide/features/honcho',
             'user-guide/features/context-files',
             'user-guide/features/context-references',
             'user-guide/features/personality',
@@ -97,6 +99,7 @@ const sidebars: SidebarsConfig = {
             'user-guide/features/computer-use',
             'user-guide/features/vision',
             'user-guide/features/image-generation',
+            'user-guide/features/spotify',
             'user-guide/features/tts',
             'user-guide/features/deliverable-mode',
           ],
@@ -107,16 +110,10 @@ const sidebars: SidebarsConfig = {
           items: [
             'user-guide/features/web-dashboard',
             'user-guide/features/extending-the-dashboard',
+            'user-guide/features/api-server',
             'user-guide/features/subscription-proxy',
           ],
         },
-        {
-          type: 'category',
-          label: 'Advanced',
-          items: [
-            'user-guide/features/spotify',
-          ],
-        },
         {
           type: 'category',
           label: 'Skills',
@@ -151,6 +148,7 @@ const sidebars: SidebarsConfig = {
                     'user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-claude-code',
                     'user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-codex',
                     'user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-hermes-agent',
+                    'user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-kanban-codex-lane',
                     'user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-opencode',
                   ],
                 },
@@ -357,6 +355,7 @@ const sidebars: SidebarsConfig = {
                   items: [
                     'user-guide/skills/bundled/software-development/software-development-debugging-hermes-tui-commands',
                     'user-guide/skills/bundled/software-development/software-development-hermes-agent-skill-authoring',
+                    'user-guide/skills/bundled/software-development/software-development-hermes-s6-container-supervision',
                     'user-guide/skills/bundled/software-development/software-development-node-inspect-debugger',
                     'user-guide/skills/bundled/software-development/software-development-plan',
                     'user-guide/skills/bundled/software-development/software-development-python-debugpy',
@@ -390,7 +389,9 @@ const sidebars: SidebarsConfig = {
                   key: 'skills-optional-autonomous-ai-agents',
                   collapsed: true,
                   items: [
+                    'user-guide/skills/optional/autonomous-ai-agents/autonomous-ai-agents-antigravity-cli',
                     'user-guide/skills/optional/autonomous-ai-agents/autonomous-ai-agents-blackbox',
+                    'user-guide/skills/optional/autonomous-ai-agents/autonomous-ai-agents-grok',
                     'user-guide/skills/optional/autonomous-ai-agents/autonomous-ai-agents-honcho',
                     'user-guide/skills/optional/autonomous-ai-agents/autonomous-ai-agents-openhands',
                   ],
@@ -582,6 +583,7 @@ const sidebars: SidebarsConfig = {
                     'user-guide/skills/optional/security/security-1password',
                     'user-guide/skills/optional/security/security-oss-forensics',
                     'user-guide/skills/optional/security/security-sherlock',
+                    'user-guide/skills/optional/security/security-web-pentest',
                   ],
                 },
                 {
@@ -615,32 +617,57 @@ const sidebars: SidebarsConfig = {
       collapsed: true,
       items: [
         'user-guide/messaging/index',
-        'user-guide/messaging/telegram',
-        'user-guide/messaging/discord',
-        'user-guide/messaging/slack',
-        'user-guide/messaging/whatsapp',
-        'user-guide/messaging/signal',
-        'user-guide/messaging/email',
-        'user-guide/messaging/sms',
-        'user-guide/messaging/homeassistant',
-        'user-guide/messaging/mattermost',
-        'user-guide/messaging/matrix',
-        'user-guide/messaging/dingtalk',
-        'user-guide/messaging/feishu',
-        'user-guide/messaging/wecom',
-        'user-guide/messaging/wecom-callback',
-        'user-guide/messaging/weixin',
-        'user-guide/messaging/bluebubbles',
-        'user-guide/messaging/qqbot',
-        'user-guide/messaging/yuanbao',
-        'user-guide/messaging/teams',
-        'user-guide/messaging/teams-meetings',
-        'user-guide/messaging/msgraph-webhook',
-        'user-guide/messaging/line',
-        'user-guide/messaging/simplex',
-        'user-guide/messaging/ntfy',
-        'user-guide/messaging/open-webui',
-        'user-guide/messaging/webhooks',
+        {
+          type: 'category',
+          label: 'Popular',
+          items: [
+            'user-guide/messaging/telegram',
+            'user-guide/messaging/discord',
+            'user-guide/messaging/slack',
+            'user-guide/messaging/whatsapp',
+            'user-guide/messaging/signal',
+            'user-guide/messaging/email',
+            'user-guide/messaging/sms',
+          ],
+        },
+        {
+          type: 'category',
+          label: 'Microsoft 365',
+          items: [
+            'user-guide/messaging/teams',
+            'user-guide/messaging/teams-meetings',
+            'user-guide/messaging/msgraph-webhook',
+          ],
+        },
+        {
+          type: 'category',
+          label: 'Chinese platforms',
+          items: [
+            'user-guide/messaging/dingtalk',
+            'user-guide/messaging/feishu',
+            'user-guide/messaging/wecom',
+            'user-guide/messaging/wecom-callback',
+            'user-guide/messaging/weixin',
+            'user-guide/messaging/qqbot',
+            'user-guide/messaging/yuanbao',
+          ],
+        },
+        {
+          type: 'category',
+          label: 'Other',
+          items: [
+            'user-guide/messaging/homeassistant',
+            'user-guide/messaging/mattermost',
+            'user-guide/messaging/matrix',
+            'user-guide/messaging/bluebubbles',
+            'user-guide/messaging/google_chat',
+            'user-guide/messaging/line',
+            'user-guide/messaging/simplex',
+            'user-guide/messaging/ntfy',
+            'user-guide/messaging/open-webui',
+            'user-guide/messaging/webhooks',
+          ],
+        },
       ],
     },
     {
@@ -653,8 +680,6 @@ const sidebars: SidebarsConfig = {
         'integrations/providers',
         'user-guide/features/mcp',
         'user-guide/features/acp',
-        'user-guide/features/api-server',
-        'user-guide/features/honcho',
         'user-guide/features/provider-routing',
         'user-guide/features/fallback-providers',
         'user-guide/features/credential-pools',
@@ -724,6 +749,7 @@ const sidebars: SidebarsConfig = {
             'developer-guide/model-provider-plugin',
             'developer-guide/image-gen-provider-plugin',
             'developer-guide/video-gen-provider-plugin',
+            'developer-guide/web-search-provider-plugin',
             'developer-guide/plugin-llm-access',
             'developer-guide/creating-skills',
             'developer-guide/extending-the-cli',
@@ -734,6 +760,7 @@ const sidebars: SidebarsConfig = {
           label: 'Internals',
           items: [
             'developer-guide/tools-runtime',
+            'developer-guide/browser-supervisor',
             'developer-guide/acp-internals',
             'developer-guide/cron-internals',
             'developer-guide/trajectory-format',
@@ -745,16 +772,34 @@ const sidebars: SidebarsConfig = {
       type: 'category',
       label: 'Reference',
       items: [
-        'reference/cli-commands',
-        'reference/slash-commands',
-        'reference/profile-commands',
-        'reference/environment-variables',
-        'reference/tools-reference',
-        'reference/toolsets-reference',
-        'reference/mcp-config-reference',
-        'reference/model-catalog',
-        'reference/skills-catalog',
-        'reference/optional-skills-catalog',
+        {
+          type: 'category',
+          label: 'Command Reference',
+          items: [
+            'reference/cli-commands',
+            'reference/slash-commands',
+            'reference/profile-commands',
+          ],
+        },
+        {
+          type: 'category',
+          label: 'Configuration Reference',
+          items: [
+            'reference/environment-variables',
+            'reference/mcp-config-reference',
+            'reference/model-catalog',
+          ],
+        },
+        {
+          type: 'category',
+          label: 'Tools & Skills Reference',
+          items: [
+            'reference/tools-reference',
+            'reference/toolsets-reference',
+            'reference/skills-catalog',
+            'reference/optional-skills-catalog',
+          ],
+        },
         'reference/faq',
       ],
     },
diff --git a/website/src/pages/skills/index.tsx b/website/src/pages/skills/index.tsx
index 0ef6f64abc2..9595af67e4f 100644
--- a/website/src/pages/skills/index.tsx
+++ b/website/src/pages/skills/index.tsx
@@ -1,7 +1,5 @@
 import React, { useState, useMemo, useCallback, useRef, useEffect } from "react";
 import Layout from "@theme/Layout";
-import skills from "../../data/skills.json";
-import meta from "../../data/skills-meta.json";
 import styles from "./styles.module.css";
 
 interface Skill {
@@ -21,9 +19,14 @@ interface Skill {
   docsPath?: string;
   identifier?: string;
   installCmd?: string;
+  /** Lowercase pre-joined haystack used by the search filter.
+   *  Built once at load time so per-keystroke filtering is a single
+   *  `.includes()` per skill instead of array-join + toLowerCase on
+   *  every render. Skipped on the wire — added in the loader. */
+  _search?: string;
 }
 
-const allSkills: Skill[] = skills as Skill[];
+const allSkills: Skill[] = [];
 
 interface IndexMeta {
   extractedAt?: string;
@@ -32,7 +35,7 @@ interface IndexMeta {
   externalSource?: string;
   bySource?: Record<string, number>;
 }
-const indexMeta: IndexMeta = meta as IndexMeta;
+const indexMeta: IndexMeta = {};
 
 function formatRelativeTime(iso?: string): string | null {
   if (!iso) return null;
@@ -160,6 +163,13 @@ const SOURCE_CONFIG: Record<
     border: "rgba(251, 191, 36, 0.2)",
     icon: "\u{1F917}",
   },
+  NVIDIA: {
+    label: "NVIDIA",
+    color: "#76b900",
+    bg: "rgba(118, 185, 0, 0.08)",
+    border: "rgba(118, 185, 0, 0.25)",
+    icon: "\u{25B6}",
+  },
   VoltAgent: {
     label: "VoltAgent",
     color: "#facc15",
@@ -204,6 +214,7 @@ const SOURCE_ORDER = [
   "Anthropic",
   "OpenAI",
   "HuggingFace",
+  "NVIDIA",
   "skills.sh",
   "ClawHub",
   "browse.sh",
@@ -398,8 +409,43 @@ function StatCard({ value, label, color }: { value: number; label: string; color
 
 const PAGE_SIZE = 60;
 
+// Routes Docusaurus serves the static API JSON from. `baseUrl` is `/docs/`,
+// `static/api/` ends up at `/docs/api/`. Hardcoding here is fine because the
+// same `baseUrl` is enforced repo-wide; if it ever changes, this is the only
+// place that needs to follow.
+const SKILLS_URL = "/docs/api/skills.json";
+const META_URL = "/docs/api/skills-meta.json";
+
+function buildSearchHaystack(s: Skill): string {
+  // Pre-compute the lowercase blob the search filter scans. Done once at
+  // load time instead of per-keystroke per-skill. With 50k+ skills the
+  // per-keystroke variant was unusably slow.
+  return [
+    s.name,
+    s.description,
+    s.overview,
+    s.categoryLabel,
+    s.author,
+    ...(s.tags || []),
+  ]
+    .filter(Boolean)
+    .join(" ")
+    .toLowerCase();
+}
+
 export default function SkillsDashboard() {
+  // Lazy-loaded data. Was bundled into the JS chunk (~22 MB at 50k skills,
+  // which made the initial page load unusable on mobile). Now fetched on
+  // mount from the same CDN that serves the docs.
+  const [data, setData] = useState<{ skills: Skill[]; meta: IndexMeta } | null>(null);
+  const [loadError, setLoadError] = useState<string | null>(null);
+
   const [search, setSearch] = useState("");
+  // Debounced copy of `search` — used by the filter. Without the debounce,
+  // typing into the search box ran .filter() over the whole catalog on
+  // every keystroke, which on a 50k-item list felt like the page had
+  // hung. 150ms gives a snappy feel without lagging behind the user.
+  const [debouncedSearch, setDebouncedSearch] = useState("");
   const [sourceFilter, setSourceFilter] = useState("all");
   const [categoryFilter, setCategoryFilter] = useState("all");
   const [expandedCard, setExpandedCard] = useState<string | null>(null);
@@ -408,6 +454,42 @@ export default function SkillsDashboard() {
   const searchRef = useRef<HTMLInputElement>(null);
   const gridRef = useRef<HTMLDivElement>(null);
 
+  useEffect(() => {
+    let cancelled = false;
+    (async () => {
+      try {
+        const [sk, mt] = await Promise.all([
+          fetch(SKILLS_URL).then((r) => {
+            if (!r.ok) throw new Error(`skills.json HTTP ${r.status}`);
+            return r.json();
+          }),
+          fetch(META_URL).then((r) => (r.ok ? r.json() : {})).catch(() => ({})),
+        ]);
+        if (cancelled) return;
+        const skillsArr = Array.isArray(sk) ? (sk as Skill[]) : [];
+        // Stamp the precomputed search haystack onto each row.
+        for (const s of skillsArr) s._search = buildSearchHaystack(s);
+        setData({ skills: skillsArr, meta: mt || {} });
+      } catch (err) {
+        if (cancelled) return;
+        setLoadError(err instanceof Error ? err.message : String(err));
+      }
+    })();
+    return () => {
+      cancelled = true;
+    };
+  }, []);
+
+  // Debounce the search input — 150ms feels instant while preventing the
+  // filter from running on every individual keystroke.
+  useEffect(() => {
+    const t = setTimeout(() => setDebouncedSearch(search), 150);
+    return () => clearTimeout(t);
+  }, [search]);
+
+  const allSkillsLocal: Skill[] = data?.skills ?? [];
+  const indexMetaLocal: IndexMeta = data?.meta ?? indexMeta;
+
   useEffect(() => {
     const handler = (e: KeyboardEvent) => {
       if (e.key === "/" && document.activeElement?.tagName !== "INPUT") {
@@ -424,15 +506,15 @@ export default function SkillsDashboard() {
   }, []);
 
   const sources = useMemo(() => {
-    const set = new Set(allSkills.map((s) => s.source));
+    const set = new Set(allSkillsLocal.map((s) => s.source));
     return SOURCE_ORDER.filter((s) => s === "all" || set.has(s));
-  }, []);
+  }, [allSkillsLocal]);
 
   const categoryEntries = useMemo(() => {
     const pool =
       sourceFilter === "all"
-        ? allSkills
-        : allSkills.filter((s) => s.source === sourceFilter);
+        ? allSkillsLocal
+        : allSkillsLocal.filter((s) => s.source === sourceFilter);
     const map = new Map<string, { label: string; count: number }>();
     for (const s of pool) {
       const key = s.category || "uncategorized";
@@ -449,27 +531,25 @@ export default function SkillsDashboard() {
     return Array.from(map.entries())
       .sort((a, b) => b[1].count - a[1].count)
       .map(([key, { label, count }]) => ({ key, label, count }));
-  }, [sourceFilter]);
+  }, [sourceFilter, allSkillsLocal]);
 
   const filtered = useMemo(() => {
-    const q = search.toLowerCase().trim();
-    return allSkills.filter((s) => {
+    const q = debouncedSearch.toLowerCase().trim();
+    return allSkillsLocal.filter((s) => {
       if (sourceFilter !== "all" && s.source !== sourceFilter) return false;
       if (categoryFilter !== "all" && s.category !== categoryFilter) return false;
       if (q) {
-        const haystack = [s.name, s.description, s.overview, s.categoryLabel, s.author, ...(s.tags || [])]
-          .join(" ")
-          .toLowerCase();
-        return haystack.includes(q);
+        // _search is pre-built in the load effect — single .includes() per row.
+        return (s._search || "").includes(q);
       }
       return true;
     });
-  }, [search, sourceFilter, categoryFilter]);
+  }, [debouncedSearch, sourceFilter, categoryFilter, allSkillsLocal]);
 
   useEffect(() => {
     setVisibleCount(PAGE_SIZE);
     setExpandedCard(null);
-  }, [search, sourceFilter, categoryFilter]);
+  }, [debouncedSearch, sourceFilter, categoryFilter]);
 
   const visible = filtered.slice(0, visibleCount);
   const hasMore = visibleCount < filtered.length;
@@ -512,15 +592,22 @@ export default function SkillsDashboard() {
             <h1 className={styles.heroTitle}>Skills Hub</h1>
             <p className={styles.heroSub}>
               Discover, search, and install from{" "}
-              <strong className={styles.heroAccent}>{allSkills.length}</strong> skills
-              across {sources.length - 1} registries
+              <strong className={styles.heroAccent}>
+                {data ? allSkillsLocal.length.toLocaleString() : "…"}
+              </strong>{" "}
+              skills across {sources.length - 1} registries
+              {loadError && (
+                <span style={{ color: "#f87171", marginLeft: 8 }}>
+                  · failed to load catalog ({loadError})
+                </span>
+              )}
             </p>
-            {(indexMeta?.indexGeneratedAt || indexMeta?.extractedAt) && (
+            {(indexMetaLocal?.indexGeneratedAt || indexMetaLocal?.extractedAt) && (
               <p className={styles.heroSub} style={{ fontSize: "0.85rem", opacity: 0.75 }}>
                 Catalog refreshed{" "}
-                <span title={indexMeta.indexGeneratedAt || indexMeta.extractedAt}>
+                <span title={indexMetaLocal.indexGeneratedAt || indexMetaLocal.extractedAt}>
                   {formatRelativeTime(
-                    indexMeta.indexGeneratedAt || indexMeta.extractedAt,
+                    indexMetaLocal.indexGeneratedAt || indexMetaLocal.extractedAt,
                   ) || "recently"}
                 </span>
                 {" "}· auto-rebuilt twice daily
@@ -529,18 +616,18 @@ export default function SkillsDashboard() {
 
             <div className={styles.statsRow}>
               <StatCard
-                value={allSkills.filter((s) => s.source === "built-in").length}
+                value={allSkillsLocal.filter((s) => s.source === "built-in").length}
                 label="Built-in"
                 color="#4ade80"
               />
               <StatCard
-                value={allSkills.filter((s) => s.source === "optional").length}
+                value={allSkillsLocal.filter((s) => s.source === "optional").length}
                 label="Optional"
                 color="#fbbf24"
               />
               <StatCard
                 value={
-                  allSkills.filter(
+                  allSkillsLocal.filter(
                     (s) => s.source !== "built-in" && s.source !== "optional"
                   ).length
                 }
@@ -548,7 +635,7 @@ export default function SkillsDashboard() {
                 color="#60a5fa"
               />
               <StatCard
-                value={new Set(allSkills.map((s) => s.category)).size}
+                value={new Set(allSkillsLocal.map((s) => s.category)).size}
                 label="Categories"
                 color="#a78bfa"
               />
@@ -592,8 +679,8 @@ export default function SkillsDashboard() {
               const conf = SOURCE_CONFIG[src];
               const count =
                 src === "all"
-                  ? allSkills.length
-                  : allSkills.filter((s) => s.source === src).length;
+                  ? allSkillsLocal.length
+                  : allSkillsLocal.filter((s) => s.source === src).length;
               return (
                 <button
                   key={src}
diff --git a/website/static/api/model-catalog.json b/website/static/api/model-catalog.json
index 1a084917aab..13a147dfa74 100644
--- a/website/static/api/model-catalog.json
+++ b/website/static/api/model-catalog.json
@@ -1,6 +1,6 @@
 {
   "version": 1,
-  "updated_at": "2026-05-26T20:49:36Z",
+  "updated_at": "2026-05-29T11:20:16Z",
   "metadata": {
     "source": "hermes-agent repo",
     "docs": "https://hermes-agent.nousresearch.com/docs/reference/model-catalog"
@@ -12,6 +12,14 @@
         "note": "Descriptions drive picker badges. Live /api/v1/models filters curated ids by tool-calling support and free pricing."
       },
       "models": [
+        {
+          "id": "anthropic/claude-opus-4.8",
+          "description": ""
+        },
+        {
+          "id": "anthropic/claude-opus-4.8-fast",
+          "description": "2x price, higher output speed"
+        },
         {
           "id": "anthropic/claude-opus-4.7",
           "description": ""
@@ -73,7 +81,7 @@
           "description": ""
         },
         {
-          "id": "google/gemini-3-flash-preview",
+          "id": "google/gemini-3.5-flash",
           "description": ""
         },
         {
@@ -89,7 +97,7 @@
           "description": ""
         },
         {
-          "id": "stepfun/step-3.5-flash",
+          "id": "stepfun/step-3.7-flash",
           "description": ""
         },
         {
@@ -144,6 +152,9 @@
         "note": "Free-tier gating is determined live via Portal pricing (partition_nous_models_by_tier), not this manifest."
       },
       "models": [
+        {
+          "id": "anthropic/claude-opus-4.8"
+        },
         {
           "id": "anthropic/claude-opus-4.7"
         },
@@ -187,7 +198,7 @@
           "id": "google/gemini-3-pro-preview"
         },
         {
-          "id": "google/gemini-3-flash-preview"
+          "id": "google/gemini-3.5-flash"
         },
         {
           "id": "google/gemini-3.1-pro-preview"
@@ -199,7 +210,7 @@
           "id": "qwen/qwen3.6-35b-a3b"
         },
         {
-          "id": "stepfun/step-3.5-flash"
+          "id": "stepfun/step-3.7-flash"
         },
         {
           "id": "minimax/minimax-m2.7"