diff --git a/.github/actions/nix-setup/action.yml b/.github/actions/nix-setup/action.yml
new file mode 100644
index 0000000000..0fcd7784bc
--- /dev/null
+++ b/.github/actions/nix-setup/action.yml
@@ -0,0 +1,8 @@
+name: 'Setup Nix'
+description: 'Install Nix with DeterminateSystems and enable magic-nix-cache'
+
+runs:
+  using: composite
+  steps:
+    - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22
+    - uses: DeterminateSystems/magic-nix-cache-action@565684385bcd71bad329742eefe8d12f2e765b39 # v13
diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml
index f9e846e68c..228ee33964 100644
--- a/.github/workflows/docker-publish.yml
+++ b/.github/workflows/docker-publish.yml
@@ -3,8 +3,13 @@ name: Docker Build and Publish
 on:
   push:
     branches: [main]
-  pull_request:
-    branches: [main]
+    paths:
+      - '**/*.py'
+      - 'pyproject.toml'
+      - 'uv.lock'
+      - 'Dockerfile'
+      - 'docker/**'
+      - '.github/workflows/docker-publish.yml'
   release:
     types: [published]
@@ -49,6 +54,14 @@ jobs:
       - name: Test image starts
         run: |
+          # The image runs as the hermes user (UID 10000). GitHub Actions
+          # creates /tmp/hermes-test root-owned by default, which hermes
+          # can't write to — chown it to match the in-container UID before
+          # bind-mounting. Real users doing `docker run -v ~/.hermes:...`
+          # with their own UID hit the same issue and have their own
+          # remediations (HERMES_UID env var, or chown locally).
+          mkdir -p /tmp/hermes-test
+          sudo chown -R 10000:10000 /tmp/hermes-test
           docker run --rm \
             -v /tmp/hermes-test:/opt/data \
             --entrypoint /opt/hermes/docker/entrypoint.sh \
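A rough Python equivalent of the ownership pre-step above — `prepare_mount_dir` is a hypothetical helper written for illustration, and UID/GID 10000 are this image's values, not a general default:

```python
import os

def prepare_mount_dir(path: str, uid: int = 10000, gid: int = 10000) -> None:
    # Make the bind-mount source writable by the container user before
    # `docker run -v src:/opt/data`. Needs root, like `sudo chown -R` in CI.
    os.makedirs(path, exist_ok=True)
    os.chown(path, uid, gid)

prepare_mount_dir("/tmp/hermes-test")
```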
diff --git a/.github/workflows/nix-lockfile-check.yml b/.github/workflows/nix-lockfile-check.yml
new file mode 100644
index 0000000000..9c9bc734a6
--- /dev/null
+++ b/.github/workflows/nix-lockfile-check.yml
@@ -0,0 +1,68 @@
+name: Nix Lockfile Check
+
+on:
+  pull_request:
+  workflow_dispatch:
+
+permissions:
+  contents: read
+  pull-requests: write
+
+concurrency:
+  group: nix-lockfile-check-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  check:
+    runs-on: ubuntu-latest
+    timeout-minutes: 20
+    steps:
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+
+      - uses: ./.github/actions/nix-setup
+
+      - name: Resolve head SHA
+        id: sha
+        shell: bash
+        run: |
+          FULL="${{ github.event.pull_request.head.sha || github.sha }}"
+          echo "full=$FULL" >> "$GITHUB_OUTPUT"
+          echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
+
+      - name: Check lockfile hashes
+        id: check
+        continue-on-error: true
+        env:
+          LINK_SHA: ${{ steps.sha.outputs.full }}
+        run: nix run .#fix-lockfiles -- --check
+
+      - name: Post sticky PR comment (stale)
+        if: steps.check.outputs.stale == 'true' && github.event_name == 'pull_request'
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
+        with:
+          header: nix-lockfile-check
+          message: |
+            ### ⚠️ npm lockfile hash out of date
+
+            Checked against commit [`${{ steps.sha.outputs.short }}`](${{ github.server_url }}/${{ github.repository }}/commit/${{ steps.sha.outputs.full }}) (PR head at check time).
+
+            The `hash = "sha256-..."` line in these nix files no longer matches the committed `package-lock.json`:
+
+            ${{ steps.check.outputs.report }}
+
+            #### Apply the fix
+
+            - [ ] **Apply lockfile fix** — tick to push a commit with the correct hashes to this PR branch
+            - Or [run the Nix Lockfile Fix workflow](${{ github.server_url }}/${{ github.repository }}/actions/workflows/nix-lockfile-fix.yml) manually (pass PR `#${{ github.event.pull_request.number }}`)
+            - Or locally: `nix run .#fix-lockfiles -- --apply` and commit the diff
+
+      - name: Clear sticky PR comment (resolved)
+        if: steps.check.outputs.stale == 'false' && github.event_name == 'pull_request'
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
+        with:
+          header: nix-lockfile-check
+          delete: true
+
+      - name: Fail if stale
+        if: steps.check.outputs.stale == 'true'
+        run: exit 1
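The sticky-comment steps above consume two step outputs from `--check` (`stale` and `report`). A minimal sketch of that output contract, assuming the emitter writes to `$GITHUB_OUTPUT` — the real emitter lives in the flake's `fix-lockfiles` app, which this diff doesn't show:

```python
import os

def emit_check_outputs(stale: bool, report_md: str) -> None:
    # Append step outputs the way GitHub Actions expects.
    with open(os.environ["GITHUB_OUTPUT"], "a", encoding="utf-8") as fh:
        fh.write(f"stale={'true' if stale else 'false'}\n")
        # Multiline values must use the heredoc form.
        fh.write(f"report<<EOF\n{report_md}\nEOF\n")
```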
diff --git a/.github/workflows/nix-lockfile-fix.yml b/.github/workflows/nix-lockfile-fix.yml
new file mode 100644
index 0000000000..a1c7dd6e5c
--- /dev/null
+++ b/.github/workflows/nix-lockfile-fix.yml
@@ -0,0 +1,149 @@
+name: Nix Lockfile Fix
+
+on:
+  workflow_dispatch:
+    inputs:
+      pr_number:
+        description: 'PR number to fix (leave empty to run on the selected branch)'
+        required: false
+        type: string
+  issue_comment:
+    types: [edited]
+
+permissions:
+  contents: write
+  pull-requests: write
+
+concurrency:
+  group: nix-lockfile-fix-${{ github.event.issue.number || github.event.inputs.pr_number || github.ref }}
+  cancel-in-progress: false
+
+jobs:
+  fix:
+    # Run on manual dispatch OR when a task-list checkbox in the sticky
+    # lockfile-check comment flips from `[ ]` to `[x]`.
+    if: |
+      github.event_name == 'workflow_dispatch' ||
+      (github.event_name == 'issue_comment'
+        && github.event.issue.pull_request != null
+        && contains(github.event.comment.body, '[x] **Apply lockfile fix**')
+        && !contains(github.event.changes.body.from, '[x] **Apply lockfile fix**'))
+    runs-on: ubuntu-latest
+    timeout-minutes: 25
+    steps:
+      - name: Authorize & resolve PR
+        id: resolve
+        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+        with:
+          script: |
+            // 1. Verify the actor has write access — applies to both checkbox
+            //    clicks and manual dispatch.
+            const { data: perm } =
+              await github.rest.repos.getCollaboratorPermissionLevel({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                username: context.actor,
+              });
+            if (!['admin', 'write', 'maintain'].includes(perm.permission)) {
+              core.setFailed(
+                `${context.actor} lacks write access (has: ${perm.permission})`
+              );
+              return;
+            }
+
+            // 2. Resolve which ref to check out.
+            let prNumber = '';
+            if (context.eventName === 'issue_comment') {
+              prNumber = String(context.payload.issue.number);
+            } else if (context.eventName === 'workflow_dispatch') {
+              prNumber = context.payload.inputs.pr_number || '';
+            }
+
+            if (!prNumber) {
+              core.setOutput('ref', context.ref.replace(/^refs\/heads\//, ''));
+              core.setOutput('repo', context.repo.repo);
+              core.setOutput('owner', context.repo.owner);
+              core.setOutput('pr', '');
+              return;
+            }
+
+            const { data: pr } = await github.rest.pulls.get({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              pull_number: Number(prNumber),
+            });
+            core.setOutput('ref', pr.head.ref);
+            core.setOutput('repo', pr.head.repo.name);
+            core.setOutput('owner', pr.head.repo.owner.login);
+            core.setOutput('pr', String(pr.number));
+
+      # Overwrite the sticky lockfile-check comment with a "running" state as
+      # soon as the job is authorized, so the user sees their click was picked
+      # up before the ~minute of nix build work.
+      - name: Mark sticky as running
+        if: steps.resolve.outputs.pr != ''
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
+        with:
+          header: nix-lockfile-check
+          number: ${{ steps.resolve.outputs.pr }}
+          message: |
+            ### 🔄 Applying lockfile fix…
+
+            Triggered by @${{ github.actor }} — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
+
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+        with:
+          repository: ${{ steps.resolve.outputs.owner }}/${{ steps.resolve.outputs.repo }}
+          ref: ${{ steps.resolve.outputs.ref }}
+          token: ${{ secrets.GITHUB_TOKEN }}
+          fetch-depth: 0
+
+      - uses: ./.github/actions/nix-setup
+
+      - name: Apply lockfile hashes
+        id: apply
+        run: nix run .#fix-lockfiles -- --apply
+
+      - name: Commit & push
+        if: steps.apply.outputs.changed == 'true'
+        shell: bash
+        run: |
+          set -euo pipefail
+          git config user.name 'github-actions[bot]'
+          git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
+          git add nix/tui.nix nix/web.nix
+          git commit -m "fix(nix): refresh npm lockfile hashes"
+          git push
+
+      - name: Update sticky (applied)
+        if: steps.apply.outputs.changed == 'true' && steps.resolve.outputs.pr != ''
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
+        with:
+          header: nix-lockfile-check
+          number: ${{ steps.resolve.outputs.pr }}
+          message: |
+            ### ✅ Lockfile fix applied
+
+            Pushed a commit refreshing the npm lockfile hashes — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
+
+      - name: Update sticky (already current)
+        if: steps.apply.outputs.changed == 'false' && steps.resolve.outputs.pr != ''
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
+        with:
+          header: nix-lockfile-check
+          number: ${{ steps.resolve.outputs.pr }}
+          message: |
+            ### ✅ Lockfile hashes already current
+
+            Nothing to commit — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
+
+      - name: Update sticky (failed)
+        if: failure() && steps.resolve.outputs.pr != ''
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
+        with:
+          header: nix-lockfile-check
+          number: ${{ steps.resolve.outputs.pr }}
+          message: |
+            ### ❌ Lockfile fix failed
+
+            See the [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for logs.
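The `issue_comment` guard in the fix workflow above reduces to a small predicate — restated here in Python for clarity (the workflow evaluates it with `contains()` expressions):

```python
MARK = "[x] **Apply lockfile fix**"

def checkbox_newly_ticked(new_body: str, old_body: str) -> bool:
    # Fire only when the edit *introduces* the checked box: present in the
    # new comment body, absent in the pre-edit body. Re-saving an
    # already-checked comment does not re-trigger the job.
    return MARK in new_body and MARK not in old_body
```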
diff --git a/.github/workflows/nix.yml b/.github/workflows/nix.yml
index 387c9e5d13..7cae6f8151 100644
--- a/.github/workflows/nix.yml
+++ b/.github/workflows/nix.yml
@@ -4,15 +4,6 @@ on:
   push:
     branches: [main]
   pull_request:
-    paths:
-      - 'flake.nix'
-      - 'flake.lock'
-      - 'nix/**'
-      - 'pyproject.toml'
-      - 'uv.lock'
-      - 'hermes_cli/**'
-      - 'run_agent.py'
-      - 'acp_adapter/**'

 permissions:
   contents: read
@@ -29,9 +20,8 @@ jobs:
     runs-on: ${{ matrix.os }}
     timeout-minutes: 30
     steps:
-      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
-      - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22
-      - uses: DeterminateSystems/magic-nix-cache-action@565684385bcd71bad329742eefe8d12f2e765b39 # v13
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+      - uses: ./.github/actions/nix-setup
       - name: Check flake
         if: runner.os == 'Linux'
         run: nix flake check --print-build-logs
diff --git a/.github/workflows/supply-chain-audit.yml b/.github/workflows/supply-chain-audit.yml
index 4aa0fd321a..417e7b21f8 100644
--- a/.github/workflows/supply-chain-audit.yml
+++ b/.github/workflows/supply-chain-audit.yml
@@ -3,14 +3,31 @@ name: Supply Chain Audit
 on:
   pull_request:
     types: [opened, synchronize, reopened]
+    paths:
+      - '**/*.py'
+      - '**/*.pth'
+      - '**/setup.py'
+      - '**/setup.cfg'
+      - '**/sitecustomize.py'
+      - '**/usercustomize.py'
+      - '**/__init__.pth'

 permissions:
   pull-requests: write
   contents: read

+# Narrow, high-signal scanner. Only fires on critical indicators of supply
+# chain attacks (e.g. the litellm-style payloads). Low-signal heuristics
+# (plain base64, plain exec/eval, dependency/Dockerfile/workflow edits,
+# Actions version unpinning, outbound POST/PUT) were intentionally
+# removed — they fired on nearly every PR and trained reviewers to ignore
+# the scanner. Keep this file's checks ruthlessly narrow: if you find
+# yourself adding WARNING-tier patterns here again, make a separate
+# advisory-only workflow instead.
+
 jobs:
   scan:
-    name: Scan PR for supply chain risks
+    name: Scan PR for critical supply chain risks
     runs-on: ubuntu-latest
     steps:
       - name: Checkout
@@ -18,7 +35,7 @@ jobs:
         with:
           fetch-depth: 0

-      - name: Scan diff for suspicious patterns
+      - name: Scan diff for critical patterns
         id: scan
         env:
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -28,19 +45,19 @@ jobs:
           BASE="${{ github.event.pull_request.base.sha }}"
           HEAD="${{ github.event.pull_request.head.sha }}"

-          # Get the full diff (added lines only)
+          # Added lines only, excluding lockfiles.
           DIFF=$(git diff "$BASE".."$HEAD" -- . ':!uv.lock' ':!*.lock' ':!package-lock.json' ':!yarn.lock' || true)

           FINDINGS=""
-          CRITICAL=false

           # --- .pth files (auto-execute on Python startup) ---
+          # The exact mechanism used in the litellm supply chain attack:
+          # https://github.com/BerriAI/litellm/issues/24512
           PTH_FILES=$(git diff --name-only "$BASE".."$HEAD" | grep '\.pth$' || true)
           if [ -n "$PTH_FILES" ]; then
-            CRITICAL=true
             FINDINGS="${FINDINGS}
 ### 🚨 CRITICAL: .pth file added or modified
-Python \`.pth\` files in \`site-packages/\` execute automatically when the interpreter starts — no import required. This is the exact mechanism used in the [litellm supply chain attack](https://github.com/BerriAI/litellm/issues/24512).
+Python \`.pth\` files in \`site-packages/\` execute automatically when the interpreter starts — no import required.
 **Files:**
 \`\`\`
@@ -49,13 +66,12 @@ jobs:
 "
           fi

-          # --- base64 + exec/eval combo (the litellm attack pattern) ---
+          # --- base64 decode + exec/eval on the same line (the litellm attack pattern) ---
           B64_EXEC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'base64\.(b64decode|decodebytes|urlsafe_b64decode)' | grep -iE 'exec\(|eval\(' | head -10 || true)
           if [ -n "$B64_EXEC_HITS" ]; then
-            CRITICAL=true
             FINDINGS="${FINDINGS}
 ### 🚨 CRITICAL: base64 decode + exec/eval combo
-This is the exact pattern used in the [litellm supply chain attack](https://github.com/BerriAI/litellm/issues/24512) — base64-decoded strings passed to exec/eval to hide credential-stealing payloads.
+Base64-decoded strings passed directly to exec/eval — the signature of hidden credential-stealing payloads.

 **Matches:**
 \`\`\`
@@ -64,41 +80,12 @@ jobs:
 "
           fi

-          # --- base64 decode/encode (alone — legitimate uses exist) ---
-          B64_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'base64\.(b64decode|b64encode|decodebytes|encodebytes|urlsafe_b64decode)|atob\(|btoa\(|Buffer\.from\(.*base64' | head -20 || true)
-          if [ -n "$B64_HITS" ]; then
-            FINDINGS="${FINDINGS}
-### ⚠️ WARNING: base64 encoding/decoding detected
-Base64 has legitimate uses (images, JWT, etc.) but is also commonly used to obfuscate malicious payloads. Verify the usage is appropriate.
-
-**Matches (first 20):**
-\`\`\`
-${B64_HITS}
-\`\`\`
-"
-          fi
-
-          # --- exec/eval with string arguments ---
-          EXEC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -E '(exec|eval)\s*\(' | grep -v '^\+\s*#' | grep -v 'test_\|mock\|assert\|# ' | head -20 || true)
-          if [ -n "$EXEC_HITS" ]; then
-            FINDINGS="${FINDINGS}
-### ⚠️ WARNING: exec() or eval() usage
-Dynamic code execution can hide malicious behavior, especially when combined with base64 or network fetches.
-
-**Matches (first 20):**
-\`\`\`
-${EXEC_HITS}
-\`\`\`
-"
-          fi
-
-          # --- subprocess with encoded/obfuscated commands ---
-          PROC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -E 'subprocess\.(Popen|call|run)\s*\(' | grep -iE 'base64|decode|encode|\\x|chr\(' | head -10 || true)
+          # --- subprocess with encoded/obfuscated command argument ---
+          PROC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -E 'subprocess\.(Popen|call|run)\s*\(' | grep -iE 'base64|\\x[0-9a-f]{2}|chr\(' | head -10 || true)
           if [ -n "$PROC_HITS" ]; then
-            CRITICAL=true
             FINDINGS="${FINDINGS}
 ### 🚨 CRITICAL: subprocess with encoded/obfuscated command
-Subprocess calls with encoded arguments are a strong indicator of payload execution.
+Subprocess calls whose command strings are base64- or hex-encoded are a strong indicator of payload execution.

 **Matches:**
 \`\`\`
@@ -107,25 +94,12 @@ jobs:
 "
           fi

-          # --- Network calls to non-standard domains ---
-          EXFIL_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'requests\.(post|put)\(|httpx\.(post|put)\(|urllib\.request\.urlopen' | grep -v '^\+\s*#' | grep -v 'test_\|mock\|assert' | head -10 || true)
-          if [ -n "$EXFIL_HITS" ]; then
-            FINDINGS="${FINDINGS}
-### ⚠️ WARNING: Outbound network calls (POST/PUT)
-Outbound POST/PUT requests in new code could be data exfiltration. Verify the destination URLs are legitimate.
-
-**Matches (first 10):**
-\`\`\`
-${EXFIL_HITS}
-\`\`\`
-"
-          fi
-
-          # --- setup.py / setup.cfg install hooks ---
-          SETUP_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -E '(setup\.py|setup\.cfg|__init__\.pth|sitecustomize\.py|usercustomize\.py)$' || true)
+          # --- Install-hook files (setup.py/sitecustomize/usercustomize/__init__.pth) ---
+          # These execute during pip install or interpreter startup.
+          SETUP_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -E '(^|/)(setup\.py|setup\.cfg|sitecustomize\.py|usercustomize\.py|__init__\.pth)$' || true)
           if [ -n "$SETUP_HITS" ]; then
             FINDINGS="${FINDINGS}
-### ⚠️ WARNING: Install hook files modified
+### 🚨 CRITICAL: Install-hook file added or modified
 These files can execute code during package installation or interpreter startup.

 **Files:**
@@ -135,114 +109,31 @@ jobs:
 "
           fi

-          # --- Compile/marshal/pickle (code object injection) ---
-          MARSHAL_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'marshal\.loads|pickle\.loads|compile\(' | grep -v '^\+\s*#' | grep -v 'test_\|re\.compile\|ast\.compile' | head -10 || true)
-          if [ -n "$MARSHAL_HITS" ]; then
-            FINDINGS="${FINDINGS}
-### ⚠️ WARNING: marshal/pickle/compile usage
-These can deserialize or construct executable code objects.
-
-**Matches:**
-\`\`\`
-${MARSHAL_HITS}
-\`\`\`
-"
-          fi
-
-          # --- CI/CD workflow files modified ---
-          WORKFLOW_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -E '\.github/workflows/.*\.ya?ml$' || true)
-          if [ -n "$WORKFLOW_HITS" ]; then
-            FINDINGS="${FINDINGS}
-### ⚠️ WARNING: CI/CD workflow files modified
-Changes to workflow files can alter build pipelines, inject steps, or modify permissions. Verify no unauthorized actions or secrets access were added.
-
-**Files:**
-\`\`\`
-${WORKFLOW_HITS}
-\`\`\`
-"
-          fi
-
-          # --- Dockerfile / container build files modified ---
-          DOCKER_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -iE '(Dockerfile|\.dockerignore|docker-compose)' || true)
-          if [ -n "$DOCKER_HITS" ]; then
-            FINDINGS="${FINDINGS}
-### ⚠️ WARNING: Container build files modified
-Changes to Dockerfiles or compose files can alter base images, add build steps, or expose ports. Verify base image pins and build commands.
-
-**Files:**
-\`\`\`
-${DOCKER_HITS}
-\`\`\`
-"
-          fi
-
-          # --- Dependency manifest files modified ---
-          DEP_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -E '(pyproject\.toml|requirements.*\.txt|package\.json|Gemfile|go\.mod|Cargo\.toml)$' || true)
-          if [ -n "$DEP_HITS" ]; then
-            FINDINGS="${FINDINGS}
-### ⚠️ WARNING: Dependency manifest files modified
-Changes to dependency files can introduce new packages or change version pins. Verify all dependency changes are intentional and from trusted sources.
-
-**Files:**
-\`\`\`
-${DEP_HITS}
-\`\`\`
-"
-          fi
-
-          # --- GitHub Actions version unpinning (mutable tags instead of SHAs) ---
-          ACTIONS_UNPIN=$(echo "$DIFF" | grep -n '^\+' | grep 'uses:' | grep -v '#' | grep -E '@v[0-9]' | head -10 || true)
-          if [ -n "$ACTIONS_UNPIN" ]; then
-            FINDINGS="${FINDINGS}
-### ⚠️ WARNING: GitHub Actions with mutable version tags
-Actions should be pinned to full commit SHAs (not \`@v4\`, \`@v5\`). Mutable tags can be retargeted silently if a maintainer account is compromised.
-
-**Matches:**
-\`\`\`
-${ACTIONS_UNPIN}
-\`\`\`
-"
-          fi
-
-          # --- Output results ---
           if [ -n "$FINDINGS" ]; then
             echo "found=true" >> "$GITHUB_OUTPUT"
-            if [ "$CRITICAL" = true ]; then
-              echo "critical=true" >> "$GITHUB_OUTPUT"
-            else
-              echo "critical=false" >> "$GITHUB_OUTPUT"
-            fi
-
             # Write findings to a file (multiline env vars are fragile)
             echo "$FINDINGS" > /tmp/findings.md
           else
             echo "found=false" >> "$GITHUB_OUTPUT"
-            echo "critical=false" >> "$GITHUB_OUTPUT"
           fi

-      - name: Post warning comment
+      - name: Post critical finding comment
        if: steps.scan.outputs.found == 'true'
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
-          SEVERITY="⚠️ Supply Chain Risk Detected"
-          if [ "${{ steps.scan.outputs.critical }}" = "true" ]; then
-            SEVERITY="🚨 CRITICAL Supply Chain Risk Detected"
-          fi
+          BODY="## 🚨 CRITICAL Supply Chain Risk Detected

-          BODY="## ${SEVERITY}
-
-          This PR contains patterns commonly associated with supply chain attacks. This does **not** mean the PR is malicious — but these patterns require careful human review before merging.
+          This PR contains a pattern that has been used in real supply chain attacks. A maintainer must review the flagged code carefully before merging.

           $(cat /tmp/findings.md)

           ---
-          *Automated scan triggered by [supply-chain-audit](/.github/workflows/supply-chain-audit.yml). If this is a false positive, a maintainer can approve after manual review.*"
+          *Scanner only fires on high-signal indicators: .pth files, base64+exec/eval combos, subprocess with encoded commands, or install-hook files. Low-signal warnings were removed intentionally — if you're seeing this comment, the finding is worth inspecting.*"

           gh pr comment "${{ github.event.pull_request.number }}" --body "$BODY" || echo "::warning::Could not post PR comment (expected for fork PRs — GITHUB_TOKEN is read-only)"

       - name: Fail on critical findings
-        if: steps.scan.outputs.critical == 'true'
+        if: steps.scan.outputs.found == 'true'
        run: |
          echo "::error::CRITICAL supply chain risk patterns detected in this PR. See the PR comment for details."
          exit 1
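The grep pipelines above amount to a few line-level regex checks. A sketch of the base64+exec detector in Python — `re` stands in for grep here; the real workflow stays in bash:

```python
import re

B64_DECODE = re.compile(r"base64\.(b64decode|decodebytes|urlsafe_b64decode)", re.I)
EXEC_EVAL = re.compile(r"\b(exec|eval)\s*\(", re.I)

def critical_b64_exec_lines(diff_text: str) -> list[str]:
    # Added lines where a base64 decode and exec/eval co-occur — the
    # litellm-style payload signature the workflow treats as CRITICAL.
    return [
        line for line in diff_text.splitlines()
        if line.startswith("+") and B64_DECODE.search(line) and EXEC_EVAL.search(line)
    ]
```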
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 7d0822690a..a92afdfa40 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -3,8 +3,14 @@ name: Tests
 on:
   push:
     branches: [main]
+    paths-ignore:
+      - '**/*.md'
+      - 'docs/**'
   pull_request:
     branches: [main]
+    paths-ignore:
+      - '**/*.md'
+      - 'docs/**'

 permissions:
   contents: read
@@ -17,7 +23,7 @@ jobs:
   test:
     runs-on: ubuntu-latest
-    timeout-minutes: 10
+    timeout-minutes: 20
     steps:
       - name: Checkout code
         uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
diff --git a/.gitignore b/.gitignore
index e516d154f3..8b455cf506 100644
--- a/.gitignore
+++ b/.gitignore
@@ -54,6 +54,11 @@ environments/benchmarks/evals/
 # Web UI build output
 hermes_cli/web_dist/

+# Web UI assets — synced from @nous-research/ui at build time via
+# `npm run sync-assets` (see web/package.json).
+web/public/fonts/
+web/public/ds-assets/
+
 # Release script temp files
 .release_notes.md
 mini-swe-agent/
diff --git a/AGENTS.md b/AGENTS.md
index 8bd979b058..0f5ce15f28 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -566,3 +566,52 @@ python -m pytest tests/ -q -n 4

 Worker count above 4 will surface test-ordering flakes that CI never sees.
 Always run the full suite before pushing changes.
+
+### Don't write change-detector tests
+
+A test is a **change-detector** if it fails whenever data that is **expected
+to change** gets updated — model catalogs, config version numbers,
+enumeration counts, hardcoded lists of provider models. These tests add no
+behavioral coverage; they just guarantee that routine source updates break
+CI and cost engineering time to "fix."
+
+**Do not write:**
+
+```python
+# catalog snapshot — breaks every model release
+assert "gemini-2.5-pro" in _PROVIDER_MODELS["gemini"]
+assert "MiniMax-M2.7" in models
+
+# config version literal — breaks every schema bump
+assert DEFAULT_CONFIG["_config_version"] == 21
+
+# enumeration count — breaks every time a skill/provider is added
+assert len(_PROVIDER_MODELS["huggingface"]) == 8
+```
+
+**Do write:**
+
+```python
+# behavior: does the catalog plumbing work at all?
+assert "gemini" in _PROVIDER_MODELS
+assert len(_PROVIDER_MODELS["gemini"]) >= 1
+
+# behavior: does migration bump the user's version to current latest?
+assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"]
+
+# invariant: no plan-only model leaks into the legacy list
+assert not (set(moonshot_models) & coding_plan_only_models)
+
+# invariant: every model in the catalog has a context-length entry
+for m in _PROVIDER_MODELS["huggingface"]:
+    assert m.lower() in DEFAULT_CONTEXT_LENGTHS_LOWER
+```
+
+The rule: if the test reads like a snapshot of current data, delete it. If
+it reads like a contract about how two pieces of data must relate, keep it.
+When a PR adds a new provider/model and you want a test, make the test
+assert the relationship (e.g. "catalog entries all have context lengths"),
+not the specific names.
+
+Reviewers should reject new change-detector tests; authors should convert
+them into invariants before re-requesting review.
diff --git a/Dockerfile b/Dockerfile
index 0d3da72eb7..a684f9fb31 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -27,12 +27,10 @@ WORKDIR /opt/hermes
 # Copy only package manifests first so npm install + Playwright are cached
 # unless the lockfiles themselves change.
 COPY package.json package-lock.json ./
-COPY scripts/whatsapp-bridge/package.json scripts/whatsapp-bridge/package-lock.json scripts/whatsapp-bridge/
 COPY web/package.json web/package-lock.json web/

 RUN npm install --prefer-offline --no-audit && \
     npx playwright install --with-deps chromium --only-shell && \
-    (cd scripts/whatsapp-bridge && npm install --prefer-offline --no-audit) && \
     (cd web && npm install --prefer-offline --no-audit) && \
     npm cache clean --force
diff --git a/acp_adapter/entry.py b/acp_adapter/entry.py
index 7db5747a4d..3089f78c27 100644
--- a/acp_adapter/entry.py
+++ b/acp_adapter/entry.py
@@ -20,6 +20,46 @@ from pathlib import Path

 from hermes_constants import get_hermes_home

+# Methods clients send as periodic liveness probes. They are not part of the
+# ACP schema, so the acp router correctly returns JSON-RPC -32601 to the
+# caller — but the supervisor task that dispatches the request then surfaces
+# the raised RequestError via ``logging.exception("Background task failed")``,
+# which dumps a traceback to stderr every probe interval. Clients like
+# acp-bridge already treat the -32601 response as "agent alive", so the
+# traceback is pure noise. We keep the protocol response intact and only
+# silence the stderr noise for this specific benign case.
+_BENIGN_PROBE_METHODS = frozenset({"ping", "health", "healthcheck"})
+
+
+class _BenignProbeMethodFilter(logging.Filter):
+    """Suppress acp 'Background task failed' tracebacks caused by unknown
+    liveness-probe methods (e.g. ``ping``) while leaving every other
+    background-task error — including method_not_found for any non-probe
+    method — visible in stderr.
+    """
+
+    def filter(self, record: logging.LogRecord) -> bool:
+        if record.getMessage() != "Background task failed":
+            return True
+        exc_info = record.exc_info
+        if not exc_info:
+            return True
+        exc = exc_info[1]
+        # Imported lazily so this module stays importable when the optional
+        # ``agent-client-protocol`` dependency is not installed.
+        try:
+            from acp.exceptions import RequestError
+        except ImportError:
+            return True
+        if not isinstance(exc, RequestError):
+            return True
+        if getattr(exc, "code", None) != -32601:
+            return True
+        data = getattr(exc, "data", None)
+        method = data.get("method") if isinstance(data, dict) else None
+        return method not in _BENIGN_PROBE_METHODS
+
+
 def _setup_logging() -> None:
     """Route all logging to stderr so stdout stays clean for ACP stdio."""
     handler = logging.StreamHandler(sys.stderr)
@@ -29,6 +69,7 @@ def _setup_logging() -> None:
             datefmt="%Y-%m-%d %H:%M:%S",
         )
     )
+    handler.addFilter(_BenignProbeMethodFilter())
     root = logging.getLogger()
     root.handlers.clear()
     root.addHandler(handler)
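The filter pattern above, reduced to a self-contained toy so the keep/drop behavior is easy to see — `ToyError` stands in for `acp.exceptions.RequestError`, whose constructor this diff doesn't show:

```python
import logging
import sys

class ToyError(Exception):
    def __init__(self, code: int, method: str) -> None:
        super().__init__(method)
        self.code, self.method = code, method

class DropBenignProbe(logging.Filter):
    def filter(self, record: logging.LogRecord) -> bool:
        exc = record.exc_info[1] if record.exc_info else None
        # Drop only the one benign shape; keep every other failure visible.
        return not (isinstance(exc, ToyError) and exc.code == -32601 and exc.method == "ping")

log = logging.getLogger("toy")
handler = logging.StreamHandler(sys.stderr)
handler.addFilter(DropBenignProbe())
log.addHandler(handler)
log.propagate = False

for method in ("ping", "fs/read"):
    try:
        raise ToyError(-32601, method)
    except ToyError:
        log.exception("Background task failed")  # only the fs/read case prints
```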
diff --git a/acp_adapter/permissions.py b/acp_adapter/permissions.py
index 68f61e340a..c2e1a59826 100644
--- a/acp_adapter/permissions.py
+++ b/acp_adapter/permissions.py
@@ -63,6 +63,9 @@ def make_approval_callback(
             logger.warning("Permission request timed out or failed: %s", exc)
             return "deny"

+        if response is None:
+            return "deny"
+
         outcome = response.outcome
         if isinstance(outcome, AllowedOutcome):
             option_id = outcome.option_id
diff --git a/acp_adapter/server.py b/acp_adapter/server.py
index 4685a68a8c..d73c71157a 100644
--- a/acp_adapter/server.py
+++ b/acp_adapter/server.py
@@ -4,6 +4,7 @@ from __future__ import annotations

 import asyncio
 import logging
+import os
 from collections import defaultdict, deque
 from concurrent.futures import ThreadPoolExecutor
 from typing import Any, Deque, Optional
@@ -51,7 +52,7 @@ try:
 except ImportError:
     from acp.schema import AuthMethod as AuthMethodAgent  # type: ignore[attr-defined]

-from acp_adapter.auth import detect_provider, has_provider
+from acp_adapter.auth import detect_provider
 from acp_adapter.events import (
     make_message_cb,
     make_step_cb,
@@ -71,6 +72,11 @@ except Exception:
 # Thread pool for running AIAgent (synchronous) in parallel.
 _executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="acp-agent")

+# Server-side page size for list_sessions. The ACP ListSessionsRequest schema
+# does not expose a client-side limit, so this is a fixed cap that clients
+# paginate against using `cursor` / `next_cursor`.
+_LIST_SESSIONS_PAGE_SIZE = 50
+

 def _extract_text(
     prompt: list[
@@ -351,9 +357,18 @@ class HermesACPAgent(acp.Agent):
         )

     async def authenticate(self, method_id: str, **kwargs: Any) -> AuthenticateResponse | None:
-        if has_provider():
-            return AuthenticateResponse()
-        return None
+        # Only accept authenticate() calls whose method_id matches the
+        # provider we advertised in initialize(). Without this check,
+        # authenticate() would acknowledge any method_id as long as the
+        # server has provider credentials configured — harmless under
+        # Hermes' threat model (ACP is stdio-only, local-trust), but poor
+        # API hygiene and confusing if ACP ever grows multi-method auth.
+        provider = detect_provider()
+        if not provider:
+            return None
+        if not isinstance(method_id, str) or method_id.strip().lower() != provider:
+            return None
+        return AuthenticateResponse()

     # ---- Session management -------------------------------------------------
@@ -437,7 +452,28 @@ class HermesACPAgent(acp.Agent):
         cwd: str | None = None,
         **kwargs: Any,
     ) -> ListSessionsResponse:
+        """List ACP sessions with optional ``cwd`` filtering and cursor pagination.
+
+        ``cwd`` is passed through to ``SessionManager.list_sessions`` which already
+        normalizes and filters by working directory. ``cursor`` is a ``session_id``
+        previously returned as ``next_cursor``; results resume after that entry.
+        Server-side page size is capped at ``_LIST_SESSIONS_PAGE_SIZE``; when more
+        results remain, ``next_cursor`` is set to the last returned ``session_id``.
+        """
         infos = self.session_manager.list_sessions(cwd=cwd)
+
+        if cursor:
+            for idx, s in enumerate(infos):
+                if s["session_id"] == cursor:
+                    infos = infos[idx + 1:]
+                    break
+            else:
+                # Unknown cursor -> empty page (do not fall back to full list).
+                infos = []
+
+        has_more = len(infos) > _LIST_SESSIONS_PAGE_SIZE
+        infos = infos[:_LIST_SESSIONS_PAGE_SIZE]
+
         sessions = []
         for s in infos:
             updated_at = s.get("updated_at")
@@ -451,7 +487,9 @@ class HermesACPAgent(acp.Agent):
                     updated_at=updated_at,
                 )
             )
-        return ListSessionsResponse(sessions=sessions)
+
+        next_cursor = sessions[-1].session_id if has_more and sessions else None
+        return ListSessionsResponse(sessions=sessions, next_cursor=next_cursor)

     # ---- Prompt (core) ------------------------------------------------------
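Client side of the cursor contract, as a hedged sketch — `client.list_sessions` stands in for whatever the ACP client library actually exposes:

```python
async def fetch_all_sessions(client, cwd: str | None = None) -> list:
    sessions, cursor = [], None
    while True:
        resp = await client.list_sessions(cwd=cwd, cursor=cursor)
        sessions.extend(resp.sessions)
        if not resp.next_cursor:   # None means the server returned the last page
            return sessions
        cursor = resp.next_cursor  # resume after the last returned session_id
```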
@@ -517,15 +555,32 @@ class HermesACPAgent(acp.Agent):
         agent.step_callback = step_cb
         agent.message_callback = message_cb

-        if approval_cb:
-            try:
-                from tools import terminal_tool as _terminal_tool
-                previous_approval_cb = getattr(_terminal_tool, "_approval_callback", None)
-                _terminal_tool.set_approval_callback(approval_cb)
-            except Exception:
-                logger.debug("Could not set ACP approval callback", exc_info=True)
+        # Approval callback is per-thread (thread-local, GHSA-qg5c-hvr5-hjgr).
+        # Set it INSIDE _run_agent so the TLS write happens in the executor
+        # thread — setting it here would write to the event-loop thread's TLS,
+        # not the executor's. Also set HERMES_INTERACTIVE so approval.py
+        # takes the CLI-interactive path (which calls the registered
+        # callback via prompt_dangerous_approval) instead of the
+        # non-interactive auto-approve branch (GHSA-96vc-wcxf-jjff).
+        # ACP's conn.request_permission maps cleanly to the interactive
+        # callback shape — not the gateway-queue HERMES_EXEC_ASK path,
+        # which requires a notify_cb registered in _gateway_notify_cbs.
+        previous_approval_cb = None
+        previous_interactive = None

         def _run_agent() -> dict:
+            nonlocal previous_approval_cb, previous_interactive
+            if approval_cb:
+                try:
+                    from tools import terminal_tool as _terminal_tool
+                    previous_approval_cb = _terminal_tool._get_approval_callback()
+                    _terminal_tool.set_approval_callback(approval_cb)
+                except Exception:
+                    logger.debug("Could not set ACP approval callback", exc_info=True)
+            # Signal to tools.approval that we have an interactive callback
+            # and the non-interactive auto-approve path must not fire.
+            previous_interactive = os.environ.get("HERMES_INTERACTIVE")
+            os.environ["HERMES_INTERACTIVE"] = "1"
             try:
                 result = agent.run_conversation(
                     user_message=user_text,
@@ -537,6 +592,11 @@ class HermesACPAgent(acp.Agent):
                 logger.exception("Agent error in session %s", session_id)
                 return {"final_response": f"Error: {e}", "messages": state.history}
             finally:
+                # Restore HERMES_INTERACTIVE.
+                if previous_interactive is None:
+                    os.environ.pop("HERMES_INTERACTIVE", None)
+                else:
+                    os.environ["HERMES_INTERACTIVE"] = previous_interactive
                 if approval_cb:
                     try:
                         from tools import terminal_tool as _terminal_tool
@@ -613,8 +673,8 @@ class HermesACPAgent(acp.Agent):
             await self._conn.session_update(
                 session_id=session_id,
                 update=AvailableCommandsUpdate(
-                    sessionUpdate="available_commands_update",
-                    availableCommands=self._available_commands(),
+                    session_update="available_commands_update",
+                    available_commands=self._available_commands(),
                 ),
             )
         except Exception:
diff --git a/agent/account_usage.py b/agent/account_usage.py
new file mode 100644
index 0000000000..0e9562dcc9
--- /dev/null
+++ b/agent/account_usage.py
@@ -0,0 +1,326 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from typing import Any, Optional
+
+import httpx
+
+from agent.anthropic_adapter import _is_oauth_token, resolve_anthropic_token
+from hermes_cli.auth import _read_codex_tokens, resolve_codex_runtime_credentials
+from hermes_cli.runtime_provider import resolve_runtime_provider
+
+
+def _utc_now() -> datetime:
+    return datetime.now(timezone.utc)
+
+
+@dataclass(frozen=True)
+class AccountUsageWindow:
+    label: str
+    used_percent: Optional[float] = None
+    reset_at: Optional[datetime] = None
+    detail: Optional[str] = None
+
+
+@dataclass(frozen=True)
+class AccountUsageSnapshot:
+    provider: str
+    source: str
+    fetched_at: datetime
+    title: str = "Account limits"
+    plan: Optional[str] = None
+    windows: tuple[AccountUsageWindow, ...] = ()
+    details: tuple[str, ...] = ()
+    unavailable_reason: Optional[str] = None
+
+    @property
+    def available(self) -> bool:
+        return bool(self.windows or self.details) and not self.unavailable_reason
+
+
+def _title_case_slug(value: Optional[str]) -> Optional[str]:
+    cleaned = str(value or "").strip()
+    if not cleaned:
+        return None
+    return cleaned.replace("_", " ").replace("-", " ").title()
+
+
+def _parse_dt(value: Any) -> Optional[datetime]:
+    if value in (None, ""):
+        return None
+    if isinstance(value, (int, float)):
+        return datetime.fromtimestamp(float(value), tz=timezone.utc)
+    if isinstance(value, str):
+        text = value.strip()
+        if not text:
+            return None
+        if text.endswith("Z"):
+            text = text[:-1] + "+00:00"
+        try:
+            dt = datetime.fromisoformat(text)
+            return dt if dt.tzinfo else dt.replace(tzinfo=timezone.utc)
+        except ValueError:
+            return None
+    return None
+
+
+def _format_reset(dt: Optional[datetime]) -> str:
+    if not dt:
+        return "unknown"
+    local_dt = dt.astimezone()
+    delta = dt - _utc_now()
+    total_seconds = int(delta.total_seconds())
+    if total_seconds <= 0:
+        return f"now ({local_dt.strftime('%Y-%m-%d %H:%M %Z')})"
+    hours, rem = divmod(total_seconds, 3600)
+    minutes = rem // 60
+    if hours >= 24:
+        days, hours = divmod(hours, 24)
+        rel = f"in {days}d {hours}h"
+    elif hours > 0:
+        rel = f"in {hours}h {minutes}m"
+    else:
+        rel = f"in {minutes}m"
+    return f"{rel} ({local_dt.strftime('%Y-%m-%d %H:%M %Z')})"
+
+
+def render_account_usage_lines(snapshot: Optional[AccountUsageSnapshot], *, markdown: bool = False) -> list[str]:
+    if not snapshot:
+        return []
+    header = f"📈 {'**' if markdown else ''}{snapshot.title}{'**' if markdown else ''}"
+    lines = [header]
+    if snapshot.plan:
+        lines.append(f"Provider: {snapshot.provider} ({snapshot.plan})")
+    else:
+        lines.append(f"Provider: {snapshot.provider}")
+    for window in snapshot.windows:
+        if window.used_percent is None:
+            base = f"{window.label}: unavailable"
+        else:
+            remaining = max(0, round(100 - float(window.used_percent)))
+            used = max(0, round(float(window.used_percent)))
+            base = f"{window.label}: {remaining}% remaining ({used}% used)"
+        if window.reset_at:
+            base += f" • resets {_format_reset(window.reset_at)}"
+        elif window.detail:
+            base += f" • {window.detail}"
+        lines.append(base)
+    for detail in snapshot.details:
+        lines.append(detail)
+    if snapshot.unavailable_reason:
+        lines.append(f"Unavailable: {snapshot.unavailable_reason}")
+    return lines
+
+
+def _resolve_codex_usage_url(base_url: str) -> str:
+    normalized = (base_url or "").strip().rstrip("/")
+    if not normalized:
+        normalized = "https://chatgpt.com/backend-api/codex"
+    if normalized.endswith("/codex"):
+        normalized = normalized[: -len("/codex")]
+    if "/backend-api" in normalized:
+        return normalized + "/wham/usage"
+    return normalized + "/api/codex/usage"
+
+
+def _fetch_codex_account_usage() -> Optional[AccountUsageSnapshot]:
+    creds = resolve_codex_runtime_credentials(refresh_if_expiring=True)
+    token_data = _read_codex_tokens()
+    tokens = token_data.get("tokens") or {}
+    account_id = str(tokens.get("account_id", "") or "").strip() or None
+    headers = {
+        "Authorization": f"Bearer {creds['api_key']}",
+        "Accept": "application/json",
+        "User-Agent": "codex-cli",
+    }
+    if account_id:
+        headers["ChatGPT-Account-Id"] = account_id
+    with httpx.Client(timeout=15.0) as client:
+        response = client.get(_resolve_codex_usage_url(creds.get("base_url", "")), headers=headers)
+        response.raise_for_status()
+        payload = response.json() or {}
+    rate_limit = payload.get("rate_limit") or {}
+    windows: list[AccountUsageWindow] = []
+    for key, label in (("primary_window", "Session"), ("secondary_window", "Weekly")):
+        window = rate_limit.get(key) or {}
+        used = window.get("used_percent")
+        if used is None:
+            continue
+        windows.append(
+            AccountUsageWindow(
+                label=label,
+                used_percent=float(used),
+                reset_at=_parse_dt(window.get("reset_at")),
+            )
+        )
+    details: list[str] = []
+    credits = payload.get("credits") or {}
+    if credits.get("has_credits"):
+        balance = credits.get("balance")
+        if isinstance(balance, (int, float)):
+            details.append(f"Credits balance: ${float(balance):.2f}")
+        elif credits.get("unlimited"):
+            details.append("Credits balance: unlimited")
+    return AccountUsageSnapshot(
+        provider="openai-codex",
+        source="usage_api",
+        fetched_at=_utc_now(),
+        plan=_title_case_slug(payload.get("plan_type")),
+        windows=tuple(windows),
+        details=tuple(details),
+    )
+
+
+def _fetch_anthropic_account_usage() -> Optional[AccountUsageSnapshot]:
+    token = (resolve_anthropic_token() or "").strip()
+    if not token:
+        return None
+    if not _is_oauth_token(token):
+        return AccountUsageSnapshot(
+            provider="anthropic",
+            source="oauth_usage_api",
+            fetched_at=_utc_now(),
+            unavailable_reason="Anthropic account limits are only available for OAuth-backed Claude accounts.",
+        )
+    headers = {
+        "Authorization": f"Bearer {token}",
+        "Accept": "application/json",
+        "Content-Type": "application/json",
+        "anthropic-beta": "oauth-2025-04-20",
+        "User-Agent": "claude-code/2.1.0",
+    }
+    with httpx.Client(timeout=15.0) as client:
+        response = client.get("https://api.anthropic.com/api/oauth/usage", headers=headers)
+        response.raise_for_status()
+        payload = response.json() or {}
+    windows: list[AccountUsageWindow] = []
+    mapping = (
+        ("five_hour", "Current session"),
+        ("seven_day", "Current week"),
+        ("seven_day_opus", "Opus week"),
+        ("seven_day_sonnet", "Sonnet week"),
+    )
+    for key, label in mapping:
+        window = payload.get(key) or {}
+        util = window.get("utilization")
+        if util is None:
+            continue
+        used = float(util) * 100 if float(util) <= 1 else float(util)
+        windows.append(
+            AccountUsageWindow(
+                label=label,
+                used_percent=used,
+                reset_at=_parse_dt(window.get("resets_at")),
+            )
+        )
+    details: list[str] = []
+    extra = payload.get("extra_usage") or {}
+    if extra.get("is_enabled"):
+        used_credits = extra.get("used_credits")
+        monthly_limit = extra.get("monthly_limit")
+        currency = extra.get("currency") or "USD"
+        if isinstance(used_credits, (int, float)) and isinstance(monthly_limit, (int, float)):
+            details.append(
+                f"Extra usage: {used_credits:.2f} / {monthly_limit:.2f} {currency}"
+            )
+    return AccountUsageSnapshot(
+        provider="anthropic",
+        source="oauth_usage_api",
+        fetched_at=_utc_now(),
+        windows=tuple(windows),
+        details=tuple(details),
+    )
+
+
+def _fetch_openrouter_account_usage(base_url: Optional[str], api_key: Optional[str]) -> Optional[AccountUsageSnapshot]:
+    runtime = resolve_runtime_provider(
+        requested="openrouter",
+        explicit_base_url=base_url,
+        explicit_api_key=api_key,
+    )
+    token = str(runtime.get("api_key", "") or "").strip()
+    if not token:
+        return None
+    normalized = str(runtime.get("base_url", "") or "").rstrip("/")
+    credits_url = f"{normalized}/credits"
+    key_url = f"{normalized}/key"
+    headers = {
+        "Authorization": f"Bearer {token}",
+        "Accept": "application/json",
+    }
+    with httpx.Client(timeout=10.0) as client:
+        credits_resp = client.get(credits_url, headers=headers)
+        credits_resp.raise_for_status()
+        credits = (credits_resp.json() or {}).get("data") or {}
+        try:
+            key_resp = client.get(key_url, headers=headers)
+            key_resp.raise_for_status()
+            key_data = (key_resp.json() or {}).get("data") or {}
+        except Exception:
+            key_data = {}
+    total_credits = float(credits.get("total_credits") or 0.0)
+    total_usage = float(credits.get("total_usage") or 0.0)
+    details = [f"Credits balance: ${max(0.0, total_credits - total_usage):.2f}"]
+    windows: list[AccountUsageWindow] = []
+    limit = key_data.get("limit")
+    limit_remaining = key_data.get("limit_remaining")
+    limit_reset = str(key_data.get("limit_reset") or "").strip()
+    usage = key_data.get("usage")
+    if (
+        isinstance(limit, (int, float))
+        and float(limit) > 0
+        and isinstance(limit_remaining, (int, float))
+        and 0 <= float(limit_remaining) <= float(limit)
+    ):
+        limit_value = float(limit)
+        remaining_value = float(limit_remaining)
+        used_percent = ((limit_value - remaining_value) / limit_value) * 100
+        detail_parts = [f"${remaining_value:.2f} of ${limit_value:.2f} remaining"]
+        if limit_reset:
+            detail_parts.append(f"resets {limit_reset}")
+        windows.append(
+            AccountUsageWindow(
+                label="API key quota",
+                used_percent=used_percent,
+                detail=" • ".join(detail_parts),
+            )
+        )
+    if isinstance(usage, (int, float)):
+        usage_parts = [f"API key usage: ${float(usage):.2f} total"]
+        for value, label in (
+            (key_data.get("usage_daily"), "today"),
+            (key_data.get("usage_weekly"), "this week"),
+            (key_data.get("usage_monthly"), "this month"),
+        ):
+            if isinstance(value, (int, float)) and float(value) > 0:
+                usage_parts.append(f"${float(value):.2f} {label}")
+        details.append(" • ".join(usage_parts))
+    return AccountUsageSnapshot(
+        provider="openrouter",
+        source="credits_api",
+        fetched_at=_utc_now(),
+        windows=tuple(windows),
+        details=tuple(details),
+    )
+
+
+def fetch_account_usage(
+    provider: Optional[str],
+    *,
+    base_url: Optional[str] = None,
+    api_key: Optional[str] = None,
+) -> Optional[AccountUsageSnapshot]:
+    normalized = str(provider or "").strip().lower()
+    if normalized in {"", "auto", "custom"}:
+        return None
+    try:
+        if normalized == "openai-codex":
+            return _fetch_codex_account_usage()
+        if normalized == "anthropic":
+            return _fetch_anthropic_account_usage()
+        if normalized == "openrouter":
+            return _fetch_openrouter_account_usage(base_url, api_key)
+    except Exception:
+        return None
+    return None
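Example wiring for the new module, using names from this file — it needs live provider credentials, so treat it as illustrative rather than a test:

```python
from agent.account_usage import fetch_account_usage, render_account_usage_lines

snapshot = fetch_account_usage("openrouter")
if snapshot and snapshot.available:
    for line in render_account_usage_lines(snapshot):
        print(line)
elif snapshot and snapshot.unavailable_reason:
    print(snapshot.unavailable_reason)
# fetch_account_usage swallows provider errors and returns None, so callers
# must handle the no-snapshot case themselves.
```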
diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py
index 64b9522517..5e36b1f37e 100644
--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@@ -19,6 +19,7 @@ from pathlib import Path
 from hermes_constants import get_hermes_home
 from types import SimpleNamespace
 from typing import Any, Dict, List, Optional, Tuple
+from utils import normalize_proxy_env_vars

 try:
     import anthropic as _anthropic_sdk
@@ -265,6 +266,14 @@ def _is_third_party_anthropic_endpoint(base_url: str | None) -> bool:
     return True  # Any other endpoint is a third-party proxy


+def _is_kimi_coding_endpoint(base_url: str | None) -> bool:
+    """Return True for Kimi's /coding endpoint that requires claude-code UA."""
+    normalized = _normalize_base_url_text(base_url)
+    if not normalized:
+        return False
+    return normalized.rstrip("/").lower().startswith("https://api.kimi.com/coding")
+
+
 def _requires_bearer_auth(base_url: str | None) -> bool:
     """Return True for Anthropic-compatible providers that require Bearer auth.
@@ -292,9 +301,15 @@ def _common_betas_for_base_url(base_url: str | None) -> list[str]:
     return _COMMON_BETAS


-def build_anthropic_client(api_key: str, base_url: str = None):
+def build_anthropic_client(api_key: str, base_url: str = None, timeout: float = None):
     """Create an Anthropic client, auto-detecting setup-tokens vs API keys.

+    If *timeout* is provided it overrides the default 900s read timeout. The
+    connect timeout stays at 10s. Callers pass this from the per-provider /
+    per-model ``request_timeout_seconds`` config so Anthropic-native and
+    Anthropic-compatible providers respect the same knob as OpenAI-wire
+    providers.
+
     Returns an anthropic.Anthropic instance.
     """
     if _anthropic_sdk is None:
@@ -302,19 +317,32 @@ def build_anthropic_client(api_key: str, base_url: str = None):
             "The 'anthropic' package is required for the Anthropic provider. "
             "Install it with: pip install 'anthropic>=0.39.0'"
         )
+
+    normalize_proxy_env_vars()
+
     from httpx import Timeout

     normalized_base_url = _normalize_base_url_text(base_url)
+    _read_timeout = timeout if (isinstance(timeout, (int, float)) and timeout > 0) else 900.0
     kwargs = {
-        "timeout": Timeout(timeout=900.0, connect=10.0),
+        "timeout": Timeout(timeout=float(_read_timeout), connect=10.0),
     }
     if normalized_base_url:
         kwargs["base_url"] = normalized_base_url

     common_betas = _common_betas_for_base_url(normalized_base_url)
-    if _requires_bearer_auth(normalized_base_url):
+    if _is_kimi_coding_endpoint(base_url):
+        # Kimi's /coding endpoint requires User-Agent: claude-code/0.1.0
+        # to be recognized as a valid Coding Agent. Without it, returns 403.
+        # Check this BEFORE _requires_bearer_auth since both match api.kimi.com/coding.
+        kwargs["api_key"] = api_key
+        kwargs["default_headers"] = {
+            "User-Agent": "claude-code/0.1.0",
+            **({"anthropic-beta": ",".join(common_betas)} if common_betas else {}),
+        }
+    elif _requires_bearer_auth(normalized_base_url):
         # Some Anthropic-compatible providers (e.g. MiniMax) expect the API key in
-        # Authorization: Bearer even for regular API keys. Route those endpoints
+        # Authorization: Bearer *** for regular API keys. Route those endpoints
         # through auth_token so the SDK sends Bearer auth instead of x-api-key.
         # Check this before OAuth token shape detection because MiniMax secrets do
         # not use Anthropic's sk-ant-api prefix and would otherwise be misread as
@@ -1055,6 +1083,31 @@ def convert_messages_to_anthropic(
                     "name": fn.get("name", ""),
                     "input": parsed_args,
                 })
+            # Kimi's /coding endpoint (Anthropic protocol) requires assistant
+            # tool-call messages to carry reasoning_content when thinking is
+            # enabled server-side. Preserve it as a thinking block so Kimi
+            # can validate the message history. See hermes-agent#13848.
+            #
+            # Accept empty string "" — _copy_reasoning_content_for_api()
+            # injects "" as a tier-3 fallback for Kimi tool-call messages
+            # that had no reasoning. Kimi requires the field to exist, even
+            # if empty.
+            #
+            # Prepend (not append): Anthropic protocol requires thinking
+            # blocks before text and tool_use blocks.
+            #
+            # Guard: only add when reasoning_details didn't already contribute
+            # thinking blocks. On native Anthropic, reasoning_details produces
+            # signed thinking blocks — adding another unsigned one from
+            # reasoning_content would create a duplicate (same text) that gets
+            # downgraded to a spurious text block on the last assistant message.
+            reasoning_content = m.get("reasoning_content")
+            _already_has_thinking = any(
+                isinstance(b, dict) and b.get("type") in ("thinking", "redacted_thinking")
+                for b in blocks
+            )
+            if isinstance(reasoning_content, str) and not _already_has_thinking:
+                blocks.insert(0, {"type": "thinking", "thinking": reasoning_content})
             # Anthropic rejects empty assistant content
             effective = blocks or content
             if not effective or effective == "":
@@ -1210,6 +1263,7 @@ def convert_messages_to_anthropic(
     # cache markers can interfere with signature validation.
     _THINKING_TYPES = frozenset(("thinking", "redacted_thinking"))
     _is_third_party = _is_third_party_anthropic_endpoint(base_url)
+    _is_kimi = _is_kimi_coding_endpoint(base_url)

     last_assistant_idx = None
     for i in range(len(result) - 1, -1, -1):
@@ -1221,7 +1275,25 @@ def convert_messages_to_anthropic(
         if m.get("role") != "assistant" or not isinstance(m.get("content"), list):
             continue

-        if _is_third_party or idx != last_assistant_idx:
+        if _is_kimi:
+            # Kimi's /coding endpoint enables thinking server-side and
+            # requires unsigned thinking blocks on replayed assistant
+            # tool-call messages. Strip signed Anthropic blocks (Kimi
+            # can't validate signatures) but preserve the unsigned ones
+            # we synthesised from reasoning_content above.
+            new_content = []
+            for b in m["content"]:
+                if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
+                    new_content.append(b)
+                    continue
+                if b.get("signature") or b.get("data"):
+                    # Anthropic-signed block — Kimi can't validate, strip
+                    continue
+                # Unsigned thinking (synthesised from reasoning_content) —
+                # keep it: Kimi needs it for message-history validation.
+                new_content.append(b)
+            m["content"] = new_content or [{"type": "text", "text": "(empty)"}]
+        elif _is_third_party or idx != last_assistant_idx:
             # Third-party endpoint: strip ALL thinking blocks from every
             # assistant message — signatures are Anthropic-proprietary.
             # Direct Anthropic: strip from non-latest assistant messages only.
@@ -1398,11 +1470,25 @@ def build_anthropic_kwargs(
     # MiniMax Anthropic-compat endpoints support thinking (manual mode only,
     # not adaptive). Haiku does NOT support extended thinking — skip entirely.
     #
+    # Kimi's /coding endpoint speaks the Anthropic Messages protocol but has
+    # its own thinking semantics: when ``thinking.enabled`` is sent, Kimi
+    # validates the message history and requires every prior assistant
+    # tool-call message to carry OpenAI-style ``reasoning_content``. The
+    # Anthropic path never populates that field, and
+    # ``convert_messages_to_anthropic`` strips all Anthropic thinking blocks
+    # on third-party endpoints — so the request fails with HTTP 400
+    # "thinking is enabled but reasoning_content is missing in assistant
+    # tool call message at index N". Kimi's reasoning is driven server-side
+    # on the /coding route, so skip Anthropic's thinking parameter entirely
+    # for that host. (Kimi on chat_completions enables thinking via
+    # extra_body in the ChatCompletionsTransport — see #13503.)
+    #
     # On 4.7+ the `thinking.display` field defaults to "omitted", which
     # silently hides reasoning text that Hermes surfaces in its CLI. We
     # request "summarized" so the reasoning blocks stay populated — matching
     # 4.6 behavior and preserving the activity-feed UX during long tool runs.
-    if reasoning_config and isinstance(reasoning_config, dict):
+    _is_kimi_coding = _is_kimi_coding_endpoint(base_url)
+    if reasoning_config and isinstance(reasoning_config, dict) and not _is_kimi_coding:
         if reasoning_config.get("enabled") is not False and "haiku" not in model.lower():
             effort = str(reasoning_config.get("effort", "medium")).lower()
             budget = THINKING_BUDGET.get(effort, 8000)
@@ -1518,3 +1604,42 @@ def normalize_anthropic_response(
         ),
         finish_reason,
     )
+
+
+def normalize_anthropic_response_v2(
+    response,
+    strip_tool_prefix: bool = False,
+) -> "NormalizedResponse":
+    """Normalize Anthropic response to NormalizedResponse.
+
+    Wraps the existing normalize_anthropic_response() and maps its output
+    to the shared transport types. This allows incremental migration —
+    one call site at a time — without changing the original function.
+    """
+    from agent.transports.types import NormalizedResponse, build_tool_call
+
+    assistant_msg, finish_reason = normalize_anthropic_response(response, strip_tool_prefix)
+
+    tool_calls = None
+    if assistant_msg.tool_calls:
+        tool_calls = [
+            build_tool_call(
+                id=tc.id,
+                name=tc.function.name,
+                arguments=tc.function.arguments,
+            )
+            for tc in assistant_msg.tool_calls
+        ]
+
+    provider_data = {}
+    if getattr(assistant_msg, "reasoning_details", None):
+        provider_data["reasoning_details"] = assistant_msg.reasoning_details
+
+    return NormalizedResponse(
+        content=assistant_msg.content,
+        tool_calls=tool_calls,
+        finish_reason=finish_reason,
+        reasoning=getattr(assistant_msg, "reasoning", None),
+        usage=None,  # Anthropic usage is on the raw response, not the normaliser
+        provider_data=provider_data or None,
+    )
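How the pieces above compose for Kimi's /coding route, as a sketch using names from this diff — the key is a placeholder, not a real credential:

```python
from agent.anthropic_adapter import build_anthropic_client

# The Kimi branch wins over the generic Bearer branch, so the resulting
# client carries the claude-code User-Agent Kimi's gateway requires.
# `timeout` overrides only the 900s read timeout; connect stays at 10s.
client = build_anthropic_client(
    api_key="sk-placeholder",
    base_url="https://api.kimi.com/coding",
    timeout=300.0,
)
```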
diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index 19bde946ee..4f8c9a0a46 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -48,6 +48,7 @@ from openai import OpenAI
 from agent.credential_pool import load_pool
 from hermes_cli.config import get_hermes_home
 from hermes_constants import OPENROUTER_BASE_URL
+from utils import base_url_host_matches, base_url_hostname, normalize_proxy_env_vars

 logger = logging.getLogger(__name__)
@@ -95,51 +96,37 @@ def _normalize_aux_provider(provider: Optional[str]) -> str:
     return _PROVIDER_ALIASES.get(normalized, normalized)


-_FIXED_TEMPERATURE_MODELS: Dict[str, float] = {
-    "kimi-for-coding": 0.6,
-}
-
-# Moonshot's kimi-for-coding endpoint (api.kimi.com/coding) documents:
-# "k2.5 model will use a fixed value 1.0, non-thinking mode will use a fixed
-# value 0.6. Any other value will result in an error." The same lock applies
-# to the other k2.* models served on that endpoint. Enumerated explicitly so
-# non-coding siblings like `kimi-k2-instruct` (variable temperature, served on
-# the standard chat API and third parties) are NOT clamped.
-# Source: https://platform.kimi.ai/docs/guide/kimi-k2-5-quickstart
-_KIMI_INSTANT_MODELS: frozenset = frozenset({
-    "kimi-k2.5",
-    "kimi-k2-turbo-preview",
-    "kimi-k2-0905-preview",
-})
-_KIMI_THINKING_MODELS: frozenset = frozenset({
-    "kimi-k2-thinking",
-    "kimi-k2-thinking-turbo",
-})
+# Sentinel: when returned by _fixed_temperature_for_model(), callers must
+# strip the ``temperature`` key from API kwargs entirely so the provider's
+# server-side default applies. Kimi/Moonshot models manage temperature
+# internally — sending *any* value (even the "correct" one) can conflict
+# with gateway-side mode selection (thinking → 1.0, non-thinking → 0.6).
+OMIT_TEMPERATURE: object = object()


-def _fixed_temperature_for_model(model: Optional[str]) -> Optional[float]:
-    """Return a required temperature override for models with strict contracts.
-
-    Moonshot's kimi-for-coding endpoint rejects any non-approved temperature on
-    the k2.5 family. Non-thinking variants require exactly 0.6; thinking
-    variants require 1.0. An optional ``vendor/`` prefix (e.g.
-    ``moonshotai/kimi-k2.5``) is tolerated for aggregator routings.
-    Returns ``None`` for every other model, including ``kimi-k2-instruct*``
-    which is the separate non-coding K2 family with variable temperature.
-    """
-    normalized = (model or "").strip().lower()
-    fixed = _FIXED_TEMPERATURE_MODELS.get(normalized)
-    if fixed is not None:
-        logger.debug("Forcing temperature=%s for model %r (fixed map)", fixed, model)
-        return fixed
-    bare = normalized.rsplit("/", 1)[-1]
-    if bare in _KIMI_THINKING_MODELS:
-        logger.debug("Forcing temperature=1.0 for kimi thinking model %r", model)
-        return 1.0
-    if bare in _KIMI_INSTANT_MODELS:
-        logger.debug("Forcing temperature=0.6 for kimi instant model %r", model)
-        return 0.6
+def _is_kimi_model(model: Optional[str]) -> bool:
+    """True for any Kimi / Moonshot model that manages temperature server-side."""
+    bare = (model or "").strip().lower().rsplit("/", 1)[-1]
+    return bare.startswith("kimi-") or bare == "kimi"
+
+
+def _fixed_temperature_for_model(
+    model: Optional[str],
+    base_url: Optional[str] = None,
+) -> "Optional[float] | object":
+    """Return a temperature directive for models with strict contracts.
+
+    Returns:
+        ``OMIT_TEMPERATURE`` — caller must remove the ``temperature`` key so the
+            provider chooses its own default. Used for all Kimi / Moonshot
+            models whose gateway selects temperature server-side.
+        ``float`` — a specific value the caller must use (reserved for future
+            models with fixed-temperature contracts).
+        ``None`` — no override; caller should use its own default.
+    """
+    if _is_kimi_model(model):
+        logger.debug("Omitting temperature for Kimi model %r (server-managed)", model)
+        return OMIT_TEMPERATURE
     return None

 # Default auxiliary models for direct API-key providers (cheap/fast for side tasks)
 _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
     "gemini": "gemini-3-flash-preview",
     "zai": "glm-4.5-flash",
     "kimi-coding": "kimi-k2-turbo-preview",
+    "stepfun": "step-3.5-flash",
     "kimi-coding-cn": "kimi-k2-turbo-preview",
     "minimax": "MiniMax-M2.7",
     "minimax-cn": "MiniMax-M2.7",
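Caller-side contract for the sentinel, sketched as a helper (assuming `kwargs` is the dict headed for `chat.completions.create`):

```python
def apply_temperature_directive(kwargs: dict, model: str, base_url: str | None = None) -> None:
    directive = _fixed_temperature_for_model(model, base_url)
    if directive is OMIT_TEMPERATURE:
        kwargs.pop("temperature", None)    # let the gateway pick 1.0 / 0.6 by mode
    elif isinstance(directive, float):
        kwargs["temperature"] = directive  # reserved for fixed-temperature contracts
```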
@@ -185,8 +183,6 @@ auxiliary_is_nous: bool = False # Default auxiliary models per provider _OPENROUTER_MODEL = "google/gemini-3-flash-preview" _NOUS_MODEL = "google/gemini-3-flash-preview" -_NOUS_FREE_TIER_VISION_MODEL = "xiaomi/mimo-v2-omni" -_NOUS_FREE_TIER_AUX_MODEL = "xiaomi/mimo-v2-pro" _NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1" _ANTHROPIC_DEFAULT_BASE_URL = "https://api.anthropic.com" _AUTH_JSON_PATH = get_hermes_home() / "auth.json" @@ -200,6 +196,45 @@ _CODEX_AUX_MODEL = "gpt-5.2-codex" _CODEX_AUX_BASE_URL = "https://chatgpt.com/backend-api/codex" +def _codex_cloudflare_headers(access_token: str) -> Dict[str, str]: + """Headers required to avoid Cloudflare 403s on chatgpt.com/backend-api/codex. + + The Cloudflare layer in front of the Codex endpoint whitelists a small set of + first-party originators (``codex_cli_rs``, ``codex_vscode``, ``codex_sdk_ts``, + anything starting with ``Codex``). Requests from non-residential IPs (VPS, + server-hosted agents) that don't advertise an allowed originator are served + a 403 with ``cf-mitigated: challenge`` regardless of auth correctness. + + We pin ``originator: codex_cli_rs`` to match the upstream codex-rs CLI, set + ``User-Agent`` to a codex_cli_rs-shaped string (beats SDK fingerprinting), + and extract ``ChatGPT-Account-ID`` (canonical casing, from codex-rs + ``auth.rs``) out of the OAuth JWT's ``chatgpt_account_id`` claim. + + Malformed tokens are tolerated — we drop the account-ID header rather than + raise, so a bad token still surfaces as an auth error (401) instead of a + crash at client construction. + """ + headers = { + "User-Agent": "codex_cli_rs/0.0.0 (Hermes Agent)", + "originator": "codex_cli_rs", + } + if not isinstance(access_token, str) or not access_token.strip(): + return headers + try: + import base64 + parts = access_token.split(".") + if len(parts) < 2: + return headers + payload_b64 = parts[1] + "=" * (-len(parts[1]) % 4) + claims = json.loads(base64.urlsafe_b64decode(payload_b64)) + acct_id = claims.get("https://api.openai.com/auth", {}).get("chatgpt_account_id") + if isinstance(acct_id, str) and acct_id: + headers["ChatGPT-Account-ID"] = acct_id + except Exception: + pass + return headers + + def _to_openai_base_url(base_url: str) -> str: """Normalize an Anthropic-style base URL to OpenAI-compatible format. @@ -692,6 +727,33 @@ def _nous_base_url() -> str: return os.getenv("NOUS_INFERENCE_BASE_URL", _NOUS_DEFAULT_BASE_URL) +def _resolve_nous_runtime_api(*, force_refresh: bool = False) -> Optional[tuple[str, str]]: + """Return fresh Nous runtime credentials when available. + + This mirrors the main agent's 401 recovery path and keeps auxiliary + clients aligned with the singleton auth store + mint flow instead of + relying only on whatever raw tokens happen to be sitting in auth.json + or the credential pool. 
+ """ + try: + from hermes_cli.auth import resolve_nous_runtime_credentials + + creds = resolve_nous_runtime_credentials( + min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))), + timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")), + force_mint=force_refresh, + ) + except Exception as exc: + logger.debug("Auxiliary Nous runtime credential resolution failed: %s", exc) + return None + + api_key = str(creds.get("api_key") or "").strip() + base_url = str(creds.get("base_url") or "").strip().rstrip("/") + if not api_key or not base_url: + return None + return api_key, base_url + + def _read_codex_access_token() -> Optional[str]: """Read a valid, non-expired Codex OAuth access token from Hermes auth store. @@ -775,10 +837,15 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]: if model is None: continue # skip provider if we don't know a valid aux model logger.debug("Auxiliary text client: %s (%s) via pool", pconfig.name, model) + if provider_id == "gemini": + from agent.gemini_native_adapter import GeminiNativeClient, is_native_gemini_base_url + + if is_native_gemini_base_url(base_url): + return GeminiNativeClient(api_key=api_key, base_url=base_url), model extra = {} - if "api.kimi.com" in base_url.lower(): - extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"} - elif "api.githubcopilot.com" in base_url.lower(): + if base_url_host_matches(base_url, "api.kimi.com"): + extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"} + elif base_url_host_matches(base_url, "api.githubcopilot.com"): from hermes_cli.models import copilot_default_headers extra["default_headers"] = copilot_default_headers() @@ -796,10 +863,15 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]: if model is None: continue # skip provider if we don't know a valid aux model logger.debug("Auxiliary text client: %s (%s)", pconfig.name, model) + if provider_id == "gemini": + from agent.gemini_native_adapter import GeminiNativeClient, is_native_gemini_base_url + + if is_native_gemini_base_url(base_url): + return GeminiNativeClient(api_key=api_key, base_url=base_url), model extra = {} - if "api.kimi.com" in base_url.lower(): - extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"} - elif "api.githubcopilot.com" in base_url.lower(): + if base_url_host_matches(base_url, "api.kimi.com"): + extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"} + elif base_url_host_matches(base_url, "api.githubcopilot.com"): from hermes_cli.models import copilot_default_headers extra["default_headers"] = copilot_default_headers() @@ -848,29 +920,50 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]: pass nous = _read_nous_auth() - if not nous: + runtime = _resolve_nous_runtime_api(force_refresh=False) + if runtime is None and not nous: return None, None global auxiliary_is_nous auxiliary_is_nous = True logger.debug("Auxiliary client: Nous Portal") - if nous.get("source") == "pool": - model = "gemini-3-flash" - else: - model = _NOUS_MODEL - # Free-tier users can't use paid auxiliary models — use the free - # models instead: mimo-v2-omni for vision, mimo-v2-pro for text tasks. + + # Ask the Portal which model it currently recommends for this task type. + # The /api/nous/recommended-models endpoint is the authoritative source: + # it distinguishes paid vs free tier recommendations, and get_nous_recommended_aux_model + # auto-detects the caller's tier via check_nous_free_tier(). 
Fall back to + # _NOUS_MODEL (google/gemini-3-flash-preview) when the Portal is unreachable + # or returns a null recommendation for this task type. + model = _NOUS_MODEL try: - from hermes_cli.models import check_nous_free_tier - if check_nous_free_tier(): - model = _NOUS_FREE_TIER_VISION_MODEL if vision else _NOUS_FREE_TIER_AUX_MODEL - logger.debug("Free-tier Nous account — using %s for auxiliary/%s", - model, "vision" if vision else "text") - except Exception: - pass + from hermes_cli.models import get_nous_recommended_aux_model + recommended = get_nous_recommended_aux_model(vision=vision) + if recommended: + model = recommended + logger.debug( + "Auxiliary/%s: using Portal-recommended model %s", + "vision" if vision else "text", model, + ) + else: + logger.debug( + "Auxiliary/%s: no Portal recommendation, falling back to %s", + "vision" if vision else "text", model, + ) + except Exception as exc: + logger.debug( + "Auxiliary/%s: recommended-models lookup failed (%s); " + "falling back to %s", + "vision" if vision else "text", exc, model, + ) + + if runtime is not None: + api_key, base_url = runtime + else: + api_key = _nous_api_key(nous or {}) + base_url = str((nous or {}).get("inference_base_url") or _nous_base_url()).rstrip("/") return ( OpenAI( - api_key=_nous_api_key(nous), - base_url=str(nous.get("inference_base_url") or _nous_base_url()).rstrip("/"), + api_key=api_key, + base_url=base_url, ), model, ) @@ -948,7 +1041,7 @@ def _resolve_custom_runtime() -> Tuple[Optional[str], Optional[str], Optional[st return None, None, None custom_base = custom_base.strip().rstrip("/") - if "openrouter.ai" in custom_base.lower(): + if base_url_host_matches(custom_base, "openrouter.ai"): # requested='custom' falls back to OpenRouter when no custom endpoint is # configured. Treat that as "no custom endpoint" for auxiliary routing. return None, None, None @@ -982,6 +1075,8 @@ def _validate_proxy_env_urls() -> None: """ from urllib.parse import urlparse + normalize_proxy_env_vars() + for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", "https_proxy", "http_proxy", "all_proxy"): value = str(os.environ.get(key) or "").strip() @@ -1016,7 +1111,7 @@ def _validate_base_url(base_url: str) -> None: ) from exc -def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]: +def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]: runtime = _resolve_custom_runtime() if len(runtime) == 2: custom_base, custom_key = runtime @@ -1032,6 +1127,23 @@ def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]: if custom_mode == "codex_responses": real_client = OpenAI(api_key=custom_key, base_url=custom_base) return CodexAuxiliaryClient(real_client, model), model + if custom_mode == "anthropic_messages": + # Third-party Anthropic-compatible gateway (MiniMax, Zhipu GLM, + # LiteLLM proxies, etc.). Must NEVER be treated as OAuth — + # Anthropic OAuth claims only apply to api.anthropic.com. + try: + from agent.anthropic_adapter import build_anthropic_client + real_client = build_anthropic_client(custom_key, custom_base) + except ImportError: + logger.warning( + "Custom endpoint declares api_mode=anthropic_messages but the " + "anthropic SDK is not installed — falling back to OpenAI-wire." 
+ ) + return OpenAI(api_key=custom_key, base_url=custom_base), model + return ( + AnthropicAuxiliaryClient(real_client, model, custom_key, custom_base, is_oauth=False), + model, + ) return OpenAI(api_key=custom_key, base_url=custom_base), model @@ -1052,7 +1164,11 @@ def _try_codex() -> Tuple[Optional[Any], Optional[str]]: return None, None base_url = _CODEX_AUX_BASE_URL logger.debug("Auxiliary client: Codex OAuth (%s via Responses API)", _CODEX_AUX_MODEL) - real_client = OpenAI(api_key=codex_token, base_url=base_url) + real_client = OpenAI( + api_key=codex_token, + base_url=base_url, + default_headers=_codex_cloudflare_headers(codex_token), + ) return CodexAuxiliaryClient(real_client, _CODEX_AUX_MODEL), _CODEX_AUX_MODEL @@ -1191,6 +1307,15 @@ def _is_connection_error(exc: Exception) -> bool: return False +def _is_auth_error(exc: Exception) -> bool: + """Detect auth failures that should trigger provider-specific refresh.""" + status = getattr(exc, "status_code", None) + if status == 401: + return True + err_lower = str(exc).lower() + return "error code: 401" in err_lower or "authenticationerror" in type(exc).__name__.lower() + + def _try_payment_fallback( failed_provider: str, task: str = None, @@ -1348,6 +1473,13 @@ def _to_async_client(sync_client, model: str): return AsyncCodexAuxiliaryClient(sync_client), model if isinstance(sync_client, AnthropicAuxiliaryClient): return AsyncAnthropicAuxiliaryClient(sync_client), model + try: + from agent.gemini_native_adapter import GeminiNativeClient, AsyncGeminiNativeClient + + if isinstance(sync_client, GeminiNativeClient): + return AsyncGeminiNativeClient(sync_client), model + except ImportError: + pass try: from agent.copilot_acp_client import CopilotACPClient if isinstance(sync_client, CopilotACPClient): @@ -1359,15 +1491,15 @@ def _to_async_client(sync_client, model: str): "api_key": sync_client.api_key, "base_url": str(sync_client.base_url), } - base_lower = str(sync_client.base_url).lower() - if "openrouter" in base_lower: + sync_base_url = str(sync_client.base_url) + if base_url_host_matches(sync_base_url, "openrouter.ai"): async_kwargs["default_headers"] = dict(_OR_HEADERS) - elif "api.githubcopilot.com" in base_lower: + elif base_url_host_matches(sync_base_url, "api.githubcopilot.com"): from hermes_cli.models import copilot_default_headers async_kwargs["default_headers"] = copilot_default_headers() - elif "api.kimi.com" in base_lower: - async_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"} + elif base_url_host_matches(sync_base_url, "api.kimi.com"): + async_kwargs["default_headers"] = {"User-Agent": "claude-code/0.1.0"} return AsyncOpenAI(**async_kwargs), model @@ -1443,8 +1575,7 @@ def resolve_provider_client( # Auto-detect: api.openai.com + codex model name pattern if api_mode and api_mode != "codex_responses": return False # explicit non-codex mode - normalized_base = (base_url_str or "").strip().lower() - if "api.openai.com" in normalized_base and "openrouter" not in normalized_base: + if base_url_hostname(base_url_str) == "api.openai.com": model_lower = (model_str or "").lower() if "codex" in model_lower: return True @@ -1492,7 +1623,13 @@ def resolve_provider_client( # ── Nous Portal (OAuth) ────────────────────────────────────────── if provider == "nous": - client, default = _try_nous() + # Detect vision tasks: either explicit model override from + # _PROVIDER_VISION_MODELS, or caller passed a known vision model. 
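+        # Illustrative outcomes, using the override examples documented for
+        # _PROVIDER_VISION_MODELS elsewhere in this file (hypothetical inputs):
+        #   model="glm-5v-turbo"    → vision (a _PROVIDER_VISION_MODELS value)
+        #   model="mimo-v2-omni"    → vision (known Nous vision model)
+        #   model="gemini-3-flash"  → text route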
+ _is_vision = ( + model in _PROVIDER_VISION_MODELS.values() + or (model or "").strip().lower() == "mimo-v2-omni" + ) + client, default = _try_nous(vision=_is_vision) if client is None: logger.warning("resolve_provider_client: nous requested " "but Nous Portal not configured (run: hermes auth)") @@ -1512,7 +1649,11 @@ def resolve_provider_client( "but no Codex OAuth token found (run: hermes model)") return None, None final_model = _normalize_resolved_model(model or _CODEX_AUX_MODEL, provider) - raw_client = OpenAI(api_key=codex_token, base_url=_CODEX_AUX_BASE_URL) + raw_client = OpenAI( + api_key=codex_token, + base_url=_CODEX_AUX_BASE_URL, + default_headers=_codex_cloudflare_headers(codex_token), + ) return (raw_client, final_model) # Standard path: wrap in CodexAuxiliaryClient adapter client, default = _try_codex() @@ -1544,9 +1685,9 @@ def resolve_provider_client( provider, ) extra = {} - if "api.kimi.com" in custom_base.lower(): - extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"} - elif "api.githubcopilot.com" in custom_base.lower(): + if base_url_host_matches(custom_base, "api.kimi.com"): + extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"} + elif base_url_host_matches(custom_base, "api.githubcopilot.com"): from hermes_cli.models import copilot_default_headers extra["default_headers"] = copilot_default_headers() client = OpenAI(api_key=custom_key, base_url=custom_base, **extra) @@ -1640,11 +1781,20 @@ def resolve_provider_client( default_model = _API_KEY_PROVIDER_AUX_MODELS.get(provider, "") final_model = _normalize_resolved_model(model or default_model, provider) + if provider == "gemini": + from agent.gemini_native_adapter import GeminiNativeClient, is_native_gemini_base_url + + if is_native_gemini_base_url(base_url): + client = GeminiNativeClient(api_key=api_key, base_url=base_url) + logger.debug("resolve_provider_client: %s (%s)", provider, final_model) + return (_to_async_client(client, final_model) if async_mode + else (client, final_model)) + # Provider-specific headers headers = {} - if "api.kimi.com" in base_url.lower(): - headers["User-Agent"] = "KimiCLI/1.30.0" - elif "api.githubcopilot.com" in base_url.lower(): + if base_url_host_matches(base_url, "api.kimi.com"): + headers["User-Agent"] = "claude-code/0.1.0" + elif base_url_host_matches(base_url, "api.githubcopilot.com"): from hermes_cli.models import copilot_default_headers headers.update(copilot_default_headers()) @@ -1875,24 +2025,35 @@ def resolve_vision_provider_client( # _PROVIDER_VISION_MODELS provides per-provider vision model # overrides when the provider has a dedicated multimodal model # that differs from the chat model (e.g. xiaomi → mimo-v2-omni, - # zai → glm-5v-turbo). + # zai → glm-5v-turbo). Nous is the exception: it has a dedicated + # strict vision backend with tier-aware defaults, so it must not + # fall through to the user's text chat model here. # 2. OpenRouter (vision-capable aggregator fallback) # 3. Nous Portal (vision-capable aggregator fallback) # 4. 
Stop main_provider = _read_main_provider() main_model = _read_main_model() if main_provider and main_provider not in ("auto", ""): - vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model) - rpc_client, rpc_model = resolve_provider_client( - main_provider, vision_model, - api_mode=resolved_api_mode) - if rpc_client is not None: - logger.info( - "Vision auto-detect: using main provider %s (%s)", - main_provider, rpc_model or vision_model, - ) - return _finalize( - main_provider, rpc_client, rpc_model or vision_model) + if main_provider == "nous": + sync_client, default_model = _resolve_strict_vision_backend(main_provider) + if sync_client is not None: + logger.info( + "Vision auto-detect: using main provider %s (%s)", + main_provider, default_model or resolved_model or main_model, + ) + return _finalize(main_provider, sync_client, default_model) + else: + vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model) + rpc_client, rpc_model = resolve_provider_client( + main_provider, vision_model, + api_mode=resolved_api_mode) + if rpc_client is not None: + logger.info( + "Vision auto-detect: using main provider %s (%s)", + main_provider, rpc_model or vision_model, + ) + return _finalize( + main_provider, rpc_client, rpc_model or vision_model) # Fall back through aggregators (uses their dedicated vision model, # not the user's main model) when main provider has no client. @@ -1939,7 +2100,7 @@ def auxiliary_max_tokens_param(value: int) -> dict: # Only use max_completion_tokens for direct OpenAI custom endpoints if (not or_key and _read_nous_auth() is None - and "api.openai.com" in custom_base.lower()): + and base_url_hostname(custom_base) == "api.openai.com"): return {"max_completion_tokens": value} return {"max_tokens": value} @@ -1967,6 +2128,76 @@ _client_cache_lock = threading.Lock() _CLIENT_CACHE_MAX_SIZE = 64 # safety belt — evict oldest when exceeded +def _client_cache_key( + provider: str, + *, + async_mode: bool, + base_url: Optional[str] = None, + api_key: Optional[str] = None, + api_mode: Optional[str] = None, + main_runtime: Optional[Dict[str, Any]] = None, +) -> tuple: + runtime = _normalize_main_runtime(main_runtime) + runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else () + return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key) + + +def _store_cached_client(cache_key: tuple, client: Any, default_model: Optional[str], *, bound_loop: Any = None) -> None: + with _client_cache_lock: + old_entry = _client_cache.get(cache_key) + if old_entry is not None and old_entry[0] is not client: + _force_close_async_httpx(old_entry[0]) + try: + close_fn = getattr(old_entry[0], "close", None) + if callable(close_fn): + close_fn() + except Exception: + pass + _client_cache[cache_key] = (client, default_model, bound_loop) + + +def _refresh_nous_auxiliary_client( + *, + cache_provider: str, + model: Optional[str], + async_mode: bool, + base_url: Optional[str] = None, + api_key: Optional[str] = None, + api_mode: Optional[str] = None, + main_runtime: Optional[Dict[str, Any]] = None, +) -> Tuple[Optional[Any], Optional[str]]: + """Refresh Nous runtime creds, rebuild the client, and replace the cache entry.""" + runtime = _resolve_nous_runtime_api(force_refresh=True) + if runtime is None: + return None, model + + fresh_key, fresh_base_url = runtime + sync_client = OpenAI(api_key=fresh_key, base_url=fresh_base_url) + final_model = model + + current_loop = None + if async_mode: + try: + import 
asyncio as _aio + current_loop = _aio.get_event_loop() + except RuntimeError: + pass + client, final_model = _to_async_client(sync_client, final_model or "") + else: + client = sync_client + + cache_key = _client_cache_key( + cache_provider, + async_mode=async_mode, + base_url=base_url, + api_key=api_key, + api_mode=api_mode, + main_runtime=main_runtime, + ) + _store_cached_client(cache_key, client, final_model, bound_loop=current_loop) + return client, final_model + + def neuter_async_httpx_del() -> None: """Monkey-patch ``AsyncHttpxClientWrapper.__del__`` to be a no-op. @@ -2068,7 +2299,7 @@ def cleanup_stale_async_clients() -> None: def _is_openrouter_client(client: Any) -> bool: for obj in (client, getattr(client, "_client", None), getattr(client, "client", None)): - if obj and "openrouter" in str(getattr(obj, "base_url", "") or "").lower(): + if obj and base_url_host_matches(str(getattr(obj, "base_url", "") or ""), "openrouter.ai"): return True return False @@ -2120,8 +2351,14 @@ def _get_cached_client( except RuntimeError: pass runtime = _normalize_main_runtime(main_runtime) - runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else () - cache_key = (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key) + cache_key = _client_cache_key( + provider, + async_mode=async_mode, + base_url=base_url, + api_key=api_key, + api_mode=api_mode, + main_runtime=main_runtime, + ) with _client_cache_lock: if cache_key in _client_cache: cached_client, cached_default, cached_loop = _client_cache[cache_key] @@ -2190,7 +2427,6 @@ def _resolve_task_provider_model( to "custom" and the task uses that direct endpoint. api_mode is one of "chat_completions", "codex_responses", or None (auto-detect). 
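    For illustration, the config shape this function reads via
    _get_auxiliary_task_config() looks like the following (values here are
    hypothetical):

        {"auxiliary": {"vision": {"provider": "custom",
                                  "model": "glm-5v-turbo",
                                  "base_url": "https://example.invalid/v1",
                                  "api_mode": "chat_completions"}}}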
""" - config = {} cfg_provider = None cfg_model = None cfg_base_url = None @@ -2198,16 +2434,7 @@ def _resolve_task_provider_model( cfg_api_mode = None if task: - try: - from hermes_cli.config import load_config - config = load_config() - except ImportError: - config = {} - - aux = config.get("auxiliary", {}) if isinstance(config, dict) else {} - task_config = aux.get(task, {}) if isinstance(aux, dict) else {} - if not isinstance(task_config, dict): - task_config = {} + task_config = _get_auxiliary_task_config(task) cfg_provider = str(task_config.get("provider", "")).strip() or None cfg_model = str(task_config.get("model", "")).strip() or None cfg_base_url = str(task_config.get("base_url", "")).strip() or None @@ -2237,17 +2464,25 @@ def _resolve_task_provider_model( _DEFAULT_AUX_TIMEOUT = 30.0 -def _get_task_timeout(task: str, default: float = _DEFAULT_AUX_TIMEOUT) -> float: - """Read timeout from auxiliary.{task}.timeout in config, falling back to *default*.""" +def _get_auxiliary_task_config(task: str) -> Dict[str, Any]: + """Return the config dict for auxiliary., or {} when unavailable.""" if not task: - return default + return {} try: from hermes_cli.config import load_config config = load_config() except ImportError: - return default + return {} aux = config.get("auxiliary", {}) if isinstance(config, dict) else {} task_config = aux.get(task, {}) if isinstance(aux, dict) else {} + return task_config if isinstance(task_config, dict) else {} + + +def _get_task_timeout(task: str, default: float = _DEFAULT_AUX_TIMEOUT) -> float: + """Read timeout from auxiliary.{task}.timeout in config, falling back to *default*.""" + if not task: + return default + task_config = _get_auxiliary_task_config(task) raw = task_config.get("timeout") if raw is not None: try: @@ -2257,6 +2492,15 @@ def _get_task_timeout(task: str, default: float = _DEFAULT_AUX_TIMEOUT) -> float return default +def _get_task_extra_body(task: str) -> Dict[str, Any]: + """Read auxiliary..extra_body and return a shallow copy when valid.""" + task_config = _get_auxiliary_task_config(task) + raw = task_config.get("extra_body") + if isinstance(raw, dict): + return dict(raw) + return {} + + # --------------------------------------------------------------------------- # Anthropic-compatible endpoint detection + image block conversion # --------------------------------------------------------------------------- @@ -2344,8 +2588,10 @@ def _build_call_kwargs( "timeout": timeout, } - fixed_temperature = _fixed_temperature_for_model(model) - if fixed_temperature is not None: + fixed_temperature = _fixed_temperature_for_model(model, base_url) + if fixed_temperature is OMIT_TEMPERATURE: + temperature = None # strip — let server choose + elif fixed_temperature is not None: temperature = fixed_temperature # Opus 4.7+ rejects any non-default temperature/top_p/top_k — silently @@ -2365,7 +2611,7 @@ def _build_call_kwargs( # Direct OpenAI api.openai.com with newer models needs max_completion_tokens. 
if provider == "custom": custom_base = base_url or _current_custom_base_url() - if "api.openai.com" in custom_base.lower(): + if base_url_hostname(custom_base) == "api.openai.com": kwargs["max_completion_tokens"] = max_tokens else: kwargs["max_tokens"] = max_tokens @@ -2457,6 +2703,8 @@ def call_llm( """ resolved_provider, resolved_model, resolved_base_url, resolved_api_key, resolved_api_mode = _resolve_task_provider_model( task, provider, model, base_url, api_key) + effective_extra_body = _get_task_extra_body(task) + effective_extra_body.update(extra_body or {}) if task == "vision": effective_provider, client, final_model = resolve_vision_provider_client( @@ -2525,11 +2773,14 @@ def call_llm( task, resolved_provider or "auto", final_model or "default", f" at {_base_info}" if _base_info and "openrouter" not in _base_info else "") + # Pass the client's actual base_url (not just resolved_base_url) so + # endpoint-specific temperature overrides can distinguish + # api.moonshot.ai vs api.kimi.com/coding even on auto-detected routes. kwargs = _build_call_kwargs( resolved_provider, final_model, messages, temperature=temperature, max_tokens=max_tokens, - tools=tools, timeout=effective_timeout, extra_body=extra_body, - base_url=resolved_base_url) + tools=tools, timeout=effective_timeout, extra_body=effective_extra_body, + base_url=_base_info or resolved_base_url) # Convert image blocks for Anthropic-compatible endpoints (e.g. MiniMax) _client_base = str(getattr(client, "base_url", "") or "") @@ -2555,6 +2806,29 @@ def call_llm( raise first_err = retry_err + # ── Nous auth refresh parity with main agent ────────────────── + client_is_nous = ( + resolved_provider == "nous" + or base_url_host_matches(_base_info, "inference-api.nousresearch.com") + ) + if _is_auth_error(first_err) and client_is_nous: + refreshed_client, refreshed_model = _refresh_nous_auxiliary_client( + cache_provider=resolved_provider or "nous", + model=final_model, + async_mode=False, + base_url=resolved_base_url, + api_key=resolved_api_key, + api_mode=resolved_api_mode, + main_runtime=main_runtime, + ) + if refreshed_client is not None: + logger.info("Auxiliary %s: refreshed Nous runtime credentials after 401, retrying", + task or "call") + if refreshed_model and refreshed_model != kwargs.get("model"): + kwargs["model"] = refreshed_model + return _validate_llm_response( + refreshed_client.chat.completions.create(**kwargs), task) + # ── Payment / credit exhaustion fallback ────────────────────── # When the resolved provider returns 402 or a credit-related error, # try alternative providers instead of giving up. 
This handles the @@ -2583,7 +2857,8 @@ def call_llm( fb_label, fb_model, messages, temperature=temperature, max_tokens=max_tokens, tools=tools, timeout=effective_timeout, - extra_body=extra_body) + extra_body=effective_extra_body, + base_url=str(getattr(fb_client, "base_url", "") or "")) return _validate_llm_response( fb_client.chat.completions.create(**fb_kwargs), task) raise @@ -2665,6 +2940,8 @@ async def async_call_llm( """ resolved_provider, resolved_model, resolved_base_url, resolved_api_key, resolved_api_mode = _resolve_task_provider_model( task, provider, model, base_url, api_key) + effective_extra_body = _get_task_extra_body(task) + effective_extra_body.update(extra_body or {}) if task == "vision": effective_provider, client, final_model = resolve_vision_provider_client( @@ -2718,14 +2995,17 @@ async def async_call_llm( effective_timeout = timeout if timeout is not None else _get_task_timeout(task) + # Pass the client's actual base_url (not just resolved_base_url) so + # endpoint-specific temperature overrides can distinguish + # api.moonshot.ai vs api.kimi.com/coding even on auto-detected routes. + _client_base = str(getattr(client, "base_url", "") or "") kwargs = _build_call_kwargs( resolved_provider, final_model, messages, temperature=temperature, max_tokens=max_tokens, - tools=tools, timeout=effective_timeout, extra_body=extra_body, - base_url=resolved_base_url) + tools=tools, timeout=effective_timeout, extra_body=effective_extra_body, + base_url=_client_base or resolved_base_url) # Convert image blocks for Anthropic-compatible endpoints (e.g. MiniMax) - _client_base = str(getattr(client, "base_url", "") or "") if _is_anthropic_compat_endpoint(resolved_provider, _client_base): kwargs["messages"] = _convert_openai_images_to_anthropic(kwargs["messages"]) @@ -2747,6 +3027,28 @@ async def async_call_llm( raise first_err = retry_err + # ── Nous auth refresh parity with main agent ────────────────── + client_is_nous = ( + resolved_provider == "nous" + or base_url_host_matches(_client_base, "inference-api.nousresearch.com") + ) + if _is_auth_error(first_err) and client_is_nous: + refreshed_client, refreshed_model = _refresh_nous_auxiliary_client( + cache_provider=resolved_provider or "nous", + model=final_model, + async_mode=True, + base_url=resolved_base_url, + api_key=resolved_api_key, + api_mode=resolved_api_mode, + ) + if refreshed_client is not None: + logger.info("Auxiliary %s (async): refreshed Nous runtime credentials after 401, retrying", + task or "call") + if refreshed_model and refreshed_model != kwargs.get("model"): + kwargs["model"] = refreshed_model + return _validate_llm_response( + await refreshed_client.chat.completions.create(**kwargs), task) + # ── Payment / connection fallback (mirrors sync call_llm) ───── should_fallback = _is_payment_error(first_err) or _is_connection_error(first_err) is_auto = resolved_provider in ("auto", "", None) @@ -2761,7 +3063,8 @@ async def async_call_llm( fb_label, fb_model, messages, temperature=temperature, max_tokens=max_tokens, tools=tools, timeout=effective_timeout, - extra_body=extra_body) + extra_body=effective_extra_body, + base_url=str(getattr(fb_client, "base_url", "") or "")) # Convert sync fallback client to async async_fb, async_fb_model = _to_async_client(fb_client, fb_model or "") if async_fb_model and async_fb_model != fb_kwargs.get("model"): diff --git a/agent/codex_responses_adapter.py b/agent/codex_responses_adapter.py new file mode 100644 index 0000000000..4d3e5590be --- /dev/null +++ 
b/agent/codex_responses_adapter.py @@ -0,0 +1,813 @@ +"""Codex Responses API adapter. + +Pure format-conversion and normalization logic for the OpenAI Responses API +(used by OpenAI Codex, xAI, GitHub Models, and other Responses-compatible endpoints). + +Extracted from run_agent.py to isolate Responses API-specific logic from the +core agent loop. All functions are stateless — they operate on the data passed +in and return transformed results. +""" + +from __future__ import annotations + +import hashlib +import json +import logging +import re +import uuid +from types import SimpleNamespace +from typing import Any, Dict, List, Optional + +from agent.prompt_builder import DEFAULT_AGENT_IDENTITY + +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Multimodal content helpers +# --------------------------------------------------------------------------- + +def _chat_content_to_responses_parts(content: Any) -> List[Dict[str, Any]]: + """Convert chat-style multimodal content to Responses API input parts. + + Input: ``[{"type":"text"|"image_url", ...}]`` (native OpenAI Chat format) + Output: ``[{"type":"input_text"|"input_image", ...}]`` (Responses format) + + Returns an empty list when ``content`` is not a list or contains no + recognized parts — callers fall back to the string path. + """ + if not isinstance(content, list): + return [] + converted: List[Dict[str, Any]] = [] + for part in content: + if isinstance(part, str): + if part: + converted.append({"type": "input_text", "text": part}) + continue + if not isinstance(part, dict): + continue + ptype = str(part.get("type") or "").strip().lower() + if ptype in {"text", "input_text", "output_text"}: + text = part.get("text") + if isinstance(text, str) and text: + converted.append({"type": "input_text", "text": text}) + continue + if ptype in {"image_url", "input_image"}: + image_ref = part.get("image_url") + detail = part.get("detail") + if isinstance(image_ref, dict): + url = image_ref.get("url") + detail = image_ref.get("detail", detail) + else: + url = image_ref + if not isinstance(url, str) or not url: + continue + image_part: Dict[str, Any] = {"type": "input_image", "image_url": url} + if isinstance(detail, str) and detail.strip(): + image_part["detail"] = detail.strip() + converted.append(image_part) + return converted + + +def _summarize_user_message_for_log(content: Any) -> str: + """Return a short text summary of a user message for logging/trajectory. + + Multimodal messages arrive as a list of ``{type:"text"|"image_url", ...}`` + parts from the API server. Logging, spinner previews, and trajectory + files all want a plain string — this helper extracts the first chunk of + text and notes any attached images. Returns an empty string for empty + lists and ``str(content)`` for unexpected scalar types. 
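+    Illustrative call (hypothetical payload):
+
+        >>> _summarize_user_message_for_log(
+        ...     [{"type": "text", "text": "describe this"},
+        ...      {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}}])
+        '[1 image] describe this'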
+ """ + if content is None: + return "" + if isinstance(content, str): + return content + if isinstance(content, list): + text_bits: List[str] = [] + image_count = 0 + for part in content: + if isinstance(part, str): + if part: + text_bits.append(part) + continue + if not isinstance(part, dict): + continue + ptype = str(part.get("type") or "").strip().lower() + if ptype in {"text", "input_text", "output_text"}: + text = part.get("text") + if isinstance(text, str) and text: + text_bits.append(text) + elif ptype in {"image_url", "input_image"}: + image_count += 1 + summary = " ".join(text_bits).strip() + if image_count: + note = f"[{image_count} image{'s' if image_count != 1 else ''}]" + summary = f"{note} {summary}" if summary else note + return summary + try: + return str(content) + except Exception: + return "" + + +# --------------------------------------------------------------------------- +# ID helpers +# --------------------------------------------------------------------------- + +def _deterministic_call_id(fn_name: str, arguments: str, index: int = 0) -> str: + """Generate a deterministic call_id from tool call content. + + Used as a fallback when the API doesn't provide a call_id. + Deterministic IDs prevent cache invalidation — random UUIDs would + make every API call's prefix unique, breaking OpenAI's prompt cache. + """ + seed = f"{fn_name}:{arguments}:{index}" + digest = hashlib.sha256(seed.encode("utf-8", errors="replace")).hexdigest()[:12] + return f"call_{digest}" + + +def _split_responses_tool_id(raw_id: Any) -> tuple[Optional[str], Optional[str]]: + """Split a stored tool id into (call_id, response_item_id).""" + if not isinstance(raw_id, str): + return None, None + value = raw_id.strip() + if not value: + return None, None + if "|" in value: + call_id, response_item_id = value.split("|", 1) + call_id = call_id.strip() or None + response_item_id = response_item_id.strip() or None + return call_id, response_item_id + if value.startswith("fc_"): + return None, value + return value, None + + +def _derive_responses_function_call_id( + call_id: str, + response_item_id: Optional[str] = None, +) -> str: + """Build a valid Responses `function_call.id` (must start with `fc_`).""" + if isinstance(response_item_id, str): + candidate = response_item_id.strip() + if candidate.startswith("fc_"): + return candidate + + source = (call_id or "").strip() + if source.startswith("fc_"): + return source + if source.startswith("call_") and len(source) > len("call_"): + return f"fc_{source[len('call_'):]}" + + sanitized = re.sub(r"[^A-Za-z0-9_-]", "", source) + if sanitized.startswith("fc_"): + return sanitized + if sanitized.startswith("call_") and len(sanitized) > len("call_"): + return f"fc_{sanitized[len('call_'):]}" + if sanitized: + return f"fc_{sanitized[:48]}" + + seed = source or str(response_item_id or "") or uuid.uuid4().hex + digest = hashlib.sha1(seed.encode("utf-8")).hexdigest()[:24] + return f"fc_{digest}" + + +# --------------------------------------------------------------------------- +# Schema conversion +# --------------------------------------------------------------------------- + +def _responses_tools(tools: Optional[List[Dict[str, Any]]] = None) -> Optional[List[Dict[str, Any]]]: + """Convert chat-completions tool schemas to Responses function-tool schemas.""" + if not tools: + return None + + converted: List[Dict[str, Any]] = [] + for item in tools: + fn = item.get("function", {}) if isinstance(item, dict) else {} + name = fn.get("name") + if not isinstance(name, str) 
or not name.strip(): + continue + converted.append({ + "type": "function", + "name": name, + "description": fn.get("description", ""), + "strict": False, + "parameters": fn.get("parameters", {"type": "object", "properties": {}}), + }) + return converted or None + + +# --------------------------------------------------------------------------- +# Message format conversion +# --------------------------------------------------------------------------- + +def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Convert internal chat-style messages to Responses input items.""" + items: List[Dict[str, Any]] = [] + seen_item_ids: set = set() + + for msg in messages: + if not isinstance(msg, dict): + continue + role = msg.get("role") + if role == "system": + continue + + if role in {"user", "assistant"}: + content = msg.get("content", "") + if isinstance(content, list): + content_parts = _chat_content_to_responses_parts(content) + content_text = "".join( + p.get("text", "") for p in content_parts if p.get("type") == "input_text" + ) + else: + content_parts = [] + content_text = str(content) if content is not None else "" + + if role == "assistant": + # Replay encrypted reasoning items from previous turns + # so the API can maintain coherent reasoning chains. + codex_reasoning = msg.get("codex_reasoning_items") + has_codex_reasoning = False + if isinstance(codex_reasoning, list): + for ri in codex_reasoning: + if isinstance(ri, dict) and ri.get("encrypted_content"): + item_id = ri.get("id") + if item_id and item_id in seen_item_ids: + continue + # Strip the "id" field — with store=False the + # Responses API cannot look up items by ID and + # returns 404. The encrypted_content blob is + # self-contained for reasoning chain continuity. + replay_item = {k: v for k, v in ri.items() if k != "id"} + items.append(replay_item) + if item_id: + seen_item_ids.add(item_id) + has_codex_reasoning = True + + if content_parts: + items.append({"role": "assistant", "content": content_parts}) + elif content_text.strip(): + items.append({"role": "assistant", "content": content_text}) + elif has_codex_reasoning: + # The Responses API requires a following item after each + # reasoning item (otherwise: missing_following_item error). + # When the assistant produced only reasoning with no visible + # content, emit an empty assistant message as the required + # following item. 
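+                    # Illustrative replay shape (values elided):
+                    #   {"type": "reasoning", "encrypted_content": "..."}
+                    #   {"role": "assistant", "content": ""}   # required follower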
+ items.append({"role": "assistant", "content": ""}) + + tool_calls = msg.get("tool_calls") + if isinstance(tool_calls, list): + for tc in tool_calls: + if not isinstance(tc, dict): + continue + fn = tc.get("function", {}) + fn_name = fn.get("name") + if not isinstance(fn_name, str) or not fn_name.strip(): + continue + + embedded_call_id, embedded_response_item_id = _split_responses_tool_id( + tc.get("id") + ) + call_id = tc.get("call_id") + if not isinstance(call_id, str) or not call_id.strip(): + call_id = embedded_call_id + if not isinstance(call_id, str) or not call_id.strip(): + if ( + isinstance(embedded_response_item_id, str) + and embedded_response_item_id.startswith("fc_") + and len(embedded_response_item_id) > len("fc_") + ): + call_id = f"call_{embedded_response_item_id[len('fc_'):]}" + else: + _raw_args = str(fn.get("arguments", "{}")) + call_id = _deterministic_call_id(fn_name, _raw_args, len(items)) + call_id = call_id.strip() + + arguments = fn.get("arguments", "{}") + if isinstance(arguments, dict): + arguments = json.dumps(arguments, ensure_ascii=False) + elif not isinstance(arguments, str): + arguments = str(arguments) + arguments = arguments.strip() or "{}" + + items.append({ + "type": "function_call", + "call_id": call_id, + "name": fn_name, + "arguments": arguments, + }) + continue + + # Non-assistant (user) role: emit multimodal parts when present, + # otherwise fall back to the text payload. + if content_parts: + items.append({"role": role, "content": content_parts}) + else: + items.append({"role": role, "content": content_text}) + continue + + if role == "tool": + raw_tool_call_id = msg.get("tool_call_id") + call_id, _ = _split_responses_tool_id(raw_tool_call_id) + if not isinstance(call_id, str) or not call_id.strip(): + if isinstance(raw_tool_call_id, str) and raw_tool_call_id.strip(): + call_id = raw_tool_call_id.strip() + if not isinstance(call_id, str) or not call_id.strip(): + continue + items.append({ + "type": "function_call_output", + "call_id": call_id, + "output": str(msg.get("content", "") or ""), + }) + + return items + + +# --------------------------------------------------------------------------- +# Input preflight / validation +# --------------------------------------------------------------------------- + +def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]: + if not isinstance(raw_items, list): + raise ValueError("Codex Responses input must be a list of input items.") + + normalized: List[Dict[str, Any]] = [] + seen_ids: set = set() + for idx, item in enumerate(raw_items): + if not isinstance(item, dict): + raise ValueError(f"Codex Responses input[{idx}] must be an object.") + + item_type = item.get("type") + if item_type == "function_call": + call_id = item.get("call_id") + name = item.get("name") + if not isinstance(call_id, str) or not call_id.strip(): + raise ValueError(f"Codex Responses input[{idx}] function_call is missing call_id.") + if not isinstance(name, str) or not name.strip(): + raise ValueError(f"Codex Responses input[{idx}] function_call is missing name.") + + arguments = item.get("arguments", "{}") + if isinstance(arguments, dict): + arguments = json.dumps(arguments, ensure_ascii=False) + elif not isinstance(arguments, str): + arguments = str(arguments) + arguments = arguments.strip() or "{}" + + normalized.append( + { + "type": "function_call", + "call_id": call_id.strip(), + "name": name.strip(), + "arguments": arguments, + } + ) + continue + + if item_type == "function_call_output": + call_id = 
item.get("call_id") + if not isinstance(call_id, str) or not call_id.strip(): + raise ValueError(f"Codex Responses input[{idx}] function_call_output is missing call_id.") + output = item.get("output", "") + if output is None: + output = "" + if not isinstance(output, str): + output = str(output) + + normalized.append( + { + "type": "function_call_output", + "call_id": call_id.strip(), + "output": output, + } + ) + continue + + if item_type == "reasoning": + encrypted = item.get("encrypted_content") + if isinstance(encrypted, str) and encrypted: + item_id = item.get("id") + if isinstance(item_id, str) and item_id: + if item_id in seen_ids: + continue + seen_ids.add(item_id) + reasoning_item = {"type": "reasoning", "encrypted_content": encrypted} + # Do NOT include the "id" in the outgoing item — with + # store=False (our default) the API tries to resolve the + # id server-side and returns 404. The id is still used + # above for local deduplication via seen_ids. + summary = item.get("summary") + if isinstance(summary, list): + reasoning_item["summary"] = summary + else: + reasoning_item["summary"] = [] + normalized.append(reasoning_item) + continue + + role = item.get("role") + if role in {"user", "assistant"}: + content = item.get("content", "") + if content is None: + content = "" + if isinstance(content, list): + # Multimodal content from ``_chat_messages_to_responses_input`` + # is already in Responses format (``input_text`` / ``input_image``). + # Validate each part and pass through. + validated: List[Dict[str, Any]] = [] + for part_idx, part in enumerate(content): + if isinstance(part, str): + if part: + validated.append({"type": "input_text", "text": part}) + continue + if not isinstance(part, dict): + raise ValueError( + f"Codex Responses input[{idx}].content[{part_idx}] must be an object or string." + ) + ptype = str(part.get("type") or "").strip().lower() + if ptype in {"input_text", "text", "output_text"}: + text = part.get("text", "") + if not isinstance(text, str): + text = str(text or "") + validated.append({"type": "input_text", "text": text}) + elif ptype in {"input_image", "image_url"}: + image_ref = part.get("image_url", "") + detail = part.get("detail") + if isinstance(image_ref, dict): + url = image_ref.get("url", "") + detail = image_ref.get("detail", detail) + else: + url = image_ref + if not isinstance(url, str): + url = str(url or "") + image_part: Dict[str, Any] = {"type": "input_image", "image_url": url} + if isinstance(detail, str) and detail.strip(): + image_part["detail"] = detail.strip() + validated.append(image_part) + else: + raise ValueError( + f"Codex Responses input[{idx}].content[{part_idx}] has unsupported type {part.get('type')!r}." + ) + normalized.append({"role": role, "content": validated}) + continue + if not isinstance(content, str): + content = str(content) + + normalized.append({"role": role, "content": content}) + continue + + raise ValueError( + f"Codex Responses input[{idx}] has unsupported item shape (type={item_type!r}, role={role!r})." 
+ ) + + return normalized + + +def _preflight_codex_api_kwargs( + api_kwargs: Any, + *, + allow_stream: bool = False, +) -> Dict[str, Any]: + if not isinstance(api_kwargs, dict): + raise ValueError("Codex Responses request must be a dict.") + + required = {"model", "instructions", "input"} + missing = [key for key in required if key not in api_kwargs] + if missing: + raise ValueError(f"Codex Responses request missing required field(s): {', '.join(sorted(missing))}.") + + model = api_kwargs.get("model") + if not isinstance(model, str) or not model.strip(): + raise ValueError("Codex Responses request 'model' must be a non-empty string.") + model = model.strip() + + instructions = api_kwargs.get("instructions") + if instructions is None: + instructions = "" + if not isinstance(instructions, str): + instructions = str(instructions) + instructions = instructions.strip() or DEFAULT_AGENT_IDENTITY + + normalized_input = _preflight_codex_input_items(api_kwargs.get("input")) + + tools = api_kwargs.get("tools") + normalized_tools = None + if tools is not None: + if not isinstance(tools, list): + raise ValueError("Codex Responses request 'tools' must be a list when provided.") + normalized_tools = [] + for idx, tool in enumerate(tools): + if not isinstance(tool, dict): + raise ValueError(f"Codex Responses tools[{idx}] must be an object.") + if tool.get("type") != "function": + raise ValueError(f"Codex Responses tools[{idx}] has unsupported type {tool.get('type')!r}.") + + name = tool.get("name") + parameters = tool.get("parameters") + if not isinstance(name, str) or not name.strip(): + raise ValueError(f"Codex Responses tools[{idx}] is missing a valid name.") + if not isinstance(parameters, dict): + raise ValueError(f"Codex Responses tools[{idx}] is missing valid parameters.") + + description = tool.get("description", "") + if description is None: + description = "" + if not isinstance(description, str): + description = str(description) + + strict = tool.get("strict", False) + if not isinstance(strict, bool): + strict = bool(strict) + + normalized_tools.append( + { + "type": "function", + "name": name.strip(), + "description": description, + "strict": strict, + "parameters": parameters, + } + ) + + store = api_kwargs.get("store", False) + if store is not False: + raise ValueError("Codex Responses contract requires 'store' to be false.") + + allowed_keys = { + "model", "instructions", "input", "tools", "store", + "reasoning", "include", "max_output_tokens", "temperature", + "tool_choice", "parallel_tool_calls", "prompt_cache_key", "service_tier", + "extra_headers", + } + normalized: Dict[str, Any] = { + "model": model, + "instructions": instructions, + "input": normalized_input, + "store": False, + } + if normalized_tools is not None: + normalized["tools"] = normalized_tools + + # Pass through reasoning config + reasoning = api_kwargs.get("reasoning") + if isinstance(reasoning, dict): + normalized["reasoning"] = reasoning + include = api_kwargs.get("include") + if isinstance(include, list): + normalized["include"] = include + service_tier = api_kwargs.get("service_tier") + if isinstance(service_tier, str) and service_tier.strip(): + normalized["service_tier"] = service_tier.strip() + + # Pass through max_output_tokens and temperature + max_output_tokens = api_kwargs.get("max_output_tokens") + if isinstance(max_output_tokens, (int, float)) and max_output_tokens > 0: + normalized["max_output_tokens"] = int(max_output_tokens) + temperature = api_kwargs.get("temperature") + if isinstance(temperature, 
(int, float)): + normalized["temperature"] = float(temperature) + + # Pass through tool_choice, parallel_tool_calls, prompt_cache_key + for passthrough_key in ("tool_choice", "parallel_tool_calls", "prompt_cache_key"): + val = api_kwargs.get(passthrough_key) + if val is not None: + normalized[passthrough_key] = val + + extra_headers = api_kwargs.get("extra_headers") + if extra_headers is not None: + if not isinstance(extra_headers, dict): + raise ValueError("Codex Responses request 'extra_headers' must be an object.") + normalized_headers: Dict[str, str] = {} + for key, value in extra_headers.items(): + if not isinstance(key, str) or not key.strip(): + raise ValueError("Codex Responses request 'extra_headers' keys must be non-empty strings.") + if value is None: + continue + normalized_headers[key.strip()] = str(value) + if normalized_headers: + normalized["extra_headers"] = normalized_headers + + if allow_stream: + stream = api_kwargs.get("stream") + if stream is not None and stream is not True: + raise ValueError("Codex Responses 'stream' must be true when set.") + if stream is True: + normalized["stream"] = True + allowed_keys.add("stream") + elif "stream" in api_kwargs: + raise ValueError("Codex Responses stream flag is only allowed in fallback streaming requests.") + + unexpected = sorted(key for key in api_kwargs if key not in allowed_keys) + if unexpected: + raise ValueError( + f"Codex Responses request has unsupported field(s): {', '.join(unexpected)}." + ) + + return normalized + + +# --------------------------------------------------------------------------- +# Response extraction helpers +# --------------------------------------------------------------------------- + +def _extract_responses_message_text(item: Any) -> str: + """Extract assistant text from a Responses message output item.""" + content = getattr(item, "content", None) + if not isinstance(content, list): + return "" + + chunks: List[str] = [] + for part in content: + ptype = getattr(part, "type", None) + if ptype not in {"output_text", "text"}: + continue + text = getattr(part, "text", None) + if isinstance(text, str) and text: + chunks.append(text) + return "".join(chunks).strip() + + +def _extract_responses_reasoning_text(item: Any) -> str: + """Extract a compact reasoning text from a Responses reasoning item.""" + summary = getattr(item, "summary", None) + if isinstance(summary, list): + chunks: List[str] = [] + for part in summary: + text = getattr(part, "text", None) + if isinstance(text, str) and text: + chunks.append(text) + if chunks: + return "\n".join(chunks).strip() + text = getattr(item, "text", None) + if isinstance(text, str) and text: + return text.strip() + return "" + + +# --------------------------------------------------------------------------- +# Full response normalization +# --------------------------------------------------------------------------- + +def _normalize_codex_response(response: Any) -> tuple[Any, str]: + """Normalize a Responses API object to an assistant_message-like object.""" + output = getattr(response, "output", None) + if not isinstance(output, list) or not output: + # The Codex backend can return empty output when the answer was + # delivered entirely via stream events. Check output_text as a + # last-resort fallback before raising. 
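+        # (output_text is the SDK's convenience aggregation of output_text
+        # parts; if even that is empty, the RuntimeError below still fires.)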
+ out_text = getattr(response, "output_text", None) + if isinstance(out_text, str) and out_text.strip(): + logger.debug( + "Codex response has empty output but output_text is present (%d chars); " + "synthesizing output item.", len(out_text.strip()), + ) + output = [SimpleNamespace( + type="message", role="assistant", status="completed", + content=[SimpleNamespace(type="output_text", text=out_text.strip())], + )] + response.output = output + else: + raise RuntimeError("Responses API returned no output items") + + response_status = getattr(response, "status", None) + if isinstance(response_status, str): + response_status = response_status.strip().lower() + else: + response_status = None + + if response_status in {"failed", "cancelled"}: + error_obj = getattr(response, "error", None) + if isinstance(error_obj, dict): + error_msg = error_obj.get("message") or str(error_obj) + else: + error_msg = str(error_obj) if error_obj else f"Responses API returned status '{response_status}'" + raise RuntimeError(error_msg) + + content_parts: List[str] = [] + reasoning_parts: List[str] = [] + reasoning_items_raw: List[Dict[str, Any]] = [] + tool_calls: List[Any] = [] + has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"} + saw_commentary_phase = False + saw_final_answer_phase = False + + for item in output: + item_type = getattr(item, "type", None) + item_status = getattr(item, "status", None) + if isinstance(item_status, str): + item_status = item_status.strip().lower() + else: + item_status = None + + if item_status in {"queued", "in_progress", "incomplete"}: + has_incomplete_items = True + + if item_type == "message": + item_phase = getattr(item, "phase", None) + if isinstance(item_phase, str): + normalized_phase = item_phase.strip().lower() + if normalized_phase in {"commentary", "analysis"}: + saw_commentary_phase = True + elif normalized_phase in {"final_answer", "final"}: + saw_final_answer_phase = True + message_text = _extract_responses_message_text(item) + if message_text: + content_parts.append(message_text) + elif item_type == "reasoning": + reasoning_text = _extract_responses_reasoning_text(item) + if reasoning_text: + reasoning_parts.append(reasoning_text) + # Capture the full reasoning item for multi-turn continuity. + # encrypted_content is an opaque blob the API needs back on + # subsequent turns to maintain coherent reasoning chains. 
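+            # Captured shape (the "id" is kept only for local dedup and is
+            # stripped again before replay; see _preflight_codex_input_items):
+            #   {"type": "reasoning", "encrypted_content": <blob>,
+            #    "id": <item id>, "summary": [{"type": "summary_text", ...}]}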
+ encrypted = getattr(item, "encrypted_content", None) + if isinstance(encrypted, str) and encrypted: + raw_item = {"type": "reasoning", "encrypted_content": encrypted} + item_id = getattr(item, "id", None) + if isinstance(item_id, str) and item_id: + raw_item["id"] = item_id + # Capture summary — required by the API when replaying reasoning items + summary = getattr(item, "summary", None) + if isinstance(summary, list): + raw_summary = [] + for part in summary: + text = getattr(part, "text", None) + if isinstance(text, str): + raw_summary.append({"type": "summary_text", "text": text}) + raw_item["summary"] = raw_summary + reasoning_items_raw.append(raw_item) + elif item_type == "function_call": + if item_status in {"queued", "in_progress", "incomplete"}: + continue + fn_name = getattr(item, "name", "") or "" + arguments = getattr(item, "arguments", "{}") + if not isinstance(arguments, str): + arguments = json.dumps(arguments, ensure_ascii=False) + raw_call_id = getattr(item, "call_id", None) + raw_item_id = getattr(item, "id", None) + embedded_call_id, _ = _split_responses_tool_id(raw_item_id) + call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id + if not isinstance(call_id, str) or not call_id.strip(): + call_id = _deterministic_call_id(fn_name, arguments, len(tool_calls)) + call_id = call_id.strip() + response_item_id = raw_item_id if isinstance(raw_item_id, str) else None + response_item_id = _derive_responses_function_call_id(call_id, response_item_id) + tool_calls.append(SimpleNamespace( + id=call_id, + call_id=call_id, + response_item_id=response_item_id, + type="function", + function=SimpleNamespace(name=fn_name, arguments=arguments), + )) + elif item_type == "custom_tool_call": + fn_name = getattr(item, "name", "") or "" + arguments = getattr(item, "input", "{}") + if not isinstance(arguments, str): + arguments = json.dumps(arguments, ensure_ascii=False) + raw_call_id = getattr(item, "call_id", None) + raw_item_id = getattr(item, "id", None) + embedded_call_id, _ = _split_responses_tool_id(raw_item_id) + call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id + if not isinstance(call_id, str) or not call_id.strip(): + call_id = _deterministic_call_id(fn_name, arguments, len(tool_calls)) + call_id = call_id.strip() + response_item_id = raw_item_id if isinstance(raw_item_id, str) else None + response_item_id = _derive_responses_function_call_id(call_id, response_item_id) + tool_calls.append(SimpleNamespace( + id=call_id, + call_id=call_id, + response_item_id=response_item_id, + type="function", + function=SimpleNamespace(name=fn_name, arguments=arguments), + )) + + final_text = "\n".join([p for p in content_parts if p]).strip() + if not final_text and hasattr(response, "output_text"): + out_text = getattr(response, "output_text", "") + if isinstance(out_text, str): + final_text = out_text.strip() + + assistant_message = SimpleNamespace( + content=final_text, + tool_calls=tool_calls, + reasoning="\n\n".join(reasoning_parts).strip() if reasoning_parts else None, + reasoning_content=None, + reasoning_details=None, + codex_reasoning_items=reasoning_items_raw or None, + ) + + if tool_calls: + finish_reason = "tool_calls" + elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase): + finish_reason = "incomplete" + elif reasoning_items_raw and not final_text: + # Response contains only reasoning (encrypted thinking state) with + # no visible content or tool calls. 
The model is still thinking and + # needs another turn to produce the actual answer. Marking this as + # "stop" would send it into the empty-content retry loop which burns + # 3 retries then fails — treat it as incomplete instead so the Codex + # continuation path handles it correctly. + finish_reason = "incomplete" + else: + finish_reason = "stop" + return assistant_message, finish_reason diff --git a/agent/context_compressor.py b/agent/context_compressor.py index ae8c2c0bd3..254ac0ac5e 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -31,6 +31,7 @@ from agent.model_metadata import ( get_model_context_length, estimate_messages_tokens_rough, ) +from agent.redact import redact_sensitive_text logger = logging.getLogger(__name__) @@ -550,11 +551,15 @@ class ContextCompressor(ContextEngine): Includes tool call arguments and result content (up to ``_CONTENT_MAX`` chars per message) so the summarizer can preserve specific details like file paths, commands, and outputs. + + All content is redacted before serialization to prevent secrets + (API keys, tokens, passwords) from leaking into the summary that + gets sent to the auxiliary model and persisted across compactions. """ parts = [] for msg in turns: role = msg.get("role", "unknown") - content = msg.get("content") or "" + content = redact_sensitive_text(msg.get("content") or "") # Tool results: keep enough content for the summarizer if role == "tool": @@ -575,7 +580,7 @@ class ContextCompressor(ContextEngine): if isinstance(tc, dict): fn = tc.get("function", {}) name = fn.get("name", "?") - args = fn.get("arguments", "") + args = redact_sensitive_text(fn.get("arguments", "")) # Truncate long arguments but keep enough for context if len(args) > self._TOOL_ARGS_MAX: args = args[:self._TOOL_ARGS_HEAD] + "..." @@ -633,7 +638,13 @@ class ContextCompressor(ContextEngine): "assistant that continues the conversation. " "Do NOT respond to any questions or requests in the conversation — " "only output the structured summary. " - "Do NOT include any preamble, greeting, or prefix." + "Do NOT include any preamble, greeting, or prefix. " + "Write the summary in the same language the user was using in the " + "conversation — do not translate or switch to English. " + "NEVER include API keys, tokens, passwords, secrets, credentials, " + "or connection strings in the summary — replace any that appear " + "with [REDACTED]. Note that the user had credentials present, but " + "do not preserve their values." ) # Shared structured template (used by both paths). @@ -690,7 +701,7 @@ Be specific with file paths, commands, line numbers, and results.] [What remains to be done — framed as context, not instructions] ## Critical Context -[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation] +[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation. NEVER include API keys, tokens, passwords, or credentials — write [REDACTED] instead.] Target ~{summary_budget} tokens. Be CONCRETE — include file paths, command outputs, error messages, line numbers, and specific values. Avoid vague descriptions like "made some changes" — say exactly what changed. @@ -730,7 +741,7 @@ Use this exact structure: prompt += f""" FOCUS TOPIC: "{focus_topic}" -The user has requested that this compaction PRIORITISE preserving all information related to the focus topic above. 
For content related to "{focus_topic}", include full detail — exact values, file paths, command outputs, error messages, and decisions. For content NOT related to the focus topic, summarise more aggressively (brief one-liners or omit if truly irrelevant). The focus topic sections should receive roughly 60-70% of the summary token budget.""" +The user has requested that this compaction PRIORITISE preserving all information related to the focus topic above. For content related to "{focus_topic}", include full detail — exact values, file paths, command outputs, error messages, and decisions. For content NOT related to the focus topic, summarise more aggressively (brief one-liners or omit if truly irrelevant). The focus topic sections should receive roughly 60-70% of the summary token budget. Even for the focus topic, NEVER preserve API keys, tokens, passwords, or credentials — use [REDACTED].""" try: call_kwargs = { @@ -753,7 +764,9 @@ The user has requested that this compaction PRIORITISE preserving all informatio # Handle cases where content is not a string (e.g., dict from llama.cpp) if not isinstance(content, str): content = str(content) if content else "" - summary = content.strip() + # Redact the summary output as well — the summarizer LLM may + # ignore prompt instructions and echo back secrets verbatim. + summary = redact_sensitive_text(content.strip()) # Store for iterative updates on next compaction self._previous_summary = summary self._summary_failure_cooldown_until = 0.0 @@ -794,7 +807,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio ) self.summary_model = "" # empty = use main model self._summary_failure_cooldown_until = 0.0 # no cooldown - return self._generate_summary(messages, summary_budget) # retry immediately + return self._generate_summary(turns_to_summarize) # retry immediately # Transient errors (timeout, rate limit, network) — shorter cooldown _transient_cooldown = 60 diff --git a/agent/context_references.py b/agent/context_references.py index 7ecb90c497..50a33a1d75 100644 --- a/agent/context_references.py +++ b/agent/context_references.py @@ -483,9 +483,7 @@ def _rg_files(path: Path, cwd: Path, limit: int) -> list[Path] | None: text=True, timeout=10, ) - except FileNotFoundError: - return None - except subprocess.TimeoutExpired: + except (FileNotFoundError, OSError, subprocess.TimeoutExpired): return None if result.returncode != 0: return None diff --git a/agent/copilot_acp_client.py b/agent/copilot_acp_client.py index 031c58d705..783f949567 100644 --- a/agent/copilot_acp_client.py +++ b/agent/copilot_acp_client.py @@ -21,6 +21,9 @@ from pathlib import Path from types import SimpleNamespace from typing import Any +from agent.file_safety import get_read_block_error, is_write_denied +from agent.redact import redact_sensitive_text + ACP_MARKER_BASE_URL = "acp://copilot" _DEFAULT_TIMEOUT_SECONDS = 900.0 @@ -54,6 +57,18 @@ def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]: } +def _permission_denied(message_id: Any) -> dict[str, Any]: + return { + "jsonrpc": "2.0", + "id": message_id, + "result": { + "outcome": { + "outcome": "cancelled", + } + }, + } + + def _format_messages_as_prompt( messages: list[dict[str, Any]], model: str | None = None, @@ -386,6 +401,8 @@ class CopilotACPClient: stderr_tail: deque[str] = deque(maxlen=40) def _stdout_reader() -> None: + if proc.stdout is None: + return for line in proc.stdout: try: inbox.put(json.loads(line)) @@ -533,18 +550,13 @@ class CopilotACPClient: params = 
msg.get("params") or {} if method == "session/request_permission": - response = { - "jsonrpc": "2.0", - "id": message_id, - "result": { - "outcome": { - "outcome": "allow_once", - } - }, - } + response = _permission_denied(message_id) elif method == "fs/read_text_file": try: path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd) + block_error = get_read_block_error(str(path)) + if block_error: + raise PermissionError(block_error) content = path.read_text() if path.exists() else "" line = params.get("line") limit = params.get("limit") @@ -553,6 +565,8 @@ class CopilotACPClient: start = line - 1 end = start + limit if isinstance(limit, int) and limit > 0 else None content = "".join(lines[start:end]) + if content: + content = redact_sensitive_text(content) response = { "jsonrpc": "2.0", "id": message_id, @@ -565,6 +579,10 @@ class CopilotACPClient: elif method == "fs/write_text_file": try: path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd) + if is_write_denied(str(path)): + raise PermissionError( + f"Write denied: '{path}' is a protected system/credential file." + ) path.parent.mkdir(parents=True, exist_ok=True) path.write_text(str(params.get("content") or "")) response = { diff --git a/agent/credential_pool.py b/agent/credential_pool.py index b02514e990..de8d03185a 100644 --- a/agent/credential_pool.py +++ b/agent/credential_pool.py @@ -983,6 +983,14 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup active_sources: Set[str] = set() auth_store = _load_auth_store() + # Shared suppression gate — used at every upsert site so + # `hermes auth remove ` is stable across all source types. + try: + from hermes_cli.auth import is_source_suppressed as _is_suppressed + except ImportError: + def _is_suppressed(_p, _s): # type: ignore[misc] + return False + if provider == "anthropic": # Only auto-discover external credentials (Claude Code, Hermes PKCE) # when the user has explicitly configured anthropic as their provider. @@ -1002,13 +1010,8 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup ("claude_code", read_claude_code_credentials()), ): if creds and creds.get("accessToken"): - # Check if user explicitly removed this source - try: - from hermes_cli.auth import is_source_suppressed - if is_source_suppressed(provider, source_name): - continue - except ImportError: - pass + if _is_suppressed(provider, source_name): + continue active_sources.add(source_name) changed |= _upsert_entry( entries, @@ -1026,7 +1029,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup elif provider == "nous": state = _load_provider_state(auth_store, "nous") - if state: + if state and not _is_suppressed(provider, "device_code"): active_sources.add("device_code") # Prefer a user-supplied label embedded in the singleton state # (set by persist_nous_credentials(label=...) 
when the user ran @@ -1067,20 +1070,21 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup token, source = resolve_copilot_token() if token: source_name = "gh_cli" if "gh" in source.lower() else f"env:{source}" - active_sources.add(source_name) - pconfig = PROVIDER_REGISTRY.get(provider) - changed |= _upsert_entry( - entries, - provider, - source_name, - { - "source": source_name, - "auth_type": AUTH_TYPE_API_KEY, - "access_token": token, - "base_url": pconfig.inference_base_url if pconfig else "", - "label": source, - }, - ) + if not _is_suppressed(provider, source_name): + active_sources.add(source_name) + pconfig = PROVIDER_REGISTRY.get(provider) + changed |= _upsert_entry( + entries, + provider, + source_name, + { + "source": source_name, + "auth_type": AUTH_TYPE_API_KEY, + "access_token": token, + "base_url": pconfig.inference_base_url if pconfig else "", + "label": source, + }, + ) except Exception as exc: logger.debug("Copilot token seed failed: %s", exc) @@ -1096,20 +1100,21 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup token = creds.get("api_key", "") if token: source_name = creds.get("source", "qwen-cli") - active_sources.add(source_name) - changed |= _upsert_entry( - entries, - provider, - source_name, - { - "source": source_name, - "auth_type": AUTH_TYPE_OAUTH, - "access_token": token, - "expires_at_ms": creds.get("expires_at_ms"), - "base_url": creds.get("base_url", ""), - "label": creds.get("auth_file", source_name), - }, - ) + if not _is_suppressed(provider, source_name): + active_sources.add(source_name) + changed |= _upsert_entry( + entries, + provider, + source_name, + { + "source": source_name, + "auth_type": AUTH_TYPE_OAUTH, + "access_token": token, + "expires_at_ms": creds.get("expires_at_ms"), + "base_url": creds.get("base_url", ""), + "label": creds.get("auth_file", source_name), + }, + ) except Exception as exc: logger.debug("Qwen OAuth token seed failed: %s", exc) @@ -1118,13 +1123,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup # the device_code source as suppressed so it won't be re-seeded from # the Hermes auth store. Without this gate the removal is instantly # undone on the next load_pool() call. - codex_suppressed = False - try: - from hermes_cli.auth import is_source_suppressed - codex_suppressed = is_source_suppressed(provider, "device_code") - except ImportError: - pass - if codex_suppressed: + if _is_suppressed(provider, "device_code"): return changed, active_sources state = _load_provider_state(auth_store, "openai-codex") @@ -1158,10 +1157,22 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool, Set[str]]: changed = False active_sources: Set[str] = set() + # Honour user suppression — `hermes auth remove ` for an + # env-seeded credential marks the env: source as suppressed so it + # won't be re-seeded from the user's shell environment or ~/.hermes/.env. + # Without this gate the removal is silently undone on the next + # load_pool() call whenever the var is still exported by the shell. 
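+    # e.g. after removing env:OPENROUTER_API_KEY, _is_source_suppressed(
+    # "openrouter", "env:OPENROUTER_API_KEY") returns True and the seeding
+    # below skips the upsert even while the variable is still exported.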
+ try: + from hermes_cli.auth import is_source_suppressed as _is_source_suppressed + except ImportError: + def _is_source_suppressed(_p, _s): # type: ignore[misc] + return False if provider == "openrouter": token = os.getenv("OPENROUTER_API_KEY", "").strip() if token: source = "env:OPENROUTER_API_KEY" + if _is_source_suppressed(provider, source): + return changed, active_sources active_sources.add(source) changed |= _upsert_entry( entries, @@ -1198,6 +1209,8 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool if not token: continue source = f"env:{env_var}" + if _is_source_suppressed(provider, source): + continue active_sources.add(source) auth_type = AUTH_TYPE_OAUTH if provider == "anthropic" and not token.startswith("sk-ant-api") else AUTH_TYPE_API_KEY base_url = env_url or pconfig.inference_base_url @@ -1242,6 +1255,13 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b changed = False active_sources: Set[str] = set() + # Shared suppression gate — same pattern as _seed_from_env/_seed_from_singletons. + try: + from hermes_cli.auth import is_source_suppressed as _is_suppressed + except ImportError: + def _is_suppressed(_p, _s): # type: ignore[misc] + return False + # Seed from the custom_providers config entry's api_key field cp_config = _get_custom_provider_config(pool_key) if cp_config: @@ -1250,19 +1270,20 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b name = str(cp_config.get("name") or "").strip() if api_key: source = f"config:{name}" - active_sources.add(source) - changed |= _upsert_entry( - entries, - pool_key, - source, - { - "source": source, - "auth_type": AUTH_TYPE_API_KEY, - "access_token": api_key, - "base_url": base_url, - "label": name or source, - }, - ) + if not _is_suppressed(pool_key, source): + active_sources.add(source) + changed |= _upsert_entry( + entries, + pool_key, + source, + { + "source": source, + "auth_type": AUTH_TYPE_API_KEY, + "access_token": api_key, + "base_url": base_url, + "label": name or source, + }, + ) # Seed from model.api_key if model.provider=='custom' and model.base_url matches try: @@ -1282,19 +1303,20 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b matched_key = get_custom_provider_pool_key(model_base_url) if matched_key == pool_key: source = "model_config" - active_sources.add(source) - changed |= _upsert_entry( - entries, - pool_key, - source, - { - "source": source, - "auth_type": AUTH_TYPE_API_KEY, - "access_token": model_api_key, - "base_url": model_base_url, - "label": "model_config", - }, - ) + if not _is_suppressed(pool_key, source): + active_sources.add(source) + changed |= _upsert_entry( + entries, + pool_key, + source, + { + "source": source, + "auth_type": AUTH_TYPE_API_KEY, + "access_token": model_api_key, + "base_url": model_base_url, + "label": "model_config", + }, + ) except Exception: pass diff --git a/agent/credential_sources.py b/agent/credential_sources.py new file mode 100644 index 0000000000..8ad2fade0b --- /dev/null +++ b/agent/credential_sources.py @@ -0,0 +1,401 @@ +"""Unified removal contract for every credential source Hermes reads from. + +Hermes seeds its credential pool from many places: + + env: — os.environ / ~/.hermes/.env + claude_code — ~/.claude/.credentials.json + hermes_pkce — ~/.hermes/.anthropic_oauth.json + device_code — auth.json providers. (nous, openai-codex, ...) 
+ qwen-cli — ~/.qwen/oauth_creds.json + gh_cli — gh auth token + config: — custom_providers config entry + model_config — model.api_key when model.provider == "custom" + manual — user ran `hermes auth add` + +Each source has its own reader inside ``agent.credential_pool._seed_from_*`` +(which keep their existing shape — we haven't restructured them). What we +unify here is **removal**: + + ``hermes auth remove `` must make the pool entry stay gone. + +Before this module, every source had an ad-hoc removal branch in +``auth_remove_command``, and several sources had no branch at all — so +``auth remove`` silently reverted on the next ``load_pool()`` call for +qwen-cli, nous device_code (partial), hermes_pkce, copilot gh_cli, and +custom-config sources. + +Now every source registers a ``RemovalStep`` that does exactly three things +in the same shape: + + 1. Clean up whatever externally-readable state the source reads from + (.env line, auth.json block, OAuth file, etc.) + 2. Suppress the ``(provider, source_id)`` in auth.json so the + corresponding ``_seed_from_*`` branch skips the upsert on re-load + 3. Return ``RemovalResult`` describing what was cleaned and any + diagnostic hints the user should see (shell-exported env vars, + external credential files we deliberately don't delete, etc.) + +Adding a new credential source is: + - wire up a reader branch in ``_seed_from_*`` (existing pattern) + - gate that reader behind ``is_source_suppressed(provider, source_id)`` + - register a ``RemovalStep`` here + +No more per-source if/elif chain in ``auth_remove_command``. +""" + +from __future__ import annotations + +import os +from dataclasses import dataclass, field +from pathlib import Path +from typing import Callable, List, Optional + + +@dataclass +class RemovalResult: + """Outcome of removing a credential source. + + Attributes: + cleaned: Short strings describing external state that was actually + mutated (``"Cleared XAI_API_KEY from .env"``, + ``"Cleared openai-codex OAuth tokens from auth store"``). + Printed as plain lines to the user. + hints: Diagnostic lines ABOUT state the user may need to clean up + themselves or is deliberately left intact (shell-exported env + var, Claude Code credential file we don't delete, etc.). + Printed as plain lines to the user. Always non-destructive. + suppress: Whether to call ``suppress_credential_source`` after + cleanup so future ``load_pool`` calls skip this source. + Default True — almost every source needs this to stay sticky. + The only legitimate False is ``manual`` entries, which aren't + seeded from anywhere external. + """ + + cleaned: List[str] = field(default_factory=list) + hints: List[str] = field(default_factory=list) + suppress: bool = True + + +@dataclass +class RemovalStep: + """How to remove one specific credential source cleanly. + + Attributes: + provider: Provider pool key (``"xai"``, ``"anthropic"``, ``"nous"``, ...). + Special value ``"*"`` means "matches any provider" — used for + sources like ``manual`` that aren't provider-specific. + source_id: Source identifier as it appears in + ``PooledCredential.source``. May be a literal (``"claude_code"``) + or a prefix pattern matched via ``match_fn``. + match_fn: Optional predicate overriding literal ``source_id`` + matching. Gets the removed entry's source string. Used for + ``env:*`` (any env-seeded key), ``config:*`` (any custom + pool), and ``manual:*`` (any manual-source variant). + remove_fn: ``(provider, removed_entry) -> RemovalResult``. 
Does the + actual cleanup and returns what happened for the user. + description: One-line human-readable description for docs / tests. + """ + + provider: str + source_id: str + remove_fn: Callable[..., RemovalResult] + match_fn: Optional[Callable[[str], bool]] = None + description: str = "" + + def matches(self, provider: str, source: str) -> bool: + if self.provider != "*" and self.provider != provider: + return False + if self.match_fn is not None: + return self.match_fn(source) + return source == self.source_id + + +_REGISTRY: List[RemovalStep] = [] + + +def register(step: RemovalStep) -> RemovalStep: + _REGISTRY.append(step) + return step + + +def find_removal_step(provider: str, source: str) -> Optional[RemovalStep]: + """Return the first matching RemovalStep, or None if unregistered. + + Unregistered sources fall through to the default remove path in + ``auth_remove_command``: the pool entry is already gone (that happens + before dispatch), no external cleanup, no suppression. This is the + correct behaviour for ``manual`` entries — they were only ever stored + in the pool, nothing external to clean up. + """ + for step in _REGISTRY: + if step.matches(provider, source): + return step + return None + + +# --------------------------------------------------------------------------- +# Individual RemovalStep implementations — one per source. +# --------------------------------------------------------------------------- +# Each remove_fn is intentionally small and single-purpose. Adding a new +# credential source means adding ONE entry here — no other changes to +# auth_remove_command. + + +def _remove_env_source(provider: str, removed) -> RemovalResult: + """env: — the most common case. + + Handles three user situations: + 1. Var lives only in ~/.hermes/.env → clear it + 2. Var lives only in the user's shell (shell profile, systemd + EnvironmentFile, launchd plist) → hint them where to unset it + 3. Var lives in both → clear from .env, hint about shell + """ + from hermes_cli.config import get_env_path, remove_env_value + + result = RemovalResult() + env_var = removed.source[len("env:"):] + if not env_var: + return result + + # Detect shell vs .env BEFORE remove_env_value pops os.environ. + env_in_process = bool(os.getenv(env_var)) + env_in_dotenv = False + try: + env_path = get_env_path() + if env_path.exists(): + env_in_dotenv = any( + line.strip().startswith(f"{env_var}=") + for line in env_path.read_text(errors="replace").splitlines() + ) + except OSError: + pass + shell_exported = env_in_process and not env_in_dotenv + + cleared = remove_env_value(env_var) + if cleared: + result.cleaned.append(f"Cleared {env_var} from .env") + + if shell_exported: + result.hints.extend([ + f"Note: {env_var} is still set in your shell environment " + f"(not in ~/.hermes/.env).", + " Unset it there (shell profile, systemd EnvironmentFile, " + "launchd plist, etc.) or it will keep being visible to Hermes.", + f" The pool entry is now suppressed — Hermes will ignore " + f"{env_var} until you run `hermes auth add {provider}`.", + ]) + else: + result.hints.append( + f"Suppressed env:{env_var} — it will not be re-seeded even " + f"if the variable is re-exported later." + ) + return result + + +def _remove_claude_code(provider: str, removed) -> RemovalResult: + """~/.claude/.credentials.json is owned by Claude Code itself. + + We don't delete it — the user's Claude Code install still needs to + work. We just suppress it so Hermes stops reading it. 
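+
+    The suppression key is ``("anthropic", "claude_code")``, the same
+    ``(provider, source_name)`` pair that ``_seed_from_singletons`` checks
+    before each upsert, so the skip holds across future ``load_pool()`` calls.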
+ """ + return RemovalResult(hints=[ + "Suppressed claude_code credential — it will not be re-seeded.", + "Note: Claude Code credentials still live in ~/.claude/.credentials.json", + "Run `hermes auth add anthropic` to re-enable if needed.", + ]) + + +def _remove_hermes_pkce(provider: str, removed) -> RemovalResult: + """~/.hermes/.anthropic_oauth.json is ours — delete it outright.""" + from hermes_constants import get_hermes_home + + result = RemovalResult() + oauth_file = get_hermes_home() / ".anthropic_oauth.json" + if oauth_file.exists(): + try: + oauth_file.unlink() + result.cleaned.append("Cleared Hermes Anthropic OAuth credentials") + except OSError as exc: + result.hints.append(f"Could not delete {oauth_file}: {exc}") + return result + + +def _clear_auth_store_provider(provider: str) -> bool: + """Delete auth_store.providers[provider]. Returns True if deleted.""" + from hermes_cli.auth import ( + _auth_store_lock, + _load_auth_store, + _save_auth_store, + ) + + with _auth_store_lock(): + auth_store = _load_auth_store() + providers_dict = auth_store.get("providers") + if isinstance(providers_dict, dict) and provider in providers_dict: + del providers_dict[provider] + _save_auth_store(auth_store) + return True + return False + + +def _remove_nous_device_code(provider: str, removed) -> RemovalResult: + """Nous OAuth lives in auth.json providers.nous — clear it and suppress. + + We suppress in addition to clearing because nothing else stops the + user's next `hermes login` run from writing providers.nous again + before they decide to. Suppression forces them to go through + `hermes auth add nous` to re-engage, which is the documented re-add + path and clears the suppression atomically. + """ + result = RemovalResult() + if _clear_auth_store_provider(provider): + result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store") + return result + + +def _remove_codex_device_code(provider: str, removed) -> RemovalResult: + """Codex tokens live in TWO places: our auth store AND ~/.codex/auth.json. + + refresh_codex_oauth_pure() writes both every time, so clearing only + the Hermes auth store is not enough — _seed_from_singletons() would + re-import from ~/.codex/auth.json on the next load_pool() call and + the removal would be instantly undone. We suppress instead of + deleting Codex CLI's file, so the Codex CLI itself keeps working. + + The canonical source name in ``_seed_from_singletons`` is + ``"device_code"`` (no prefix). Entries may show up in the pool as + either ``"device_code"`` (seeded) or ``"manual:device_code"`` (added + via ``hermes auth add openai-codex``), but in both cases the re-seed + gate lives at the ``"device_code"`` suppression key. We suppress + that canonical key here; the central dispatcher also suppresses + ``removed.source`` which is fine — belt-and-suspenders, idempotent. + """ + from hermes_cli.auth import suppress_credential_source + + result = RemovalResult() + if _clear_auth_store_provider(provider): + result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store") + # Suppress the canonical re-seed source, not just whatever source the + # removed entry had. Otherwise `manual:device_code` removals wouldn't + # block the `device_code` re-seed path. 
+ suppress_credential_source(provider, "device_code") + result.hints.extend([ + "Suppressed openai-codex device_code source — it will not be re-seeded.", + "Note: Codex CLI credentials still live in ~/.codex/auth.json", + "Run `hermes auth add openai-codex` to re-enable if needed.", + ]) + return result + + +def _remove_qwen_cli(provider: str, removed) -> RemovalResult: + """~/.qwen/oauth_creds.json is owned by the Qwen CLI. + + Same pattern as claude_code — suppress, don't delete. The user's + Qwen CLI install still reads from that file. + """ + return RemovalResult(hints=[ + "Suppressed qwen-cli credential — it will not be re-seeded.", + "Note: Qwen CLI credentials still live in ~/.qwen/oauth_creds.json", + "Run `hermes auth add qwen-oauth` to re-enable if needed.", + ]) + + +def _remove_copilot_gh(provider: str, removed) -> RemovalResult: + """Copilot token comes from `gh auth token` or COPILOT_GITHUB_TOKEN / GH_TOKEN / GITHUB_TOKEN. + + Copilot is special: the same token can be seeded as multiple source + entries (gh_cli from ``_seed_from_singletons`` plus env: from + ``_seed_from_env``), so removing one entry without suppressing the + others lets the duplicates resurrect. We suppress ALL known copilot + sources here so removal is stable regardless of which entry the + user clicked. + + We don't touch the user's gh CLI or shell state — just suppress so + Hermes stops picking the token up. + """ + # Suppress ALL copilot source variants up-front so no path resurrects + # the pool entry. The central dispatcher in auth_remove_command will + # ALSO suppress removed.source, but it's idempotent so double-calling + # is harmless. + from hermes_cli.auth import suppress_credential_source + suppress_credential_source(provider, "gh_cli") + for env_var in ("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"): + suppress_credential_source(provider, f"env:{env_var}") + + return RemovalResult(hints=[ + "Suppressed all copilot token sources (gh_cli + env vars) — they will not be re-seeded.", + "Note: Your gh CLI / shell environment is unchanged.", + "Run `hermes auth add copilot` to re-enable if needed.", + ]) + + +def _remove_custom_config(provider: str, removed) -> RemovalResult: + """Custom provider pools are seeded from custom_providers config or + model.api_key. Both are in config.yaml — modifying that from here + is more invasive than suppression. We suppress; the user can edit + config.yaml if they want to remove the key from disk entirely. + """ + source_label = removed.source + return RemovalResult(hints=[ + f"Suppressed {source_label} — it will not be re-seeded.", + "Note: The underlying value in config.yaml is unchanged. Edit it " + "directly if you want to remove the credential from disk.", + ]) + + +def _register_all_sources() -> None: + """Called once on module import. + + ORDER MATTERS — ``find_removal_step`` returns the first match. Put + provider-specific steps before the generic ``env:*`` step so that e.g. + copilot's ``env:GH_TOKEN`` goes through the copilot removal (which + doesn't touch the user's shell), not the generic env-var removal + (which would try to clear .env). 
+ """ + register(RemovalStep( + provider="copilot", source_id="gh_cli", + match_fn=lambda src: src == "gh_cli" or src.startswith("env:"), + remove_fn=_remove_copilot_gh, + description="gh auth token / COPILOT_GITHUB_TOKEN / GH_TOKEN", + )) + register(RemovalStep( + provider="*", source_id="env:", + match_fn=lambda src: src.startswith("env:"), + remove_fn=_remove_env_source, + description="Any env-seeded credential (XAI_API_KEY, DEEPSEEK_API_KEY, etc.)", + )) + register(RemovalStep( + provider="anthropic", source_id="claude_code", + remove_fn=_remove_claude_code, + description="~/.claude/.credentials.json", + )) + register(RemovalStep( + provider="anthropic", source_id="hermes_pkce", + remove_fn=_remove_hermes_pkce, + description="~/.hermes/.anthropic_oauth.json", + )) + register(RemovalStep( + provider="nous", source_id="device_code", + remove_fn=_remove_nous_device_code, + description="auth.json providers.nous", + )) + register(RemovalStep( + provider="openai-codex", source_id="device_code", + match_fn=lambda src: src == "device_code" or src.endswith(":device_code"), + remove_fn=_remove_codex_device_code, + description="auth.json providers.openai-codex + ~/.codex/auth.json", + )) + register(RemovalStep( + provider="qwen-oauth", source_id="qwen-cli", + remove_fn=_remove_qwen_cli, + description="~/.qwen/oauth_creds.json", + )) + register(RemovalStep( + provider="*", source_id="config:", + match_fn=lambda src: src.startswith("config:") or src == "model_config", + remove_fn=_remove_custom_config, + description="Custom provider config.yaml api_key field", + )) + + +_register_all_sources() diff --git a/agent/display.py b/agent/display.py index 3f1341485e..474595d76c 100644 --- a/agent/display.py +++ b/agent/display.py @@ -225,9 +225,11 @@ def build_tool_preview(tool_name: str, args: dict, max_len: int | None = None) - content = _oneline(args.get("content", "")) return f"+{target}: \"{content[:25]}{'...' 
if len(content) > 25 else ''}\""
         elif action == "replace":
-            return f"~{target}: \"{_oneline(args.get('old_text', '')[:20])}\""
+            old = _oneline(args.get("old_text") or "") or ""
+            return f"~{target}: \"{old[:20]}\""
         elif action == "remove":
-            return f"-{target}: \"{_oneline(args.get('old_text', '')[:20])}\""
+            old = _oneline(args.get("old_text") or "") or ""
+            return f"-{target}: \"{old[:20]}\""
         return action
 
     if tool_name == "send_message":
@@ -939,9 +941,11 @@ def get_cute_tool_message(
         if action == "add":
             return _wrap(f"┊ 🧠 memory +{target}: \"{_trunc(args.get('content', ''), 30)}\" {dur}")
         elif action == "replace":
-            return _wrap(f"┊ 🧠 memory ~{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}")
+            old = args.get("old_text") or ""
+            return _wrap(f"┊ 🧠 memory ~{target}: \"{_trunc(old, 20)}\" {dur}")
         elif action == "remove":
-            return _wrap(f"┊ 🧠 memory -{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}")
+            old = args.get("old_text") or ""
+            return _wrap(f"┊ 🧠 memory -{target}: \"{_trunc(old, 20)}\" {dur}")
         return _wrap(f"┊ 🧠 memory {action} {dur}")
     if tool_name == "skills_list":
         return _wrap(f"┊ 📚 skills list {args.get('category', 'all')} {dur}")
diff --git a/agent/error_classifier.py b/agent/error_classifier.py
index fa6a985041..14a2609d83 100644
--- a/agent/error_classifier.py
+++ b/agent/error_classifier.py
@@ -290,7 +290,7 @@ def classify_api_error(
     if isinstance(body, dict):
         _err_obj = body.get("error", {})
         if isinstance(_err_obj, dict):
-            _body_msg = (_err_obj.get("message") or "").lower()
+            _body_msg = str(_err_obj.get("message") or "").lower()
             # Parse metadata.raw for wrapped provider errors
             _metadata = _err_obj.get("metadata", {})
             if isinstance(_metadata, dict):
@@ -302,11 +302,11 @@
                 if isinstance(_inner, dict):
                     _inner_err = _inner.get("error", {})
                     if isinstance(_inner_err, dict):
-                        _metadata_msg = (_inner_err.get("message") or "").lower()
+                        _metadata_msg = str(_inner_err.get("message") or "").lower()
             except (json.JSONDecodeError, TypeError):
                 pass
         if not _body_msg:
-            _body_msg = (body.get("message") or "").lower()
+            _body_msg = str(body.get("message") or "").lower()
     # Combine all message sources for pattern matching
     parts = [_raw_msg]
     if _body_msg and _body_msg not in _raw_msg:
@@ -470,11 +470,16 @@
             retryable=False,
             should_fallback=True,
         )
-        # Generic 404 — could be model or endpoint
+        # Generic 404 with no "model not found" signal — could be a wrong
+        # endpoint path (common with local llama.cpp / Ollama / vLLM when
+        # the URL is slightly misconfigured), a proxy routing glitch, or
+        # a transient backend issue. Classifying these as model_not_found
+        # silently falls back to a different provider and tells the model
+        # the model is missing, which is wrong and wastes a turn. Treat
+        # as unknown so the retry loop surfaces the real error instead.
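+        # (404s whose error body does name a missing model are classified
+        # as model_not_found by the earlier body-text matching; only
+        # signal-free 404s reach this branch.)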
return result_fn( - FailoverReason.model_not_found, - retryable=False, - should_fallback=True, + FailoverReason.unknown, + retryable=True, ) if status_code == 413: @@ -606,10 +611,10 @@ def _classify_400( if isinstance(body, dict): err_obj = body.get("error", {}) if isinstance(err_obj, dict): - err_body_msg = (err_obj.get("message") or "").strip().lower() + err_body_msg = str(err_obj.get("message") or "").strip().lower() # Responses API (and some providers) use flat body: {"message": "..."} if not err_body_msg: - err_body_msg = (body.get("message") or "").strip().lower() + err_body_msg = str(body.get("message") or "").strip().lower() is_generic = len(err_body_msg) < 30 or err_body_msg in ("error", "") is_large = approx_tokens > context_length * 0.4 or approx_tokens > 80000 or num_messages > 80 diff --git a/agent/file_safety.py b/agent/file_safety.py new file mode 100644 index 0000000000..09da46cafd --- /dev/null +++ b/agent/file_safety.py @@ -0,0 +1,111 @@ +"""Shared file safety rules used by both tools and ACP shims.""" + +from __future__ import annotations + +import os +from pathlib import Path +from typing import Optional + + +def _hermes_home_path() -> Path: + """Resolve the active HERMES_HOME (profile-aware) without circular imports.""" + try: + from hermes_constants import get_hermes_home # local import to avoid cycles + return get_hermes_home() + except Exception: + return Path(os.path.expanduser("~/.hermes")) + + +def build_write_denied_paths(home: str) -> set[str]: + """Return exact sensitive paths that must never be written.""" + hermes_home = _hermes_home_path() + return { + os.path.realpath(p) + for p in [ + os.path.join(home, ".ssh", "authorized_keys"), + os.path.join(home, ".ssh", "id_rsa"), + os.path.join(home, ".ssh", "id_ed25519"), + os.path.join(home, ".ssh", "config"), + str(hermes_home / ".env"), + os.path.join(home, ".bashrc"), + os.path.join(home, ".zshrc"), + os.path.join(home, ".profile"), + os.path.join(home, ".bash_profile"), + os.path.join(home, ".zprofile"), + os.path.join(home, ".netrc"), + os.path.join(home, ".pgpass"), + os.path.join(home, ".npmrc"), + os.path.join(home, ".pypirc"), + "/etc/sudoers", + "/etc/passwd", + "/etc/shadow", + ] + } + + +def build_write_denied_prefixes(home: str) -> list[str]: + """Return sensitive directory prefixes that must never be written.""" + return [ + os.path.realpath(p) + os.sep + for p in [ + os.path.join(home, ".ssh"), + os.path.join(home, ".aws"), + os.path.join(home, ".gnupg"), + os.path.join(home, ".kube"), + "/etc/sudoers.d", + "/etc/systemd", + os.path.join(home, ".docker"), + os.path.join(home, ".azure"), + os.path.join(home, ".config", "gh"), + ] + ] + + +def get_safe_write_root() -> Optional[str]: + """Return the resolved HERMES_WRITE_SAFE_ROOT path, or None if unset.""" + root = os.getenv("HERMES_WRITE_SAFE_ROOT", "") + if not root: + return None + try: + return os.path.realpath(os.path.expanduser(root)) + except Exception: + return None + + +def is_write_denied(path: str) -> bool: + """Return True if path is blocked by the write denylist or safe root.""" + home = os.path.realpath(os.path.expanduser("~")) + resolved = os.path.realpath(os.path.expanduser(str(path))) + + if resolved in build_write_denied_paths(home): + return True + for prefix in build_write_denied_prefixes(home): + if resolved.startswith(prefix): + return True + + safe_root = get_safe_write_root() + if safe_root and not (resolved == safe_root or resolved.startswith(safe_root + os.sep)): + return True + + return False + + +def 
get_read_block_error(path: str) -> Optional[str]: + """Return an error message when a read targets internal Hermes cache files.""" + resolved = Path(path).expanduser().resolve() + hermes_home = _hermes_home_path().resolve() + blocked_dirs = [ + hermes_home / "skills" / ".hub" / "index-cache", + hermes_home / "skills" / ".hub", + ] + for blocked in blocked_dirs: + try: + resolved.relative_to(blocked) + except ValueError: + continue + return ( + f"Access denied: {path} is an internal Hermes cache file " + "and cannot be read directly to prevent prompt injection. " + "Use the skills_list or skill_view tools instead." + ) + return None diff --git a/agent/gemini_cloudcode_adapter.py b/agent/gemini_cloudcode_adapter.py index ed687bffd6..24866c3a53 100644 --- a/agent/gemini_cloudcode_adapter.py +++ b/agent/gemini_cloudcode_adapter.py @@ -39,6 +39,7 @@ from typing import Any, Dict, Iterator, List, Optional import httpx from agent import google_oauth +from agent.gemini_schema import sanitize_gemini_tool_parameters from agent.google_code_assist import ( CODE_ASSIST_ENDPOINT, FREE_TIER_ID, @@ -205,7 +206,7 @@ def _translate_tools_to_gemini(tools: Any) -> List[Dict[str, Any]]: decl["description"] = str(fn["description"]) params = fn.get("parameters") if isinstance(params, dict): - decl["parameters"] = params + decl["parameters"] = sanitize_gemini_tool_parameters(params) declarations.append(decl) if not declarations: return [] @@ -504,9 +505,16 @@ def _iter_sse_events(response: httpx.Response) -> Iterator[Dict[str, Any]]: def _translate_stream_event( event: Dict[str, Any], model: str, - tool_call_indices: Dict[str, int], + tool_call_counter: List[int], ) -> List[_GeminiStreamChunk]: - """Unwrap Code Assist envelope and emit OpenAI-shaped chunk(s).""" + """Unwrap Code Assist envelope and emit OpenAI-shaped chunk(s). + + ``tool_call_counter`` is a single-element list used as a mutable counter + across events in the same stream. Each ``functionCall`` part gets a + fresh, unique OpenAI ``index`` — keying by function name would collide + whenever the model issues parallel calls to the same tool (e.g. reading + three files in one turn). 
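+
+    E.g. a stream emitting three parallel calls to the same tool yields
+    tool_call indices 0, 1, 2; with the old ``{name: index}`` dict all
+    three shared index 0 and downstream accumulation merged their arguments.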
+ """ inner = event.get("response") if isinstance(event.get("response"), dict) else event candidates = inner.get("candidates") or [] if not candidates: @@ -532,7 +540,8 @@ def _translate_stream_event( fc = part.get("functionCall") if isinstance(fc, dict) and fc.get("name"): name = str(fc["name"]) - idx = tool_call_indices.setdefault(name, len(tool_call_indices)) + idx = tool_call_counter[0] + tool_call_counter[0] += 1 try: args_str = json.dumps(fc.get("args") or {}, ensure_ascii=False) except (TypeError, ValueError): @@ -549,7 +558,7 @@ def _translate_stream_event( finish_reason_raw = str(cand.get("finishReason") or "") if finish_reason_raw: mapped = _map_gemini_finish_reason(finish_reason_raw) - if tool_call_indices: + if tool_call_counter[0] > 0: mapped = "tool_calls" chunks.append(_make_stream_chunk(model=model, finish_reason=mapped)) return chunks @@ -733,9 +742,9 @@ class GeminiCloudCodeClient: # Materialize error body for better diagnostics response.read() raise _gemini_http_error(response) - tool_call_indices: Dict[str, int] = {} + tool_call_counter: List[int] = [0] for event in _iter_sse_events(response): - for chunk in _translate_stream_event(event, model, tool_call_indices): + for chunk in _translate_stream_event(event, model, tool_call_counter): yield chunk except httpx.HTTPError as exc: raise CodeAssistError( @@ -790,7 +799,8 @@ def _gemini_http_error(response: httpx.Response) -> CodeAssistError: err_obj = {} err_status = str(err_obj.get("status") or "").strip() err_message = str(err_obj.get("message") or "").strip() - err_details_list = err_obj.get("details") if isinstance(err_obj.get("details"), list) else [] + _raw_details = err_obj.get("details") + err_details_list = _raw_details if isinstance(_raw_details, list) else [] # Extract google.rpc.ErrorInfo reason + metadata. There may be more # than one ErrorInfo (rare), so we pick the first one with a reason. diff --git a/agent/gemini_native_adapter.py b/agent/gemini_native_adapter.py new file mode 100644 index 0000000000..406e4a19b7 --- /dev/null +++ b/agent/gemini_native_adapter.py @@ -0,0 +1,847 @@ +"""OpenAI-compatible facade over Google AI Studio's native Gemini API. + +Hermes keeps ``api_mode='chat_completions'`` for the ``gemini`` provider so the +main agent loop can keep using its existing OpenAI-shaped message flow. +This adapter is the transport shim that converts those OpenAI-style +``messages[]`` / ``tools[]`` requests into Gemini's native +``models/{model}:generateContent`` schema and converts the responses back. + +Why this exists +--------------- +Google's OpenAI-compatible endpoint has been brittle for Hermes's multi-turn +agent/tool loop (auth churn, tool-call replay quirks, thought-signature +requirements). The native Gemini API is the canonical path and avoids the +OpenAI-compat layer entirely. 
+""" + +from __future__ import annotations + +import asyncio +import base64 +import json +import logging +import time +import uuid +from types import SimpleNamespace +from typing import Any, Dict, Iterator, List, Optional + +import httpx + +from agent.gemini_schema import sanitize_gemini_tool_parameters + +logger = logging.getLogger(__name__) + +DEFAULT_GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta" + + +def is_native_gemini_base_url(base_url: str) -> bool: + """Return True when the endpoint speaks Gemini's native REST API.""" + normalized = str(base_url or "").strip().rstrip("/").lower() + if not normalized: + return False + if "generativelanguage.googleapis.com" not in normalized: + return False + return not normalized.endswith("/openai") + + +class GeminiAPIError(Exception): + """Error shape compatible with Hermes retry/error classification.""" + + def __init__( + self, + message: str, + *, + code: str = "gemini_api_error", + status_code: Optional[int] = None, + response: Optional[httpx.Response] = None, + retry_after: Optional[float] = None, + details: Optional[Dict[str, Any]] = None, + ) -> None: + super().__init__(message) + self.code = code + self.status_code = status_code + self.response = response + self.retry_after = retry_after + self.details = details or {} + + +def _coerce_content_to_text(content: Any) -> str: + if content is None: + return "" + if isinstance(content, str): + return content + if isinstance(content, list): + pieces: List[str] = [] + for part in content: + if isinstance(part, str): + pieces.append(part) + elif isinstance(part, dict) and part.get("type") == "text": + text = part.get("text") + if isinstance(text, str): + pieces.append(text) + return "\n".join(pieces) + return str(content) + + +def _extract_multimodal_parts(content: Any) -> List[Dict[str, Any]]: + if not isinstance(content, list): + text = _coerce_content_to_text(content) + return [{"text": text}] if text else [] + + parts: List[Dict[str, Any]] = [] + for item in content: + if isinstance(item, str): + parts.append({"text": item}) + continue + if not isinstance(item, dict): + continue + ptype = item.get("type") + if ptype == "text": + text = item.get("text") + if isinstance(text, str) and text: + parts.append({"text": text}) + elif ptype == "image_url": + url = ((item.get("image_url") or {}).get("url") or "") + if not isinstance(url, str) or not url.startswith("data:"): + continue + try: + header, encoded = url.split(",", 1) + mime = header.split(":", 1)[1].split(";", 1)[0] + raw = base64.b64decode(encoded) + except Exception: + continue + parts.append( + { + "inlineData": { + "mimeType": mime, + "data": base64.b64encode(raw).decode("ascii"), + } + } + ) + return parts + + +def _tool_call_extra_signature(tool_call: Dict[str, Any]) -> Optional[str]: + extra = tool_call.get("extra_content") or {} + if not isinstance(extra, dict): + return None + google = extra.get("google") or extra.get("thought_signature") + if isinstance(google, dict): + sig = google.get("thought_signature") or google.get("thoughtSignature") + return str(sig) if isinstance(sig, str) and sig else None + if isinstance(google, str) and google: + return google + return None + + +def _translate_tool_call_to_gemini(tool_call: Dict[str, Any]) -> Dict[str, Any]: + fn = tool_call.get("function") or {} + args_raw = fn.get("arguments", "") + try: + args = json.loads(args_raw) if isinstance(args_raw, str) and args_raw else {} + except json.JSONDecodeError: + args = {"_raw": args_raw} + if not isinstance(args, dict): + args 
= {"_value": args} + + part: Dict[str, Any] = { + "functionCall": { + "name": str(fn.get("name") or ""), + "args": args, + } + } + thought_signature = _tool_call_extra_signature(tool_call) + if thought_signature: + part["thoughtSignature"] = thought_signature + return part + + +def _translate_tool_result_to_gemini( + message: Dict[str, Any], + tool_name_by_call_id: Optional[Dict[str, str]] = None, +) -> Dict[str, Any]: + tool_name_by_call_id = tool_name_by_call_id or {} + tool_call_id = str(message.get("tool_call_id") or "") + name = str( + message.get("name") + or tool_name_by_call_id.get(tool_call_id) + or tool_call_id + or "tool" + ) + content = _coerce_content_to_text(message.get("content")) + try: + parsed = json.loads(content) if content.strip().startswith(("{", "[")) else None + except json.JSONDecodeError: + parsed = None + response = parsed if isinstance(parsed, dict) else {"output": content} + return { + "functionResponse": { + "name": name, + "response": response, + } + } + + +def _build_gemini_contents(messages: List[Dict[str, Any]]) -> tuple[List[Dict[str, Any]], Optional[Dict[str, Any]]]: + system_text_parts: List[str] = [] + contents: List[Dict[str, Any]] = [] + tool_name_by_call_id: Dict[str, str] = {} + + for msg in messages: + if not isinstance(msg, dict): + continue + role = str(msg.get("role") or "user") + + if role == "system": + system_text_parts.append(_coerce_content_to_text(msg.get("content"))) + continue + + if role in {"tool", "function"}: + contents.append( + { + "role": "user", + "parts": [ + _translate_tool_result_to_gemini( + msg, + tool_name_by_call_id=tool_name_by_call_id, + ) + ], + } + ) + continue + + gemini_role = "model" if role == "assistant" else "user" + parts: List[Dict[str, Any]] = [] + + content_parts = _extract_multimodal_parts(msg.get("content")) + parts.extend(content_parts) + + tool_calls = msg.get("tool_calls") or [] + if isinstance(tool_calls, list): + for tool_call in tool_calls: + if isinstance(tool_call, dict): + tool_call_id = str(tool_call.get("id") or tool_call.get("call_id") or "") + tool_name = str(((tool_call.get("function") or {}).get("name") or "")) + if tool_call_id and tool_name: + tool_name_by_call_id[tool_call_id] = tool_name + parts.append(_translate_tool_call_to_gemini(tool_call)) + + if parts: + contents.append({"role": gemini_role, "parts": parts}) + + system_instruction = None + joined_system = "\n".join(part for part in system_text_parts if part).strip() + if joined_system: + system_instruction = {"parts": [{"text": joined_system}]} + return contents, system_instruction + + +def _translate_tools_to_gemini(tools: Any) -> List[Dict[str, Any]]: + if not isinstance(tools, list): + return [] + declarations: List[Dict[str, Any]] = [] + for tool in tools: + if not isinstance(tool, dict): + continue + fn = tool.get("function") or {} + if not isinstance(fn, dict): + continue + name = fn.get("name") + if not isinstance(name, str) or not name: + continue + decl: Dict[str, Any] = {"name": name} + description = fn.get("description") + if isinstance(description, str) and description: + decl["description"] = description + parameters = fn.get("parameters") + if isinstance(parameters, dict): + decl["parameters"] = sanitize_gemini_tool_parameters(parameters) + declarations.append(decl) + return [{"functionDeclarations": declarations}] if declarations else [] + + +def _translate_tool_choice_to_gemini(tool_choice: Any) -> Optional[Dict[str, Any]]: + if tool_choice is None: + return None + if isinstance(tool_choice, str): + if tool_choice 
== "auto": + return {"functionCallingConfig": {"mode": "AUTO"}} + if tool_choice == "required": + return {"functionCallingConfig": {"mode": "ANY"}} + if tool_choice == "none": + return {"functionCallingConfig": {"mode": "NONE"}} + if isinstance(tool_choice, dict): + fn = tool_choice.get("function") or {} + name = fn.get("name") + if isinstance(name, str) and name: + return {"functionCallingConfig": {"mode": "ANY", "allowedFunctionNames": [name]}} + return None + + +def _normalize_thinking_config(config: Any) -> Optional[Dict[str, Any]]: + if not isinstance(config, dict) or not config: + return None + budget = config.get("thinkingBudget", config.get("thinking_budget")) + include = config.get("includeThoughts", config.get("include_thoughts")) + level = config.get("thinkingLevel", config.get("thinking_level")) + normalized: Dict[str, Any] = {} + if isinstance(budget, (int, float)): + normalized["thinkingBudget"] = int(budget) + if isinstance(include, bool): + normalized["includeThoughts"] = include + if isinstance(level, str) and level.strip(): + normalized["thinkingLevel"] = level.strip().lower() + return normalized or None + + +def build_gemini_request( + *, + messages: List[Dict[str, Any]], + tools: Any = None, + tool_choice: Any = None, + temperature: Optional[float] = None, + max_tokens: Optional[int] = None, + top_p: Optional[float] = None, + stop: Any = None, + thinking_config: Any = None, +) -> Dict[str, Any]: + contents, system_instruction = _build_gemini_contents(messages) + request: Dict[str, Any] = {"contents": contents} + if system_instruction: + request["systemInstruction"] = system_instruction + + gemini_tools = _translate_tools_to_gemini(tools) + if gemini_tools: + request["tools"] = gemini_tools + + tool_config = _translate_tool_choice_to_gemini(tool_choice) + if tool_config: + request["toolConfig"] = tool_config + + generation_config: Dict[str, Any] = {} + if temperature is not None: + generation_config["temperature"] = temperature + if max_tokens is not None: + generation_config["maxOutputTokens"] = max_tokens + if top_p is not None: + generation_config["topP"] = top_p + if stop: + generation_config["stopSequences"] = stop if isinstance(stop, list) else [str(stop)] + normalized_thinking = _normalize_thinking_config(thinking_config) + if normalized_thinking: + generation_config["thinkingConfig"] = normalized_thinking + if generation_config: + request["generationConfig"] = generation_config + + return request + + +def _map_gemini_finish_reason(reason: str) -> str: + mapping = { + "STOP": "stop", + "MAX_TOKENS": "length", + "SAFETY": "content_filter", + "RECITATION": "content_filter", + "OTHER": "stop", + } + return mapping.get(str(reason or "").upper(), "stop") + + +def _tool_call_extra_from_part(part: Dict[str, Any]) -> Optional[Dict[str, Any]]: + sig = part.get("thoughtSignature") + if isinstance(sig, str) and sig: + return {"google": {"thought_signature": sig}} + return None + + +def _empty_response(model: str) -> SimpleNamespace: + message = SimpleNamespace( + role="assistant", + content="", + tool_calls=None, + reasoning=None, + reasoning_content=None, + reasoning_details=None, + ) + choice = SimpleNamespace(index=0, message=message, finish_reason="stop") + usage = SimpleNamespace( + prompt_tokens=0, + completion_tokens=0, + total_tokens=0, + prompt_tokens_details=SimpleNamespace(cached_tokens=0), + ) + return SimpleNamespace( + id=f"chatcmpl-{uuid.uuid4().hex[:12]}", + object="chat.completion", + created=int(time.time()), + model=model, + choices=[choice], + 
usage=usage, + ) + + +def translate_gemini_response(resp: Dict[str, Any], model: str) -> SimpleNamespace: + candidates = resp.get("candidates") or [] + if not isinstance(candidates, list) or not candidates: + return _empty_response(model) + + cand = candidates[0] if isinstance(candidates[0], dict) else {} + content_obj = cand.get("content") if isinstance(cand, dict) else {} + parts = content_obj.get("parts") if isinstance(content_obj, dict) else [] + + text_pieces: List[str] = [] + reasoning_pieces: List[str] = [] + tool_calls: List[SimpleNamespace] = [] + + for index, part in enumerate(parts or []): + if not isinstance(part, dict): + continue + if part.get("thought") is True and isinstance(part.get("text"), str): + reasoning_pieces.append(part["text"]) + continue + if isinstance(part.get("text"), str): + text_pieces.append(part["text"]) + continue + fc = part.get("functionCall") + if isinstance(fc, dict) and fc.get("name"): + try: + args_str = json.dumps(fc.get("args") or {}, ensure_ascii=False) + except (TypeError, ValueError): + args_str = "{}" + tool_call = SimpleNamespace( + id=f"call_{uuid.uuid4().hex[:12]}", + type="function", + index=index, + function=SimpleNamespace(name=str(fc["name"]), arguments=args_str), + ) + extra_content = _tool_call_extra_from_part(part) + if extra_content: + tool_call.extra_content = extra_content + tool_calls.append(tool_call) + + finish_reason = "tool_calls" if tool_calls else _map_gemini_finish_reason(str(cand.get("finishReason") or "")) + usage_meta = resp.get("usageMetadata") or {} + usage = SimpleNamespace( + prompt_tokens=int(usage_meta.get("promptTokenCount") or 0), + completion_tokens=int(usage_meta.get("candidatesTokenCount") or 0), + total_tokens=int(usage_meta.get("totalTokenCount") or 0), + prompt_tokens_details=SimpleNamespace( + cached_tokens=int(usage_meta.get("cachedContentTokenCount") or 0), + ), + ) + reasoning = "".join(reasoning_pieces) or None + message = SimpleNamespace( + role="assistant", + content="".join(text_pieces) if text_pieces else None, + tool_calls=tool_calls or None, + reasoning=reasoning, + reasoning_content=reasoning, + reasoning_details=None, + ) + choice = SimpleNamespace(index=0, message=message, finish_reason=finish_reason) + return SimpleNamespace( + id=f"chatcmpl-{uuid.uuid4().hex[:12]}", + object="chat.completion", + created=int(time.time()), + model=model, + choices=[choice], + usage=usage, + ) + + +class _GeminiStreamChunk(SimpleNamespace): + pass + + +def _make_stream_chunk( + *, + model: str, + content: str = "", + tool_call_delta: Optional[Dict[str, Any]] = None, + finish_reason: Optional[str] = None, + reasoning: str = "", +) -> _GeminiStreamChunk: + delta_kwargs: Dict[str, Any] = { + "role": "assistant", + "content": None, + "tool_calls": None, + "reasoning": None, + "reasoning_content": None, + } + if content: + delta_kwargs["content"] = content + if tool_call_delta is not None: + tool_delta = SimpleNamespace( + index=tool_call_delta.get("index", 0), + id=tool_call_delta.get("id") or f"call_{uuid.uuid4().hex[:12]}", + type="function", + function=SimpleNamespace( + name=tool_call_delta.get("name") or "", + arguments=tool_call_delta.get("arguments") or "", + ), + ) + extra_content = tool_call_delta.get("extra_content") + if isinstance(extra_content, dict): + tool_delta.extra_content = extra_content + delta_kwargs["tool_calls"] = [tool_delta] + if reasoning: + delta_kwargs["reasoning"] = reasoning + delta_kwargs["reasoning_content"] = reasoning + delta = SimpleNamespace(**delta_kwargs) + choice = 
SimpleNamespace(index=0, delta=delta, finish_reason=finish_reason) + return _GeminiStreamChunk( + id=f"chatcmpl-{uuid.uuid4().hex[:12]}", + object="chat.completion.chunk", + created=int(time.time()), + model=model, + choices=[choice], + usage=None, + ) + + +def _iter_sse_events(response: httpx.Response) -> Iterator[Dict[str, Any]]: + buffer = "" + for chunk in response.iter_text(): + if not chunk: + continue + buffer += chunk + while "\n" in buffer: + line, buffer = buffer.split("\n", 1) + line = line.rstrip("\r") + if not line: + continue + if not line.startswith("data: "): + continue + data = line[6:] + if data == "[DONE]": + return + try: + payload = json.loads(data) + except json.JSONDecodeError: + logger.debug("Non-JSON Gemini SSE line: %s", data[:200]) + continue + if isinstance(payload, dict): + yield payload + + +def translate_stream_event(event: Dict[str, Any], model: str, tool_call_indices: Dict[str, Dict[str, Any]]) -> List[_GeminiStreamChunk]: + candidates = event.get("candidates") or [] + if not candidates: + return [] + cand = candidates[0] if isinstance(candidates[0], dict) else {} + parts = ((cand.get("content") or {}).get("parts") or []) if isinstance(cand, dict) else [] + chunks: List[_GeminiStreamChunk] = [] + + for part_index, part in enumerate(parts): + if not isinstance(part, dict): + continue + if part.get("thought") is True and isinstance(part.get("text"), str): + chunks.append(_make_stream_chunk(model=model, reasoning=part["text"])) + continue + if isinstance(part.get("text"), str) and part["text"]: + chunks.append(_make_stream_chunk(model=model, content=part["text"])) + fc = part.get("functionCall") + if isinstance(fc, dict) and fc.get("name"): + name = str(fc["name"]) + try: + args_str = json.dumps(fc.get("args") or {}, ensure_ascii=False, sort_keys=True) + except (TypeError, ValueError): + args_str = "{}" + thought_signature = part.get("thoughtSignature") if isinstance(part.get("thoughtSignature"), str) else "" + call_key = json.dumps( + { + "part_index": part_index, + "name": name, + "thought_signature": thought_signature, + }, + sort_keys=True, + ) + slot = tool_call_indices.get(call_key) + if slot is None: + slot = { + "index": len(tool_call_indices), + "id": f"call_{uuid.uuid4().hex[:12]}", + "last_arguments": "", + } + tool_call_indices[call_key] = slot + emitted_arguments = args_str + last_arguments = str(slot.get("last_arguments") or "") + if last_arguments: + if args_str == last_arguments: + emitted_arguments = "" + elif args_str.startswith(last_arguments): + emitted_arguments = args_str[len(last_arguments):] + slot["last_arguments"] = args_str + chunks.append( + _make_stream_chunk( + model=model, + tool_call_delta={ + "index": slot["index"], + "id": slot["id"], + "name": name, + "arguments": emitted_arguments, + "extra_content": _tool_call_extra_from_part(part), + }, + ) + ) + + finish_reason_raw = str(cand.get("finishReason") or "") + if finish_reason_raw: + mapped = "tool_calls" if tool_call_indices else _map_gemini_finish_reason(finish_reason_raw) + chunks.append(_make_stream_chunk(model=model, finish_reason=mapped)) + return chunks + + +def gemini_http_error(response: httpx.Response) -> GeminiAPIError: + status = response.status_code + body_text = "" + body_json: Dict[str, Any] = {} + try: + body_text = response.text + except Exception: + body_text = "" + if body_text: + try: + parsed = json.loads(body_text) + if isinstance(parsed, dict): + body_json = parsed + except (ValueError, TypeError): + body_json = {} + + err_obj = body_json.get("error") if 
isinstance(body_json, dict) else None + if not isinstance(err_obj, dict): + err_obj = {} + err_status = str(err_obj.get("status") or "").strip() + err_message = str(err_obj.get("message") or "").strip() + _raw_details = err_obj.get("details") + details_list = _raw_details if isinstance(_raw_details, list) else [] + + reason = "" + retry_after: Optional[float] = None + metadata: Dict[str, Any] = {} + for detail in details_list: + if not isinstance(detail, dict): + continue + type_url = str(detail.get("@type") or "") + if not reason and type_url.endswith("/google.rpc.ErrorInfo"): + reason_value = detail.get("reason") + if isinstance(reason_value, str): + reason = reason_value + md = detail.get("metadata") + if isinstance(md, dict): + metadata = md + header_retry = response.headers.get("Retry-After") or response.headers.get("retry-after") + if header_retry: + try: + retry_after = float(header_retry) + except (TypeError, ValueError): + retry_after = None + + code = f"gemini_http_{status}" + if status == 401: + code = "gemini_unauthorized" + elif status == 429: + code = "gemini_rate_limited" + elif status == 404: + code = "gemini_model_not_found" + + if err_message: + message = f"Gemini HTTP {status} ({err_status or 'error'}): {err_message}" + else: + message = f"Gemini returned HTTP {status}: {body_text[:500]}" + + return GeminiAPIError( + message, + code=code, + status_code=status, + response=response, + retry_after=retry_after, + details={ + "status": err_status, + "reason": reason, + "metadata": metadata, + "message": err_message, + }, + ) + + +class _GeminiChatCompletions: + def __init__(self, client: "GeminiNativeClient"): + self._client = client + + def create(self, **kwargs: Any) -> Any: + return self._client._create_chat_completion(**kwargs) + + +class _AsyncGeminiChatCompletions: + def __init__(self, client: "AsyncGeminiNativeClient"): + self._client = client + + async def create(self, **kwargs: Any) -> Any: + return await self._client._create_chat_completion(**kwargs) + + +class _GeminiChatNamespace: + def __init__(self, client: "GeminiNativeClient"): + self.completions = _GeminiChatCompletions(client) + + +class _AsyncGeminiChatNamespace: + def __init__(self, client: "AsyncGeminiNativeClient"): + self.completions = _AsyncGeminiChatCompletions(client) + + +class GeminiNativeClient: + """Minimal OpenAI-SDK-compatible facade over Gemini's native REST API.""" + + def __init__( + self, + *, + api_key: str, + base_url: Optional[str] = None, + default_headers: Optional[Dict[str, str]] = None, + timeout: Any = None, + http_client: Optional[httpx.Client] = None, + **_: Any, + ) -> None: + self.api_key = api_key + normalized_base = (base_url or DEFAULT_GEMINI_BASE_URL).rstrip("/") + if normalized_base.endswith("/openai"): + normalized_base = normalized_base[: -len("/openai")] + self.base_url = normalized_base + self._default_headers = dict(default_headers or {}) + self.chat = _GeminiChatNamespace(self) + self.is_closed = False + self._http = http_client or httpx.Client( + timeout=timeout or httpx.Timeout(connect=15.0, read=600.0, write=30.0, pool=30.0) + ) + + def close(self) -> None: + self.is_closed = True + try: + self._http.close() + except Exception: + pass + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() + + def _headers(self) -> Dict[str, str]: + headers = { + "Content-Type": "application/json", + "Accept": "application/json", + "x-goog-api-key": self.api_key, + "User-Agent": "hermes-agent (gemini-native)", + } + 
headers.update(self._default_headers) + return headers + + @staticmethod + def _advance_stream_iterator(iterator: Iterator[_GeminiStreamChunk]) -> tuple[bool, Optional[_GeminiStreamChunk]]: + try: + return False, next(iterator) + except StopIteration: + return True, None + + def _create_chat_completion( + self, + *, + model: str = "gemini-2.5-flash", + messages: Optional[List[Dict[str, Any]]] = None, + stream: bool = False, + tools: Any = None, + tool_choice: Any = None, + temperature: Optional[float] = None, + max_tokens: Optional[int] = None, + top_p: Optional[float] = None, + stop: Any = None, + extra_body: Optional[Dict[str, Any]] = None, + timeout: Any = None, + **_: Any, + ) -> Any: + thinking_config = None + if isinstance(extra_body, dict): + thinking_config = extra_body.get("thinking_config") or extra_body.get("thinkingConfig") + + request = build_gemini_request( + messages=messages or [], + tools=tools, + tool_choice=tool_choice, + temperature=temperature, + max_tokens=max_tokens, + top_p=top_p, + stop=stop, + thinking_config=thinking_config, + ) + + if stream: + return self._stream_completion(model=model, request=request, timeout=timeout) + + url = f"{self.base_url}/models/{model}:generateContent" + response = self._http.post(url, json=request, headers=self._headers(), timeout=timeout) + if response.status_code != 200: + raise gemini_http_error(response) + try: + payload = response.json() + except ValueError as exc: + raise GeminiAPIError( + f"Invalid JSON from Gemini native API: {exc}", + code="gemini_invalid_json", + status_code=response.status_code, + response=response, + ) from exc + return translate_gemini_response(payload, model=model) + + def _stream_completion(self, *, model: str, request: Dict[str, Any], timeout: Any = None) -> Iterator[_GeminiStreamChunk]: + url = f"{self.base_url}/models/{model}:streamGenerateContent?alt=sse" + stream_headers = dict(self._headers()) + stream_headers["Accept"] = "text/event-stream" + + def _generator() -> Iterator[_GeminiStreamChunk]: + try: + with self._http.stream("POST", url, json=request, headers=stream_headers, timeout=timeout) as response: + if response.status_code != 200: + response.read() + raise gemini_http_error(response) + tool_call_indices: Dict[str, Dict[str, Any]] = {} + for event in _iter_sse_events(response): + for chunk in translate_stream_event(event, model, tool_call_indices): + yield chunk + except httpx.HTTPError as exc: + raise GeminiAPIError( + f"Gemini streaming request failed: {exc}", + code="gemini_stream_error", + ) from exc + + return _generator() + + +class AsyncGeminiNativeClient: + """Async wrapper used by auxiliary_client for native Gemini calls.""" + + def __init__(self, sync_client: GeminiNativeClient): + self._sync = sync_client + self.api_key = sync_client.api_key + self.base_url = sync_client.base_url + self.chat = _AsyncGeminiChatNamespace(self) + + async def _create_chat_completion(self, **kwargs: Any) -> Any: + stream = bool(kwargs.get("stream")) + result = await asyncio.to_thread(self._sync.chat.completions.create, **kwargs) + if not stream: + return result + + async def _async_stream() -> Any: + while True: + done, chunk = await asyncio.to_thread(self._sync._advance_stream_iterator, result) + if done: + break + yield chunk + + return _async_stream() + + async def close(self) -> None: + await asyncio.to_thread(self._sync.close) diff --git a/agent/gemini_schema.py b/agent/gemini_schema.py new file mode 100644 index 0000000000..904c99d31b --- /dev/null +++ b/agent/gemini_schema.py @@ -0,0 +1,85 @@ 
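# Usage sketch for the GeminiNativeClient facade above (editor's
# illustration, not part of the patch): the class mirrors the OpenAI SDK's
# `client.chat.completions.create(...)` surface, so existing call sites can
# swap clients without changes. The import path is an assumption; the diff
# does not name the module that defines the client.
import os

from agent.gemini_native import GeminiNativeClient  # assumed module path

client = GeminiNativeClient(api_key=os.environ["GEMINI_API_KEY"])
try:
    resp = client.chat.completions.create(
        model="gemini-2.5-flash",
        messages=[{"role": "user", "content": "Reply with one word: ping"}],
    )
    print(resp.choices[0].message.content)

    # stream=True yields chat.completion.chunk-shaped objects instead:
    for chunk in client.chat.completions.create(
        model="gemini-2.5-flash",
        messages=[{"role": "user", "content": "Count to three."}],
        stream=True,
    ):
        delta = chunk.choices[0].delta
        if delta.content:
            print(delta.content, end="")
finally:
    client.close()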
+"""Helpers for translating OpenAI-style tool schemas to Gemini's schema subset.""" + +from __future__ import annotations + +from typing import Any, Dict, List + +# Gemini's ``FunctionDeclaration.parameters`` field accepts the ``Schema`` +# object, which is only a subset of OpenAPI 3.0 / JSON Schema. Strip fields +# outside that subset before sending Hermes tool schemas to Google. +_GEMINI_SCHEMA_ALLOWED_KEYS = { + "type", + "format", + "title", + "description", + "nullable", + "enum", + "maxItems", + "minItems", + "properties", + "required", + "minProperties", + "maxProperties", + "minLength", + "maxLength", + "pattern", + "example", + "anyOf", + "propertyOrdering", + "default", + "items", + "minimum", + "maximum", +} + + +def sanitize_gemini_schema(schema: Any) -> Dict[str, Any]: + """Return a Gemini-compatible copy of a tool parameter schema. + + Hermes tool schemas are OpenAI-flavored JSON Schema and may contain keys + such as ``$schema`` or ``additionalProperties`` that Google's Gemini + ``Schema`` object rejects. This helper preserves the documented Gemini + subset and recursively sanitizes nested ``properties`` / ``items`` / + ``anyOf`` definitions. + """ + + if not isinstance(schema, dict): + return {} + + cleaned: Dict[str, Any] = {} + for key, value in schema.items(): + if key not in _GEMINI_SCHEMA_ALLOWED_KEYS: + continue + if key == "properties": + if not isinstance(value, dict): + continue + props: Dict[str, Any] = {} + for prop_name, prop_schema in value.items(): + if not isinstance(prop_name, str): + continue + props[prop_name] = sanitize_gemini_schema(prop_schema) + cleaned[key] = props + continue + if key == "items": + cleaned[key] = sanitize_gemini_schema(value) + continue + if key == "anyOf": + if not isinstance(value, list): + continue + cleaned[key] = [ + sanitize_gemini_schema(item) + for item in value + if isinstance(item, dict) + ] + continue + cleaned[key] = value + return cleaned + + +def sanitize_gemini_tool_parameters(parameters: Any) -> Dict[str, Any]: + """Normalize tool parameters to a valid Gemini object schema.""" + + cleaned = sanitize_gemini_schema(parameters) + if not cleaned: + return {"type": "object", "properties": {}} + return cleaned diff --git a/agent/image_gen_provider.py b/agent/image_gen_provider.py new file mode 100644 index 0000000000..47f65c1b34 --- /dev/null +++ b/agent/image_gen_provider.py @@ -0,0 +1,242 @@ +""" +Image Generation Provider ABC +============================= + +Defines the pluggable-backend interface for image generation. Providers register +instances via ``PluginContext.register_image_gen_provider()``; the active one +(selected via ``image_gen.provider`` in ``config.yaml``) services every +``image_generate`` tool call. + +Providers live in ``/plugins/image_gen//`` (built-in, auto-loaded +as ``kind: backend``) or ``~/.hermes/plugins/image_gen//`` (user, opt-in +via ``plugins.enabled``). + +Response shape +-------------- +All providers return a dict that :func:`success_response` / :func:`error_response` +produce. The tool wrapper JSON-serializes it. 
Keys: + + success bool + image str | None URL or absolute file path + model str provider-specific model identifier + prompt str echoed prompt + aspect_ratio str "landscape" | "square" | "portrait" + provider str provider name (for diagnostics) + error str only when success=False + error_type str only when success=False +""" + +from __future__ import annotations + +import abc +import base64 +import datetime +import logging +import uuid +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple + +logger = logging.getLogger(__name__) + + +VALID_ASPECT_RATIOS: Tuple[str, ...] = ("landscape", "square", "portrait") +DEFAULT_ASPECT_RATIO = "landscape" + + +# --------------------------------------------------------------------------- +# ABC +# --------------------------------------------------------------------------- + + +class ImageGenProvider(abc.ABC): + """Abstract base class for an image generation backend. + + Subclasses must implement :meth:`generate`. Everything else has sane + defaults — override only what your provider needs. + """ + + @property + @abc.abstractmethod + def name(self) -> str: + """Stable short identifier used in ``image_gen.provider`` config. + + Lowercase, no spaces. Examples: ``fal``, ``openai``, ``replicate``. + """ + + @property + def display_name(self) -> str: + """Human-readable label shown in ``hermes tools``. Defaults to ``name.title()``.""" + return self.name.title() + + def is_available(self) -> bool: + """Return True when this provider can service calls. + + Typically checks for a required API key. Default: True + (providers with no external dependencies are always available). + """ + return True + + def list_models(self) -> List[Dict[str, Any]]: + """Return catalog entries for ``hermes tools`` model picker. + + Each entry:: + + { + "id": "gpt-image-1.5", # required + "display": "GPT Image 1.5", # optional; defaults to id + "speed": "~10s", # optional + "strengths": "...", # optional + "price": "$...", # optional + } + + Default: empty list (provider has no user-selectable models). + """ + return [] + + def get_setup_schema(self) -> Dict[str, Any]: + """Return provider metadata for the ``hermes tools`` picker. + + Used by ``tools_config.py`` to inject this provider as a row in + the Image Generation provider list. Shape:: + + { + "name": "OpenAI", # picker label + "badge": "paid", # optional short tag + "tag": "One-line description...", # optional subtitle + "env_vars": [ # keys to prompt for + {"key": "OPENAI_API_KEY", + "prompt": "OpenAI API key", + "url": "https://platform.openai.com/api-keys"}, + ], + } + + Default: minimal entry derived from ``display_name``. Override to + expose API key prompts and custom badges. + """ + return { + "name": self.display_name, + "badge": "", + "tag": "", + "env_vars": [], + } + + def default_model(self) -> Optional[str]: + """Return the default model id, or None if not applicable.""" + models = self.list_models() + if models: + return models[0].get("id") + return None + + @abc.abstractmethod + def generate( + self, + prompt: str, + aspect_ratio: str = DEFAULT_ASPECT_RATIO, + **kwargs: Any, + ) -> Dict[str, Any]: + """Generate an image. + + Implementations should return the dict from :func:`success_response` + or :func:`error_response`. ``kwargs`` may contain forward-compat + parameters future versions of the schema will expose — implementations + should ignore unknown keys. 
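+
+        A minimal implementation sketch (editor's illustration; the backend
+        call is a placeholder, not a real API)::
+
+            def generate(self, prompt, aspect_ratio=DEFAULT_ASPECT_RATIO, **kwargs):
+                ratio = resolve_aspect_ratio(aspect_ratio)
+                try:
+                    url = my_backend_call(prompt, ratio)  # hypothetical helper
+                except Exception as exc:
+                    return error_response(error=str(exc), provider=self.name,
+                                          prompt=prompt, aspect_ratio=ratio)
+                return success_response(image=url, model="example-model",
+                                        prompt=prompt, aspect_ratio=ratio,
+                                        provider=self.name)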
+ """ + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def resolve_aspect_ratio(value: Optional[str]) -> str: + """Clamp an aspect_ratio value to the valid set, defaulting to landscape. + + Invalid values are coerced rather than rejected so the tool surface is + forgiving of agent mistakes. + """ + if not isinstance(value, str): + return DEFAULT_ASPECT_RATIO + v = value.strip().lower() + if v in VALID_ASPECT_RATIOS: + return v + return DEFAULT_ASPECT_RATIO + + +def _images_cache_dir() -> Path: + """Return ``$HERMES_HOME/cache/images/``, creating parents as needed.""" + from hermes_constants import get_hermes_home + + path = get_hermes_home() / "cache" / "images" + path.mkdir(parents=True, exist_ok=True) + return path + + +def save_b64_image( + b64_data: str, + *, + prefix: str = "image", + extension: str = "png", +) -> Path: + """Decode base64 image data and write it under ``$HERMES_HOME/cache/images/``. + + Returns the absolute :class:`Path` to the saved file. + + Filename format: ``__.``. + """ + raw = base64.b64decode(b64_data) + ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") + short = uuid.uuid4().hex[:8] + path = _images_cache_dir() / f"{prefix}_{ts}_{short}.{extension}" + path.write_bytes(raw) + return path + + +def success_response( + *, + image: str, + model: str, + prompt: str, + aspect_ratio: str, + provider: str, + extra: Optional[Dict[str, Any]] = None, +) -> Dict[str, Any]: + """Build a uniform success response dict. + + ``image`` may be an HTTP URL or an absolute filesystem path (for b64 + providers like OpenAI). Callers that need to pass through additional + backend-specific fields can supply ``extra``. + """ + payload: Dict[str, Any] = { + "success": True, + "image": image, + "model": model, + "prompt": prompt, + "aspect_ratio": aspect_ratio, + "provider": provider, + } + if extra: + for k, v in extra.items(): + payload.setdefault(k, v) + return payload + + +def error_response( + *, + error: str, + error_type: str = "provider_error", + provider: str = "", + model: str = "", + prompt: str = "", + aspect_ratio: str = DEFAULT_ASPECT_RATIO, +) -> Dict[str, Any]: + """Build a uniform error response dict.""" + return { + "success": False, + "image": None, + "error": error, + "error_type": error_type, + "model": model, + "prompt": prompt, + "aspect_ratio": aspect_ratio, + "provider": provider, + } diff --git a/agent/image_gen_registry.py b/agent/image_gen_registry.py new file mode 100644 index 0000000000..715133231c --- /dev/null +++ b/agent/image_gen_registry.py @@ -0,0 +1,120 @@ +""" +Image Generation Provider Registry +================================== + +Central map of registered providers. Populated by plugins at import-time via +``PluginContext.register_image_gen_provider()``; consumed by the +``image_generate`` tool to dispatch each call to the active backend. + +Active selection +---------------- +The active provider is chosen by ``image_gen.provider`` in ``config.yaml``. +If unset, :func:`get_active_provider` applies fallback logic: + +1. If exactly one provider is registered, use it. +2. Otherwise if a provider named ``fal`` is registered, use it (legacy + default — matches pre-plugin behavior). +3. Otherwise return ``None`` (the tool surfaces a helpful error pointing + the user at ``hermes tools``). 
+""" + +from __future__ import annotations + +import logging +import threading +from typing import Dict, List, Optional + +from agent.image_gen_provider import ImageGenProvider + +logger = logging.getLogger(__name__) + + +_providers: Dict[str, ImageGenProvider] = {} +_lock = threading.Lock() + + +def register_provider(provider: ImageGenProvider) -> None: + """Register an image generation provider. + + Re-registration (same ``name``) overwrites the previous entry and logs + a debug message — this makes hot-reload scenarios (tests, dev loops) + behave predictably. + """ + if not isinstance(provider, ImageGenProvider): + raise TypeError( + f"register_provider() expects an ImageGenProvider instance, " + f"got {type(provider).__name__}" + ) + name = provider.name + if not isinstance(name, str) or not name.strip(): + raise ValueError("Image gen provider .name must be a non-empty string") + with _lock: + existing = _providers.get(name) + _providers[name] = provider + if existing is not None: + logger.debug("Image gen provider '%s' re-registered (was %r)", name, type(existing).__name__) + else: + logger.debug("Registered image gen provider '%s' (%s)", name, type(provider).__name__) + + +def list_providers() -> List[ImageGenProvider]: + """Return all registered providers, sorted by name.""" + with _lock: + items = list(_providers.values()) + return sorted(items, key=lambda p: p.name) + + +def get_provider(name: str) -> Optional[ImageGenProvider]: + """Return the provider registered under *name*, or None.""" + if not isinstance(name, str): + return None + with _lock: + return _providers.get(name.strip()) + + +def get_active_provider() -> Optional[ImageGenProvider]: + """Resolve the currently-active provider. + + Reads ``image_gen.provider`` from config.yaml; falls back per the + module docstring. + """ + configured: Optional[str] = None + try: + from hermes_cli.config import load_config + + cfg = load_config() + section = cfg.get("image_gen") if isinstance(cfg, dict) else None + if isinstance(section, dict): + raw = section.get("provider") + if isinstance(raw, str) and raw.strip(): + configured = raw.strip() + except Exception as exc: + logger.debug("Could not read image_gen.provider from config: %s", exc) + + with _lock: + snapshot = dict(_providers) + + if configured: + provider = snapshot.get(configured) + if provider is not None: + return provider + logger.debug( + "image_gen.provider='%s' configured but not registered; falling back", + configured, + ) + + # Fallback: single-provider case + if len(snapshot) == 1: + return next(iter(snapshot.values())) + + # Fallback: prefer legacy FAL for backward compat + if "fal" in snapshot: + return snapshot["fal"] + + return None + + +def _reset_for_tests() -> None: + """Clear the registry. 
**Test-only.**""" + with _lock: + _providers.clear() diff --git a/agent/insights.py b/agent/insights.py index 4dafb74876..70907b4f3d 100644 --- a/agent/insights.py +++ b/agent/insights.py @@ -124,6 +124,7 @@ class InsightsEngine: # Gather raw data sessions = self._get_sessions(cutoff, source) tool_usage = self._get_tool_usage(cutoff, source) + skill_usage = self._get_skill_usage(cutoff, source) message_stats = self._get_message_stats(cutoff, source) if not sessions: @@ -135,6 +136,15 @@ class InsightsEngine: "models": [], "platforms": [], "tools": [], + "skills": { + "summary": { + "total_skill_loads": 0, + "total_skill_edits": 0, + "total_skill_actions": 0, + "distinct_skills_used": 0, + }, + "top_skills": [], + }, "activity": {}, "top_sessions": [], } @@ -144,6 +154,7 @@ class InsightsEngine: models = self._compute_model_breakdown(sessions) platforms = self._compute_platform_breakdown(sessions) tools = self._compute_tool_breakdown(tool_usage) + skills = self._compute_skill_breakdown(skill_usage) activity = self._compute_activity_patterns(sessions) top_sessions = self._compute_top_sessions(sessions) @@ -156,6 +167,7 @@ class InsightsEngine: "models": models, "platforms": platforms, "tools": tools, + "skills": skills, "activity": activity, "top_sessions": top_sessions, } @@ -284,6 +296,82 @@ class InsightsEngine: for name, count in tool_counts.most_common() ] + def _get_skill_usage(self, cutoff: float, source: str = None) -> List[Dict]: + """Extract per-skill usage from assistant tool calls.""" + skill_counts: Dict[str, Dict[str, Any]] = {} + + if source: + cursor = self._conn.execute( + """SELECT m.tool_calls, m.timestamp + FROM messages m + JOIN sessions s ON s.id = m.session_id + WHERE s.started_at >= ? AND s.source = ? + AND m.role = 'assistant' AND m.tool_calls IS NOT NULL""", + (cutoff, source), + ) + else: + cursor = self._conn.execute( + """SELECT m.tool_calls, m.timestamp + FROM messages m + JOIN sessions s ON s.id = m.session_id + WHERE s.started_at >= ? 
+ AND m.role = 'assistant' AND m.tool_calls IS NOT NULL""", + (cutoff,), + ) + + for row in cursor.fetchall(): + try: + calls = row["tool_calls"] + if isinstance(calls, str): + calls = json.loads(calls) + if not isinstance(calls, list): + continue + except (json.JSONDecodeError, TypeError): + continue + + timestamp = row["timestamp"] + for call in calls: + if not isinstance(call, dict): + continue + func = call.get("function", {}) + tool_name = func.get("name") + if tool_name not in {"skill_view", "skill_manage"}: + continue + + args = func.get("arguments") + if isinstance(args, str): + try: + args = json.loads(args) + except (json.JSONDecodeError, TypeError): + continue + if not isinstance(args, dict): + continue + + skill_name = args.get("name") + if not isinstance(skill_name, str) or not skill_name.strip(): + continue + + entry = skill_counts.setdefault( + skill_name, + { + "skill": skill_name, + "view_count": 0, + "manage_count": 0, + "last_used_at": None, + }, + ) + if tool_name == "skill_view": + entry["view_count"] += 1 + else: + entry["manage_count"] += 1 + + if timestamp is not None and ( + entry["last_used_at"] is None or timestamp > entry["last_used_at"] + ): + entry["last_used_at"] = timestamp + + return list(skill_counts.values()) + def _get_message_stats(self, cutoff: float, source: str = None) -> Dict: """Get aggregate message statistics.""" if source: @@ -475,6 +563,46 @@ class InsightsEngine: }) return result + def _compute_skill_breakdown(self, skill_usage: List[Dict]) -> Dict[str, Any]: + """Process per-skill usage into summary + ranked list.""" + total_skill_loads = sum(s["view_count"] for s in skill_usage) if skill_usage else 0 + total_skill_edits = sum(s["manage_count"] for s in skill_usage) if skill_usage else 0 + total_skill_actions = total_skill_loads + total_skill_edits + + top_skills = [] + for skill in skill_usage: + total_count = skill["view_count"] + skill["manage_count"] + percentage = (total_count / total_skill_actions * 100) if total_skill_actions else 0 + top_skills.append({ + "skill": skill["skill"], + "view_count": skill["view_count"], + "manage_count": skill["manage_count"], + "total_count": total_count, + "percentage": percentage, + "last_used_at": skill.get("last_used_at"), + }) + + top_skills.sort( + key=lambda s: ( + s["total_count"], + s["view_count"], + s["manage_count"], + s["last_used_at"] or 0, + s["skill"], + ), + reverse=True, + ) + + return { + "summary": { + "total_skill_loads": total_skill_loads, + "total_skill_edits": total_skill_edits, + "total_skill_actions": total_skill_actions, + "distinct_skills_used": len(skill_usage), + }, + "top_skills": top_skills, + } + def _compute_activity_patterns(self, sessions: List[Dict]) -> Dict: """Analyze activity patterns by day of week and hour.""" day_counts = Counter() # 0=Monday ... 6=Sunday @@ -670,6 +798,28 @@ class InsightsEngine: lines.append(f" ... 
and {len(report['tools']) - 15} more tools") lines.append("") + # Skill usage + skills = report.get("skills", {}) + top_skills = skills.get("top_skills", []) + if top_skills: + lines.append(" 🧠 Top Skills") + lines.append(" " + "─" * 56) + lines.append(f" {'Skill':<28} {'Loads':>7} {'Edits':>7} {'Last used':>11}") + for skill in top_skills[:10]: + last_used = "—" + if skill.get("last_used_at"): + last_used = datetime.fromtimestamp(skill["last_used_at"]).strftime("%b %d") + lines.append( + f" {skill['skill'][:28]:<28} {skill['view_count']:>7,} {skill['manage_count']:>7,} {last_used:>11}" + ) + summary = skills.get("summary", {}) + lines.append( + f" Distinct skills: {summary.get('distinct_skills_used', 0)} " + f"Loads: {summary.get('total_skill_loads', 0):,} " + f"Edits: {summary.get('total_skill_edits', 0):,}" + ) + lines.append("") + # Activity patterns act = report.get("activity", {}) if act.get("by_day"): @@ -753,6 +903,18 @@ class InsightsEngine: lines.append(f" {t['tool']} — {t['count']:,} calls ({t['percentage']:.1f}%)") lines.append("") + skills = report.get("skills", {}) + if skills.get("top_skills"): + lines.append("**🧠 Top Skills:**") + for skill in skills["top_skills"][:5]: + suffix = "" + if skill.get("last_used_at"): + suffix = f", last used {datetime.fromtimestamp(skill['last_used_at']).strftime('%b %d')}" + lines.append( + f" {skill['skill']} — {skill['view_count']:,} loads, {skill['manage_count']:,} edits{suffix}" + ) + lines.append("") + # Activity summary act = report.get("activity", {}) if act.get("busiest_day") and act.get("busiest_hour"): diff --git a/agent/model_metadata.py b/agent/model_metadata.py index 81bac6c92f..152e536fdb 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -14,6 +14,8 @@ from urllib.parse import urlparse import requests import yaml +from utils import base_url_host_matches, base_url_hostname + from hermes_constants import OPENROUTER_MODELS_URL logger = logging.getLogger(__name__) @@ -23,7 +25,7 @@ logger = logging.getLogger(__name__) # are preserved so the full model name reaches cache lookups and server queries. 
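# Editor's illustration of the skills report computed by the insights code
# above (values are invented; the sort key mirrors _compute_skill_breakdown):
rows = [
    {"skill": "git-flow", "view_count": 4, "manage_count": 1, "last_used_at": 1700000000.0},
    {"skill": "pytest", "view_count": 5, "manage_count": 0, "last_used_at": 1690000000.0},
]
ranked = sorted(
    rows,
    key=lambda s: (
        s["view_count"] + s["manage_count"],  # total actions first
        s["view_count"],                      # then loads
        s["manage_count"],                    # then edits
        s["last_used_at"] or 0,               # then recency
        s["skill"],                           # then name
    ),
    reverse=True,
)
# Both rows total 5 actions; "pytest" wins the tie on view_count.
assert [r["skill"] for r in ranked] == ["pytest", "git-flow"]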
_PROVIDER_PREFIXES: frozenset[str] = frozenset({ "openrouter", "nous", "openai-codex", "copilot", "copilot-acp", - "gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "anthropic", "deepseek", + "gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "stepfun", "minimax", "minimax-cn", "anthropic", "deepseek", "opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba", "qwen-oauth", "xiaomi", @@ -34,7 +36,7 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({ "glm", "z-ai", "z.ai", "zhipu", "github", "github-copilot", "github-models", "kimi", "moonshot", "kimi-cn", "moonshot-cn", "claude", "deep-seek", "ollama", - "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen", + "stepfun", "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen", "mimo", "xiaomi-mimo", "arcee-ai", "arceeai", "xai", "x-ai", "x.ai", "grok", @@ -116,7 +118,6 @@ DEFAULT_CONTEXT_LENGTHS = { "gpt-5.4-nano": 400000, # 400k (not 1.05M like full 5.4) "gpt-5.4-mini": 400000, # 400k (not 1.05M like full 5.4) "gpt-5.4": 1050000, # GPT-5.4, GPT-5.4 Pro (1.05M context) - "gpt-5.3-codex-spark": 128000, # Spark variant has reduced 128k context "gpt-5.1-chat": 128000, # Chat variant has 128k context "gpt-5": 400000, # GPT-5.x base, mini, codex variants (400k) "gpt-4.1": 1047576, @@ -169,6 +170,7 @@ DEFAULT_CONTEXT_LENGTHS = { "Qwen/Qwen3.5-35B-A3B": 131072, "deepseek-ai/DeepSeek-V3.2": 65536, "moonshotai/Kimi-K2.5": 262144, + "moonshotai/Kimi-K2.6": 262144, "moonshotai/Kimi-K2-Thinking": 262144, "MiniMaxAI/MiniMax-M2.5": 204800, "XiaomiMiMo/MiMo-V2-Flash": 256000, @@ -211,8 +213,15 @@ def _normalize_base_url(base_url: str) -> str: return (base_url or "").strip().rstrip("/") +def _auth_headers(api_key: str = "") -> Dict[str, str]: + token = str(api_key or "").strip() + if not token: + return {} + return {"Authorization": f"Bearer {token}"} + + def _is_openrouter_base_url(base_url: str) -> bool: - return "openrouter.ai" in _normalize_base_url(base_url).lower() + return base_url_host_matches(base_url, "openrouter.ai") def _is_custom_endpoint(base_url: str) -> bool: @@ -228,6 +237,8 @@ _URL_TO_PROVIDER: Dict[str, str] = { "api.moonshot.ai": "kimi-coding", "api.moonshot.cn": "kimi-coding-cn", "api.kimi.com": "kimi-coding", + "api.stepfun.ai": "stepfun", + "api.stepfun.com": "stepfun", "api.arcee.ai": "arcee", "api.minimax": "minimax", "dashscope.aliyuncs.com": "alibaba", @@ -310,7 +321,7 @@ def is_local_endpoint(base_url: str) -> bool: return False -def detect_local_server_type(base_url: str) -> Optional[str]: +def detect_local_server_type(base_url: str, api_key: str = "") -> Optional[str]: """Detect which local server is running at base_url by probing known endpoints. Returns one of: "ollama", "lm-studio", "vllm", "llamacpp", or None. 
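# Rough sketch of the probe order detect_local_server_type() relies on
# (editor's simplification; the real function also strips /v1 suffixes and
# probes more endpoints). The api_key threaded through above becomes a
# Bearer header so auth-protected local servers no longer fail detection:
import httpx

def sketch_detect(server_url: str, api_key: str = "") -> str | None:
    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
    with httpx.Client(timeout=2.0, headers=headers) as client:
        # LM Studio answers /api/v1/models; Ollama answers /api/tags.
        for probe, name in (("/api/v1/models", "lm-studio"), ("/api/tags", "ollama")):
            try:
                if client.get(f"{server_url}{probe}").status_code == 200:
                    return name
            except httpx.HTTPError:
                continue
    return None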
@@ -322,8 +333,10 @@ def detect_local_server_type(base_url: str) -> Optional[str]: if server_url.endswith("/v1"): server_url = server_url[:-3] + headers = _auth_headers(api_key) + try: - with httpx.Client(timeout=2.0) as client: + with httpx.Client(timeout=2.0, headers=headers) as client: # LM Studio exposes /api/v1/models — check first (most specific) try: r = client.get(f"{server_url}/api/v1/models") @@ -510,6 +523,59 @@ def fetch_endpoint_model_metadata( headers = {"Authorization": f"Bearer {api_key}"} if api_key else {} last_error: Optional[Exception] = None + if is_local_endpoint(normalized): + try: + if detect_local_server_type(normalized, api_key=api_key) == "lm-studio": + server_url = normalized[:-3].rstrip("/") if normalized.endswith("/v1") else normalized + response = requests.get( + server_url.rstrip("/") + "/api/v1/models", + headers=headers, + timeout=10, + ) + response.raise_for_status() + payload = response.json() + cache: Dict[str, Dict[str, Any]] = {} + for model in payload.get("models", []): + if not isinstance(model, dict): + continue + model_id = model.get("key") or model.get("id") + if not model_id: + continue + entry: Dict[str, Any] = {"name": model.get("name", model_id)} + + context_length = None + for inst in model.get("loaded_instances", []) or []: + if not isinstance(inst, dict): + continue + cfg = inst.get("config", {}) + ctx = cfg.get("context_length") if isinstance(cfg, dict) else None + if isinstance(ctx, int) and ctx > 0: + context_length = ctx + break + if context_length is None: + context_length = _extract_context_length(model) + if context_length is not None: + entry["context_length"] = context_length + + max_completion_tokens = _extract_max_completion_tokens(model) + if max_completion_tokens is not None: + entry["max_completion_tokens"] = max_completion_tokens + + pricing = _extract_pricing(model) + if pricing: + entry["pricing"] = pricing + + _add_model_aliases(cache, model_id, entry) + alt_id = model.get("id") + if isinstance(alt_id, str) and alt_id and alt_id != model_id: + _add_model_aliases(cache, alt_id, entry) + + _endpoint_model_metadata_cache[normalized] = cache + _endpoint_model_metadata_cache_time[normalized] = time.time() + return cache + except Exception as exc: + last_error = exc + for candidate in candidates: url = candidate.rstrip("/") + "/models" try: @@ -716,7 +782,7 @@ def _model_id_matches(candidate_id: str, lookup_model: str) -> bool: return False -def query_ollama_num_ctx(model: str, base_url: str) -> Optional[int]: +def query_ollama_num_ctx(model: str, base_url: str, api_key: str = "") -> Optional[int]: """Query an Ollama server for the model's context length. 
Returns the model's maximum context from GGUF metadata via ``/api/show``, @@ -734,14 +800,16 @@ def query_ollama_num_ctx(model: str, base_url: str) -> Optional[int]: server_url = server_url[:-3] try: - server_type = detect_local_server_type(base_url) + server_type = detect_local_server_type(base_url, api_key=api_key) except Exception: return None if server_type != "ollama": return None + headers = _auth_headers(api_key) + try: - with httpx.Client(timeout=3.0) as client: + with httpx.Client(timeout=3.0, headers=headers) as client: resp = client.post(f"{server_url}/api/show", json={"name": bare_model}) if resp.status_code != 200: return None @@ -769,7 +837,7 @@ def query_ollama_num_ctx(model: str, base_url: str) -> Optional[int]: return None -def _query_local_context_length(model: str, base_url: str) -> Optional[int]: +def _query_local_context_length(model: str, base_url: str, api_key: str = "") -> Optional[int]: """Query a local server for the model's context length.""" import httpx @@ -782,13 +850,15 @@ def _query_local_context_length(model: str, base_url: str) -> Optional[int]: if server_url.endswith("/v1"): server_url = server_url[:-3] + headers = _auth_headers(api_key) + try: - server_type = detect_local_server_type(base_url) + server_type = detect_local_server_type(base_url, api_key=api_key) except Exception: server_type = None try: - with httpx.Client(timeout=3.0) as client: + with httpx.Client(timeout=3.0, headers=headers) as client: # Ollama: /api/show returns model details with context info if server_type == "ollama": resp = client.post(f"{server_url}/api/show", json={"name": model}) @@ -999,7 +1069,7 @@ def get_model_context_length( if not _is_known_provider_base_url(base_url): # 3. Try querying local server directly if is_local_endpoint(base_url): - local_ctx = _query_local_context_length(model, base_url) + local_ctx = _query_local_context_length(model, base_url, api_key=api_key) if local_ctx and local_ctx > 0: save_context_length(model, base_url, local_ctx) return local_ctx @@ -1013,7 +1083,7 @@ def get_model_context_length( # 4. Anthropic /v1/models API (only for regular API keys, not OAuth) if provider == "anthropic" or ( - base_url and "api.anthropic.com" in base_url + base_url and base_url_hostname(base_url) == "api.anthropic.com" ): ctx = _query_anthropic_context_length(model, base_url or "https://api.anthropic.com", api_key) if ctx: @@ -1022,7 +1092,11 @@ def get_model_context_length( # 4b. AWS Bedrock — use static context length table. # Bedrock's ListFoundationModels doesn't expose context window sizes, # so we maintain a curated table in bedrock_adapter.py. - if provider == "bedrock" or (base_url and "bedrock-runtime" in base_url): + if provider == "bedrock" or ( + base_url + and base_url_hostname(base_url).startswith("bedrock-runtime.") + and base_url_host_matches(base_url, "amazonaws.com") + ): try: from agent.bedrock_adapter import get_bedrock_context_length return get_bedrock_context_length(model) @@ -1069,7 +1143,7 @@ def get_model_context_length( # 9. 
Query local server as last resort if base_url and is_local_endpoint(base_url): - local_ctx = _query_local_context_length(model, base_url) + local_ctx = _query_local_context_length(model, base_url, api_key=api_key) if local_ctx and local_ctx > 0: save_context_length(model, base_url, local_ctx) return local_ctx diff --git a/agent/models_dev.py b/agent/models_dev.py index 3e5c911e7e..2f06a75d89 100644 --- a/agent/models_dev.py +++ b/agent/models_dev.py @@ -146,6 +146,7 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = { "openai-codex": "openai", "zai": "zai", "kimi-coding": "kimi-for-coding", + "stepfun": "stepfun", "kimi-coding-cn": "kimi-for-coding", "minimax": "minimax", "minimax-cn": "minimax-cn", diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py index 3e042f65df..8e061f831b 100644 --- a/agent/prompt_builder.py +++ b/agent/prompt_builder.py @@ -152,7 +152,13 @@ MEMORY_GUIDANCE = ( "Do NOT save task progress, session outcomes, completed-work logs, or temporary TODO " "state to memory; use session_search to recall those from past transcripts. " "If you've discovered a new way to do something, solved a problem that could be " - "necessary later, save it as a skill with the skill tool." + "necessary later, save it as a skill with the skill tool.\n" + "Write memories as declarative facts, not instructions to yourself. " + "'User prefers concise responses' ✓ — 'Always respond concisely' ✗. " + "'Project uses pytest with xdist' ✓ — 'Run tests with pytest -n 4' ✗. " + "Imperative phrasing gets re-read as a directive in later sessions and can " + "cause repeated work or override the user's current request. Procedures and " + "workflows belong in skills, not memory." ) SESSION_SEARCH_GUIDANCE = ( @@ -344,7 +350,13 @@ PLATFORM_HINTS = { ), "cli": ( "You are a CLI AI Agent. Try not to use markdown but simple text " - "renderable inside a terminal." + "renderable inside a terminal. " + "File delivery: there is no attachment channel — the user reads your " + "response directly in their terminal. Do NOT emit MEDIA:/path tags " + "(those are only intercepted on messaging platforms like Telegram, " + "Discord, Slack, etc.; on the CLI they render as literal text). " + "When referring to a file you created or changed, just state its " + "absolute path in plain text; the user can open it from there." ), "sms": ( "You are communicating via SMS. Keep responses concise and use plain text " @@ -613,12 +625,14 @@ def build_skills_system_prompt( or get_session_env("HERMES_SESSION_PLATFORM") or "" ) + disabled = get_disabled_skill_names() cache_key = ( str(skills_dir.resolve()), tuple(str(d) for d in external_dirs), tuple(sorted(str(t) for t in (available_tools or set()))), tuple(sorted(str(ts) for ts in (available_toolsets or set()))), _platform_hint, + tuple(sorted(disabled)), ) with _SKILLS_PROMPT_CACHE_LOCK: cached = _SKILLS_PROMPT_CACHE.get(cache_key) @@ -626,8 +640,6 @@ def build_skills_system_prompt( _SKILLS_PROMPT_CACHE.move_to_end(cache_key) return cached - disabled = get_disabled_skill_names() - # ── Layer 2: disk snapshot ──────────────────────────────────────── snapshot = _load_skills_snapshot(skills_dir) diff --git a/agent/redact.py b/agent/redact.py index af3b7bb93c..3679b73236 100644 --- a/agent/redact.py +++ b/agent/redact.py @@ -13,6 +13,48 @@ import re logger = logging.getLogger(__name__) +# Sensitive query-string parameter names (case-insensitive exact match). +# Ported from nearai/ironclaw#2529 — catches tokens whose values don't match +# any known vendor prefix regex (e.g. 
opaque tokens, short OAuth codes). +_SENSITIVE_QUERY_PARAMS = frozenset({ + "access_token", + "refresh_token", + "id_token", + "token", + "api_key", + "apikey", + "client_secret", + "password", + "auth", + "jwt", + "session", + "secret", + "key", + "code", # OAuth authorization codes + "signature", # pre-signed URL signatures + "x-amz-signature", +}) + +# Sensitive form-urlencoded / JSON body key names (case-insensitive exact match). +# Exact match, NOT substring — "token_count" and "session_id" must NOT match. +# Ported from nearai/ironclaw#2529. +_SENSITIVE_BODY_KEYS = frozenset({ + "access_token", + "refresh_token", + "id_token", + "token", + "api_key", + "apikey", + "client_secret", + "password", + "auth", + "jwt", + "secret", + "private_key", + "authorization", + "key", +}) + # Snapshot at import time so runtime env mutations (e.g. LLM-generated # `export HERMES_REDACT_SECRETS=false`) cannot disable redaction mid-session. _REDACT_ENABLED = os.getenv("HERMES_REDACT_SECRETS", "").lower() not in ("0", "false", "no", "off") @@ -108,6 +150,30 @@ _DISCORD_MENTION_RE = re.compile(r"<@!?(\d{17,20})>") # Negative lookahead prevents matching hex strings or identifiers _SIGNAL_PHONE_RE = re.compile(r"(\+[1-9]\d{6,14})(?![A-Za-z0-9])") +# URLs containing query strings — matches `scheme://...?...[# or end]`. +# Used to scan text for URLs whose query params may contain secrets. +# Ported from nearai/ironclaw#2529. +_URL_WITH_QUERY_RE = re.compile( + r"(https?|wss?|ftp)://" # scheme + r"([^\s/?#]+)" # authority (may include userinfo) + r"([^\s?#]*)" # path + r"\?([^\s#]+)" # query (required) + r"(#\S*)?", # optional fragment +) + +# URLs containing userinfo — `scheme://user:password@host` for ANY scheme +# (not just DB protocols already covered by _DB_CONNSTR_RE above). +# Catches things like `https://user:token@api.example.com/v1/foo`. +_URL_USERINFO_RE = re.compile( + r"(https?|wss?|ftp)://([^/\s:@]+):([^/\s@]+)@", +) + +# Form-urlencoded body detection: conservative — only applies when the entire +# text looks like a query string (k=v&k=v pattern with no newlines). +_FORM_BODY_RE = re.compile( + r"^[A-Za-z_][A-Za-z0-9_.-]*=[^&\s]*(?:&[A-Za-z_][A-Za-z0-9_.-]*=[^&\s]*)+$" +) + # Compile known prefix patterns into one alternation _PREFIX_RE = re.compile( r"(? str: return f"{token[:6]}...{token[-4:]}" +def _redact_query_string(query: str) -> str: + """Redact sensitive parameter values in a URL query string. + + Handles `k=v&k=v` format. Sensitive keys (case-insensitive) have values + replaced with `***`. Non-sensitive keys pass through unchanged. + Empty or malformed pairs are preserved as-is. + """ + if not query: + return query + parts = [] + for pair in query.split("&"): + if "=" not in pair: + parts.append(pair) + continue + key, _, value = pair.partition("=") + if key.lower() in _SENSITIVE_QUERY_PARAMS: + parts.append(f"{key}=***") + else: + parts.append(pair) + return "&".join(parts) + + +def _redact_url_query_params(text: str) -> str: + """Scan text for URLs with query strings and redact sensitive params. + + Catches opaque tokens that don't match vendor prefix regexes, e.g. + `https://example.com/cb?code=ABC123&state=xyz` → `...?code=***&state=xyz`. 
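+
+    A worked example (editor's illustration; the token value is invented)::
+
+        >>> _redact_url_query_params("https://api.example.com/v1?api_key=abc123&limit=5")
+        'https://api.example.com/v1?api_key=***&limit=5'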
+ """ + def _sub(m: re.Match) -> str: + scheme = m.group(1) + authority = m.group(2) + path = m.group(3) + query = _redact_query_string(m.group(4)) + fragment = m.group(5) or "" + return f"{scheme}://{authority}{path}?{query}{fragment}" + return _URL_WITH_QUERY_RE.sub(_sub, text) + + +def _redact_url_userinfo(text: str) -> str: + """Strip `user:password@` from HTTP/WS/FTP URLs. + + DB protocols (postgres, mysql, mongodb, redis, amqp) are handled + separately by `_DB_CONNSTR_RE`. + """ + return _URL_USERINFO_RE.sub( + lambda m: f"{m.group(1)}://{m.group(2)}:***@", + text, + ) + + +def _redact_form_body(text: str) -> str: + """Redact sensitive values in a form-urlencoded body. + + Only applies when the entire input looks like a pure form body + (k=v&k=v with no newlines, no other text). Single-line non-form + text passes through unchanged. This is a conservative pass — the + `_redact_url_query_params` function handles embedded query strings. + """ + if not text or "\n" in text or "&" not in text: + return text + # The body-body form check is strict: only trigger on clean k=v&k=v. + if not _FORM_BODY_RE.match(text.strip()): + return text + return _redact_query_string(text.strip()) + + def redact_sensitive_text(text: str) -> str: """Apply all redaction patterns to a block of text. @@ -173,6 +305,16 @@ def redact_sensitive_text(text: str) -> str: # JWT tokens (eyJ... — base64-encoded JSON headers) text = _JWT_RE.sub(lambda m: _mask_token(m.group(0)), text) + # URL userinfo (http(s)://user:pass@host) — redact for non-DB schemes. + # DB schemes are handled above by _DB_CONNSTR_RE. + text = _redact_url_userinfo(text) + + # URL query params containing opaque tokens (?access_token=…&code=…) + text = _redact_url_query_params(text) + + # Form-urlencoded bodies (only triggers on clean k=v&k=v inputs). + text = _redact_form_body(text) + # Discord user/role mentions (<@snowflake_id>) text = _DISCORD_MENTION_RE.sub(lambda m: f"<@{'!' if '!' in m.group(0) else ''}***>", text) diff --git a/agent/shell_hooks.py b/agent/shell_hooks.py new file mode 100644 index 0000000000..b579ad5b87 --- /dev/null +++ b/agent/shell_hooks.py @@ -0,0 +1,831 @@ +""" +Shell-script hooks bridge. + +Reads the ``hooks:`` block from ``cli-config.yaml``, prompts the user for +consent on first use of each ``(event, command)`` pair, and registers +callbacks on the existing plugin hook manager so every existing +``invoke_hook()`` site dispatches to the configured shell scripts — with +zero changes to call sites. + +Design notes +------------ +* Python plugins and shell hooks compose naturally: both flow through + :func:`hermes_cli.plugins.invoke_hook` and its aggregators. Python + plugins are registered first (via ``discover_and_load()``) so their + block decisions win ties over shell-hook blocks. +* Subprocess execution uses ``shlex.split(os.path.expanduser(command))`` + with ``shell=False`` — no shell injection footguns. Users that need + pipes/redirection wrap their logic in a script. +* First-use consent is gated by the allowlist under + ``~/.hermes/shell-hooks-allowlist.json``. Non-TTY callers must pass + ``accept_hooks=True`` (resolved from ``--accept-hooks``, + ``HERMES_ACCEPT_HOOKS``, or ``hooks_auto_accept: true`` in config) + for registration to succeed without a prompt. +* Registration is idempotent — safe to invoke from both the CLI entry + point (``hermes_cli/main.py``) and the gateway entry point + (``gateway/run.py``). 
+ +Wire protocol +------------- +**stdin** (JSON, piped to the script):: + + { + "hook_event_name": "pre_tool_call", + "tool_name": "terminal", + "tool_input": {"command": "rm -rf /"}, + "session_id": "sess_abc123", + "cwd": "/home/user/project", + "extra": {...} # event-specific kwargs + } + +**stdout** (JSON, optional — anything else is ignored):: + + # Block a pre_tool_call (either shape accepted; normalised internally): + {"decision": "block", "reason": "Forbidden command"} # Claude-Code-style + {"action": "block", "message": "Forbidden command"} # Hermes-canonical + + # Inject context for pre_llm_call: + {"context": "Today is Friday"} + + # Silent no-op: + +""" + +from __future__ import annotations + +import difflib +import json +import logging +import os +import re +import shlex +import subprocess +import sys +import tempfile +import threading +import time +from contextlib import contextmanager +from dataclasses import dataclass, field +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, Callable, Dict, Iterator, List, Optional, Set, Tuple + +try: + import fcntl # POSIX only; Windows falls back to best-effort without flock. +except ImportError: # pragma: no cover + fcntl = None # type: ignore[assignment] + +from hermes_constants import get_hermes_home + +logger = logging.getLogger(__name__) + +DEFAULT_TIMEOUT_SECONDS = 60 +MAX_TIMEOUT_SECONDS = 300 +ALLOWLIST_FILENAME = "shell-hooks-allowlist.json" + +# (event, matcher, command) triples that have been wired to the plugin +# manager in the current process. Matcher is part of the key because +# the same script can legitimately register for different matchers under +# the same event (e.g. one entry per tool the user wants to gate). +# Second registration attempts for the exact same triple become no-ops +# so the CLI and gateway can both call register_from_config() safely. +_registered: Set[Tuple[str, Optional[str], str]] = set() +_registered_lock = threading.Lock() + +# Intra-process lock for allowlist read-modify-write on platforms that +# lack ``fcntl`` (non-POSIX). Kept separate from ``_registered_lock`` +# because ``register_from_config`` already holds ``_registered_lock`` when +# it triggers ``_record_approval`` — reusing it here would self-deadlock +# (``threading.Lock`` is non-reentrant). POSIX callers use the sibling +# ``.lock`` file via ``fcntl.flock`` and bypass this. +_allowlist_write_lock = threading.Lock() + + +@dataclass +class ShellHookSpec: + """Parsed and validated representation of a single ``hooks:`` entry.""" + + event: str + command: str + matcher: Optional[str] = None + timeout: int = DEFAULT_TIMEOUT_SECONDS + compiled_matcher: Optional[re.Pattern] = field(default=None, repr=False) + + def __post_init__(self) -> None: + # Strip whitespace introduced by YAML quirks (e.g. multi-line string + # folding) — a matcher of " terminal" would otherwise silently fail + # to match "terminal" without any diagnostic. 
+ if isinstance(self.matcher, str): + stripped = self.matcher.strip() + self.matcher = stripped if stripped else None + if self.matcher: + try: + self.compiled_matcher = re.compile(self.matcher) + except re.error as exc: + logger.warning( + "shell hook matcher %r is invalid (%s) — treating as " + "literal equality", self.matcher, exc, + ) + self.compiled_matcher = None + + def matches_tool(self, tool_name: Optional[str]) -> bool: + if not self.matcher: + return True + if tool_name is None: + return False + if self.compiled_matcher is not None: + return self.compiled_matcher.fullmatch(tool_name) is not None + # compiled_matcher is None only when the regex failed to compile, + # in which case we already warned and fall back to literal equality. + return tool_name == self.matcher + + +# --------------------------------------------------------------------------- +# Public API +# --------------------------------------------------------------------------- + +def register_from_config( + cfg: Optional[Dict[str, Any]], + *, + accept_hooks: bool = False, +) -> List[ShellHookSpec]: + """Register every configured shell hook on the plugin manager. + + ``cfg`` is the full parsed config dict (``hermes_cli.config.load_config`` + output). The ``hooks:`` key is read out of it. Missing, empty, or + non-dict ``hooks`` is treated as zero configured hooks. + + ``accept_hooks=True`` skips the TTY consent prompt — the caller is + promising that the user has opted in via a flag, env var, or config + setting. ``HERMES_ACCEPT_HOOKS=1`` and ``hooks_auto_accept: true`` are + also honored inside this function so either CLI or gateway call sites + pick them up. + + Returns the list of :class:`ShellHookSpec` entries that ended up wired + up on the plugin manager. Skipped entries (unknown events, malformed, + not allowlisted, already registered) are logged but not returned. + """ + if not isinstance(cfg, dict): + return [] + + effective_accept = _resolve_effective_accept(cfg, accept_hooks) + + specs = _parse_hooks_block(cfg.get("hooks")) + if not specs: + return [] + + registered: List[ShellHookSpec] = [] + + # Import lazily — avoids circular imports at module-load time. + from hermes_cli.plugins import get_plugin_manager + + manager = get_plugin_manager() + + # Idempotence + allowlist read happen under the lock; the TTY + # prompt runs outside so other threads aren't parked on a blocking + # input(). Mutation re-takes the lock with a defensive idempotence + # re-check in case two callers ever race through the prompt. + for spec in specs: + key = (spec.event, spec.matcher, spec.command) + with _registered_lock: + if key in _registered: + continue + already_allowlisted = _is_allowlisted(spec.event, spec.command) + + if not already_allowlisted: + if not _prompt_and_record( + spec.event, spec.command, accept_hooks=effective_accept, + ): + logger.warning( + "shell hook for %s (%s) not allowlisted — skipped. 
" + "Use --accept-hooks / HERMES_ACCEPT_HOOKS=1 / " + "hooks_auto_accept: true, or approve at the TTY " + "prompt next run.", + spec.event, spec.command, + ) + continue + + with _registered_lock: + if key in _registered: + continue + manager._hooks.setdefault(spec.event, []).append(_make_callback(spec)) + _registered.add(key) + registered.append(spec) + logger.info( + "shell hook registered: %s -> %s (matcher=%s, timeout=%ds)", + spec.event, spec.command, spec.matcher, spec.timeout, + ) + + return registered + + +def iter_configured_hooks(cfg: Optional[Dict[str, Any]]) -> List[ShellHookSpec]: + """Return the parsed ``ShellHookSpec`` entries from config without + registering anything. Used by ``hermes hooks list`` and ``doctor``.""" + if not isinstance(cfg, dict): + return [] + return _parse_hooks_block(cfg.get("hooks")) + + +def reset_for_tests() -> None: + """Clear the idempotence set. Test-only helper.""" + with _registered_lock: + _registered.clear() + + +# --------------------------------------------------------------------------- +# Config parsing +# --------------------------------------------------------------------------- + +def _parse_hooks_block(hooks_cfg: Any) -> List[ShellHookSpec]: + """Normalise the ``hooks:`` dict into a flat list of ``ShellHookSpec``. + + Malformed entries warn-and-skip — we never raise from config parsing + because a broken hook must not crash the agent. + """ + from hermes_cli.plugins import VALID_HOOKS + + if not isinstance(hooks_cfg, dict): + return [] + + specs: List[ShellHookSpec] = [] + + for event_name, entries in hooks_cfg.items(): + if event_name not in VALID_HOOKS: + suggestion = difflib.get_close_matches( + str(event_name), VALID_HOOKS, n=1, cutoff=0.6, + ) + if suggestion: + logger.warning( + "unknown hook event %r in hooks: config — did you mean %r?", + event_name, suggestion[0], + ) + else: + logger.warning( + "unknown hook event %r in hooks: config (valid: %s)", + event_name, ", ".join(sorted(VALID_HOOKS)), + ) + continue + + if entries is None: + continue + + if not isinstance(entries, list): + logger.warning( + "hooks.%s must be a list of hook definitions; got %s", + event_name, type(entries).__name__, + ) + continue + + for i, raw in enumerate(entries): + spec = _parse_single_entry(event_name, i, raw) + if spec is not None: + specs.append(spec) + + return specs + + +def _parse_single_entry( + event: str, index: int, raw: Any, +) -> Optional[ShellHookSpec]: + if not isinstance(raw, dict): + logger.warning( + "hooks.%s[%d] must be a mapping with a 'command' key; got %s", + event, index, type(raw).__name__, + ) + return None + + command = raw.get("command") + if not isinstance(command, str) or not command.strip(): + logger.warning( + "hooks.%s[%d] is missing a non-empty 'command' field", + event, index, + ) + return None + + matcher = raw.get("matcher") + if matcher is not None and not isinstance(matcher, str): + logger.warning( + "hooks.%s[%d].matcher must be a string regex; ignoring", + event, index, + ) + matcher = None + + if matcher is not None and event not in ("pre_tool_call", "post_tool_call"): + logger.warning( + "hooks.%s[%d].matcher=%r will be ignored at runtime — the " + "matcher field is only honored for pre_tool_call / " + "post_tool_call. 
The hook will fire on every %s event.", + event, index, matcher, event, + ) + matcher = None + + timeout_raw = raw.get("timeout", DEFAULT_TIMEOUT_SECONDS) + try: + timeout = int(timeout_raw) + except (TypeError, ValueError): + logger.warning( + "hooks.%s[%d].timeout must be an int (got %r); using default %ds", + event, index, timeout_raw, DEFAULT_TIMEOUT_SECONDS, + ) + timeout = DEFAULT_TIMEOUT_SECONDS + + if timeout < 1: + logger.warning( + "hooks.%s[%d].timeout must be >=1; using default %ds", + event, index, DEFAULT_TIMEOUT_SECONDS, + ) + timeout = DEFAULT_TIMEOUT_SECONDS + + if timeout > MAX_TIMEOUT_SECONDS: + logger.warning( + "hooks.%s[%d].timeout=%ds exceeds max %ds; clamping", + event, index, timeout, MAX_TIMEOUT_SECONDS, + ) + timeout = MAX_TIMEOUT_SECONDS + + return ShellHookSpec( + event=event, + command=command.strip(), + matcher=matcher, + timeout=timeout, + ) + + +# --------------------------------------------------------------------------- +# Subprocess callback +# --------------------------------------------------------------------------- + +_TOP_LEVEL_PAYLOAD_KEYS = {"tool_name", "args", "session_id", "parent_session_id"} + + +def _spawn(spec: ShellHookSpec, stdin_json: str) -> Dict[str, Any]: + """Run ``spec.command`` as a subprocess with ``stdin_json`` on stdin. + + Returns a diagnostic dict with the same keys for every outcome + (``returncode``, ``stdout``, ``stderr``, ``timed_out``, + ``elapsed_seconds``, ``error``). This is the single place the + subprocess is actually invoked — both the live callback path + (:func:`_make_callback`) and the CLI test helper (:func:`run_once`) + go through it. + """ + result: Dict[str, Any] = { + "returncode": None, + "stdout": "", + "stderr": "", + "timed_out": False, + "elapsed_seconds": 0.0, + "error": None, + } + try: + argv = shlex.split(os.path.expanduser(spec.command)) + except ValueError as exc: + result["error"] = f"command {spec.command!r} cannot be parsed: {exc}" + return result + if not argv: + result["error"] = "empty command" + return result + + t0 = time.monotonic() + try: + proc = subprocess.run( + argv, + input=stdin_json, + capture_output=True, + timeout=spec.timeout, + text=True, + shell=False, + ) + except subprocess.TimeoutExpired: + result["timed_out"] = True + result["elapsed_seconds"] = round(time.monotonic() - t0, 3) + return result + except FileNotFoundError: + result["error"] = "command not found" + return result + except PermissionError: + result["error"] = "command not executable" + return result + except Exception as exc: # pragma: no cover — defensive + result["error"] = str(exc) + return result + + result["returncode"] = proc.returncode + result["stdout"] = proc.stdout or "" + result["stderr"] = proc.stderr or "" + result["elapsed_seconds"] = round(time.monotonic() - t0, 3) + return result + + +def _make_callback(spec: ShellHookSpec) -> Callable[..., Optional[Dict[str, Any]]]: + """Build the closure that ``invoke_hook()`` will call per firing.""" + + def _callback(**kwargs: Any) -> Optional[Dict[str, Any]]: + # Matcher gate — only meaningful for tool-scoped events. 
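+        # Illustrative matcher semantics (per matches_tool above): matcher
+        # "bash|terminal" fullmatches tool_name "bash" but not "bash_extra";
+        # a missing matcher fires for every tool.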
+ if spec.event in ("pre_tool_call", "post_tool_call"): + if not spec.matches_tool(kwargs.get("tool_name")): + return None + + r = _spawn(spec, _serialize_payload(spec.event, kwargs)) + + if r["error"]: + logger.warning( + "shell hook failed (event=%s command=%s): %s", + spec.event, spec.command, r["error"], + ) + return None + if r["timed_out"]: + logger.warning( + "shell hook timed out after %.2fs (event=%s command=%s)", + r["elapsed_seconds"], spec.event, spec.command, + ) + return None + + stderr = r["stderr"].strip() + if stderr: + logger.debug( + "shell hook stderr (event=%s command=%s): %s", + spec.event, spec.command, stderr[:400], + ) + # Non-zero exits: log but still parse stdout so scripts that + # signal failure via exit code can also return a block directive. + if r["returncode"] != 0: + logger.warning( + "shell hook exited %d (event=%s command=%s); stderr=%s", + r["returncode"], spec.event, spec.command, stderr[:400], + ) + return _parse_response(spec.event, r["stdout"]) + + _callback.__name__ = f"shell_hook[{spec.event}:{spec.command}]" + _callback.__qualname__ = _callback.__name__ + return _callback + + +def _serialize_payload(event: str, kwargs: Dict[str, Any]) -> str: + """Render the stdin JSON payload. Unserialisable values are + stringified via ``default=str`` rather than dropped.""" + extras = {k: v for k, v in kwargs.items() if k not in _TOP_LEVEL_PAYLOAD_KEYS} + try: + cwd = str(Path.cwd()) + except OSError: + cwd = "" + payload = { + "hook_event_name": event, + "tool_name": kwargs.get("tool_name"), + "tool_input": kwargs.get("args") if isinstance(kwargs.get("args"), dict) else None, + "session_id": kwargs.get("session_id") or kwargs.get("parent_session_id") or "", + "cwd": cwd, + "extra": extras, + } + return json.dumps(payload, ensure_ascii=False, default=str) + + +def _parse_response(event: str, stdout: str) -> Optional[Dict[str, Any]]: + """Translate stdout JSON into a Hermes wire-shape dict. + + For ``pre_tool_call`` the Claude-Code-style ``{"decision": "block", + "reason": "..."}`` payload is translated into the canonical Hermes + ``{"action": "block", "message": "..."}`` shape expected by + :func:`hermes_cli.plugins.get_pre_tool_call_block_message`. This is + the single most important correctness invariant in this module — + skipping the translation silently breaks every ``pre_tool_call`` + block directive. + + For ``pre_llm_call``, ``{"context": "..."}`` is passed through + unchanged to match the existing plugin-hook contract. + + Anything else returns ``None``. 
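+
+    Illustrative translation for ``pre_tool_call`` (the hook script and its
+    reason text are hypothetical; the shapes are the ones handled below):
+
+        hook stdout:         {"decision": "block", "reason": "rm -rf denied"}
+        returned to Hermes:  {"action": "block", "message": "rm -rf denied"}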
+ """ + stdout = (stdout or "").strip() + if not stdout: + return None + + try: + data = json.loads(stdout) + except json.JSONDecodeError: + logger.warning( + "shell hook stdout was not valid JSON (event=%s): %s", + event, stdout[:200], + ) + return None + + if not isinstance(data, dict): + return None + + if event == "pre_tool_call": + if data.get("action") == "block": + message = data.get("message") or data.get("reason") or "" + if isinstance(message, str) and message: + return {"action": "block", "message": message} + if data.get("decision") == "block": + message = data.get("reason") or data.get("message") or "" + if isinstance(message, str) and message: + return {"action": "block", "message": message} + return None + + context = data.get("context") + if isinstance(context, str) and context.strip(): + return {"context": context} + + return None + + +# --------------------------------------------------------------------------- +# Allowlist / consent +# --------------------------------------------------------------------------- + +def allowlist_path() -> Path: + """Path to the per-user shell-hook allowlist file.""" + return get_hermes_home() / ALLOWLIST_FILENAME + + +def load_allowlist() -> Dict[str, Any]: + """Return the parsed allowlist, or an empty skeleton if absent.""" + try: + raw = json.loads(allowlist_path().read_text()) + except (FileNotFoundError, json.JSONDecodeError, OSError): + return {"approvals": []} + if not isinstance(raw, dict): + return {"approvals": []} + approvals = raw.get("approvals") + if not isinstance(approvals, list): + raw["approvals"] = [] + return raw + + +def save_allowlist(data: Dict[str, Any]) -> None: + """Atomically persist the allowlist via per-process ``mkstemp`` + + ``os.replace``. Cross-process read-modify-write races are handled + by :func:`_locked_update_approvals` (``fcntl.flock``). On OSError + the failure is logged; the in-process hook still registers but + the approval won't survive across runs.""" + p = allowlist_path() + try: + p.parent.mkdir(parents=True, exist_ok=True) + fd, tmp_path = tempfile.mkstemp( + prefix=f"{p.name}.", suffix=".tmp", dir=str(p.parent), + ) + try: + with os.fdopen(fd, "w") as fh: + fh.write(json.dumps(data, indent=2, sort_keys=True)) + os.replace(tmp_path, p) + except Exception: + try: + os.unlink(tmp_path) + except OSError: + pass + raise + except OSError as exc: + logger.warning( + "Failed to persist shell hook allowlist to %s: %s. " + "The approval is in-memory for this run, but the next " + "startup will re-prompt (or skip registration on non-TTY " + "runs without --accept-hooks / HERMES_ACCEPT_HOOKS).", + p, exc, + ) + + +def _is_allowlisted(event: str, command: str) -> bool: + data = load_allowlist() + return any( + isinstance(e, dict) + and e.get("event") == event + and e.get("command") == command + for e in data.get("approvals", []) + ) + + +@contextmanager +def _locked_update_approvals() -> Iterator[Dict[str, Any]]: + """Serialise read-modify-write on the allowlist across processes. + + Holds an exclusive ``flock`` on a sibling lock file for the duration + of the update so concurrent ``_record_approval``/``revoke`` callers + cannot clobber each other's changes (the race Codex reproduced with + 20–50 simultaneous writers). Falls back to an in-process lock on + platforms without ``fcntl``. 
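+
+    Usage (mirrors ``_record_approval`` / ``revoke`` below):
+
+        with _locked_update_approvals() as data:
+            data["approvals"].append(entry)  # persisted on exit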
+ """ + p = allowlist_path() + p.parent.mkdir(parents=True, exist_ok=True) + lock_path = p.with_suffix(p.suffix + ".lock") + + if fcntl is None: # pragma: no cover — non-POSIX fallback + with _allowlist_write_lock: + data = load_allowlist() + yield data + save_allowlist(data) + return + + with open(lock_path, "a+") as lock_fh: + fcntl.flock(lock_fh.fileno(), fcntl.LOCK_EX) + try: + data = load_allowlist() + yield data + save_allowlist(data) + finally: + fcntl.flock(lock_fh.fileno(), fcntl.LOCK_UN) + + +def _prompt_and_record( + event: str, command: str, *, accept_hooks: bool, +) -> bool: + """Decide whether to approve an unseen ``(event, command)`` pair. + Returns ``True`` iff the approval was granted and recorded. + """ + if accept_hooks: + _record_approval(event, command) + logger.info( + "shell hook auto-approved via --accept-hooks / env / config: " + "%s -> %s", event, command, + ) + return True + + if not sys.stdin.isatty(): + return False + + print( + f"\n⚠ Hermes is about to register a shell hook that will run a\n" + f" command on your behalf.\n\n" + f" Event: {event}\n" + f" Command: {command}\n\n" + f" Commands run with your full user credentials. Only approve\n" + f" commands you trust." + ) + try: + answer = input("Allow this hook to run? [y/N]: ").strip().lower() + except (EOFError, KeyboardInterrupt): + print() # keep the terminal tidy after ^C + return False + + if answer in ("y", "yes"): + _record_approval(event, command) + return True + + return False + + +def _record_approval(event: str, command: str) -> None: + entry = { + "event": event, + "command": command, + "approved_at": _utc_now_iso(), + "script_mtime_at_approval": script_mtime_iso(command), + } + with _locked_update_approvals() as data: + data["approvals"] = [ + e for e in data.get("approvals", []) + if not ( + isinstance(e, dict) + and e.get("event") == event + and e.get("command") == command + ) + ] + [entry] + + +def _utc_now_iso() -> str: + return datetime.now(tz=timezone.utc).isoformat().replace("+00:00", "Z") + + +def revoke(command: str) -> int: + """Remove every allowlist entry matching ``command``. + + Returns the number of entries removed. Does not unregister any + callbacks that are already live on the plugin manager in the current + process — restart the CLI / gateway to drop them. + """ + with _locked_update_approvals() as data: + before = len(data.get("approvals", [])) + data["approvals"] = [ + e for e in data.get("approvals", []) + if not (isinstance(e, dict) and e.get("command") == command) + ] + after = len(data["approvals"]) + return before - after + + +_SCRIPT_EXTENSIONS: Tuple[str, ...] = ( + ".sh", ".bash", ".zsh", ".fish", + ".py", ".pyw", + ".rb", ".pl", ".lua", + ".js", ".mjs", ".cjs", ".ts", +) + + +def _command_script_path(command: str) -> str: + """Return the script path from ``command`` for doctor / drift checks. + + Prefers a token ending in a known script extension, then a token + containing ``/`` or leading ``~``, then the first token. Handles + ``python3 /path/hook.py``, ``/usr/bin/env bash hook.sh``, and the + common bare-path form. 
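+
+    Illustrative resolutions (following the preference order above):
+
+        "python3 /path/hook.py"      -> "/path/hook.py"  (extension token)
+        "/usr/bin/env bash hook.sh"  -> "hook.sh"        (extension token)
+        "mycmd --flag"               -> "mycmd"          (first token)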
+ """ + try: + parts = shlex.split(command) + except ValueError: + return command + if not parts: + return command + for part in parts: + if part.lower().endswith(_SCRIPT_EXTENSIONS): + return part + for part in parts: + if "/" in part or part.startswith("~"): + return part + return parts[0] + + +# --------------------------------------------------------------------------- +# Helpers for accept-hooks resolution +# --------------------------------------------------------------------------- + +def _resolve_effective_accept( + cfg: Dict[str, Any], accept_hooks_arg: bool, +) -> bool: + """Combine all three opt-in channels into a single boolean. + + Precedence (any truthy source flips us on): + 1. ``--accept-hooks`` flag (CLI) / explicit argument + 2. ``HERMES_ACCEPT_HOOKS`` env var + 3. ``hooks_auto_accept: true`` in ``cli-config.yaml`` + """ + if accept_hooks_arg: + return True + env = os.environ.get("HERMES_ACCEPT_HOOKS", "").strip().lower() + if env in ("1", "true", "yes", "on"): + return True + cfg_val = cfg.get("hooks_auto_accept", False) + return bool(cfg_val) + + +# --------------------------------------------------------------------------- +# Introspection (used by `hermes hooks` CLI) +# --------------------------------------------------------------------------- + +def allowlist_entry_for(event: str, command: str) -> Optional[Dict[str, Any]]: + """Return the allowlist record for this pair, if any.""" + for e in load_allowlist().get("approvals", []): + if ( + isinstance(e, dict) + and e.get("event") == event + and e.get("command") == command + ): + return e + return None + + +def script_mtime_iso(command: str) -> Optional[str]: + """ISO-8601 mtime of the resolved script path, or ``None`` if the + script is missing.""" + path = _command_script_path(command) + if not path: + return None + try: + expanded = os.path.expanduser(path) + return datetime.fromtimestamp( + os.path.getmtime(expanded), tz=timezone.utc, + ).isoformat().replace("+00:00", "Z") + except OSError: + return None + + +def script_is_executable(command: str) -> bool: + """Return ``True`` iff ``command`` is runnable as configured. + + For a bare invocation (``/path/hook.sh``) the script itself must be + executable. For interpreter-prefixed commands (``python3 + /path/hook.py``, ``/usr/bin/env bash hook.sh``) the script just has + to be readable — the interpreter doesn't care about the ``X_OK`` + bit. Mirrors what ``_spawn`` would actually do at runtime.""" + path = _command_script_path(command) + if not path: + return False + expanded = os.path.expanduser(path) + if not os.path.isfile(expanded): + return False + try: + argv = shlex.split(command) + except ValueError: + return False + is_bare_invocation = bool(argv) and argv[0] == path + required = os.X_OK if is_bare_invocation else os.R_OK + return os.access(expanded, required) + + +def run_once( + spec: ShellHookSpec, kwargs: Dict[str, Any], +) -> Dict[str, Any]: + """Fire a single shell-hook invocation with a synthetic payload. + Used by ``hermes hooks test`` and ``hermes hooks doctor``. + + ``kwargs`` is the same dict that :func:`hermes_cli.plugins.invoke_hook` + would pass at runtime. It is routed through :func:`_serialize_payload` + so the synthetic stdin exactly matches what a real hook firing would + produce — otherwise scripts tested via ``hermes hooks test`` could + diverge silently from production behaviour. 
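+
+    Illustrative call (the kwargs keys mirror ``_serialize_payload``):
+
+        run_once(spec, {"tool_name": "bash", "args": {"cmd": "ls"}})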
+ + Returns the :func:`_spawn` diagnostic dict plus a ``parsed`` field + holding the canonical Hermes-wire-shape response.""" + stdin_json = _serialize_payload(spec.event, kwargs) + result = _spawn(spec, stdin_json) + result["parsed"] = _parse_response(spec.event, result["stdout"]) + return result diff --git a/agent/skill_commands.py b/agent/skill_commands.py index 280105daca..a4345ca8c4 100644 --- a/agent/skill_commands.py +++ b/agent/skill_commands.py @@ -8,6 +8,7 @@ can invoke skills via /skill-name commands and prompt-only built-ins like import json import logging import re +import subprocess from datetime import datetime from pathlib import Path from typing import Any, Dict, Optional @@ -22,6 +23,110 @@ _PLAN_SLUG_RE = re.compile(r"[^a-z0-9]+") _SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]") _SKILL_MULTI_HYPHEN = re.compile(r"-{2,}") +# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md. +# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are +# left as-is so the user can debug them. +_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}") + +# Matches inline shell snippets like: !`date +%Y-%m-%d` +# Non-greedy, single-line only — no newlines inside the backticks. +_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`") + +# Cap inline-shell output so a runaway command can't blow out the context. +_INLINE_SHELL_MAX_OUTPUT = 4000 + + +def _load_skills_config() -> dict: + """Load the ``skills`` section of config.yaml (best-effort).""" + try: + from hermes_cli.config import load_config + + cfg = load_config() or {} + skills_cfg = cfg.get("skills") + if isinstance(skills_cfg, dict): + return skills_cfg + except Exception: + logger.debug("Could not read skills config", exc_info=True) + return {} + + +def _substitute_template_vars( + content: str, + skill_dir: Path | None, + session_id: str | None, +) -> str: + """Replace ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} in skill content. + + Only substitutes tokens for which a concrete value is available — + unresolved tokens are left in place so the author can spot them. + """ + if not content: + return content + + skill_dir_str = str(skill_dir) if skill_dir else None + + def _replace(match: re.Match) -> str: + token = match.group(1) + if token == "HERMES_SKILL_DIR" and skill_dir_str: + return skill_dir_str + if token == "HERMES_SESSION_ID" and session_id: + return str(session_id) + return match.group(0) + + return _SKILL_TEMPLATE_RE.sub(_replace, content) + + +def _run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str: + """Execute a single inline-shell snippet and return its stdout (trimmed). + + Failures return a short ``[inline-shell error: ...]`` marker instead of + raising, so one bad snippet can't wreck the whole skill message. 
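+
+    Illustrative outcomes (commands are author-supplied examples):
+
+        _run_inline_shell("date +%Y-%m-%d", skill_dir, 10)  -> e.g. "2025-06-01"
+        _run_inline_shell("sleep 99", skill_dir, 1)
+            -> "[inline-shell timeout after 1s: sleep 99]"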
+ """ + try: + completed = subprocess.run( + ["bash", "-c", command], + cwd=str(cwd) if cwd else None, + capture_output=True, + text=True, + timeout=max(1, int(timeout)), + check=False, + ) + except subprocess.TimeoutExpired: + return f"[inline-shell timeout after {timeout}s: {command}]" + except FileNotFoundError: + return f"[inline-shell error: bash not found]" + except Exception as exc: + return f"[inline-shell error: {exc}]" + + output = (completed.stdout or "").rstrip("\n") + if not output and completed.stderr: + output = completed.stderr.rstrip("\n") + if len(output) > _INLINE_SHELL_MAX_OUTPUT: + output = output[:_INLINE_SHELL_MAX_OUTPUT] + "…[truncated]" + return output + + +def _expand_inline_shell( + content: str, + skill_dir: Path | None, + timeout: int, +) -> str: + """Replace every !`cmd` snippet in ``content`` with its stdout. + + Runs each snippet with the skill directory as CWD so relative paths in + the snippet work the way the author expects. + """ + if "!`" not in content: + return content + + def _replace(match: re.Match) -> str: + cmd = match.group(1).strip() + if not cmd: + return "" + return _run_inline_shell(cmd, skill_dir, timeout) + + return _INLINE_SHELL_RE.sub(_replace, content) + def build_plan_path( user_instruction: str = "", @@ -133,14 +238,36 @@ def _build_skill_message( activation_note: str, user_instruction: str = "", runtime_note: str = "", + session_id: str | None = None, ) -> str: """Format a loaded skill into a user/system message payload.""" from tools.skills_tool import SKILLS_DIR content = str(loaded_skill.get("content") or "") + # ── Template substitution and inline-shell expansion ── + # Done before anything else so downstream blocks (setup notes, + # supporting-file hints) see the expanded content. + skills_cfg = _load_skills_config() + if skills_cfg.get("template_vars", True): + content = _substitute_template_vars(content, skill_dir, session_id) + if skills_cfg.get("inline_shell", False): + timeout = int(skills_cfg.get("inline_shell_timeout", 10) or 10) + content = _expand_inline_shell(content, skill_dir, timeout) + parts = [activation_note, "", content.strip()] + # ── Inject the absolute skill directory so the agent can reference + # bundled scripts without an extra skill_view() round-trip. ── + if skill_dir: + parts.append("") + parts.append(f"[Skill directory: {skill_dir}]") + parts.append( + "Resolve any relative paths in this skill (e.g. `scripts/foo.js`, " + "`templates/config.yaml`) against that directory, then run them " + "with the terminal tool using the absolute path." + ) + # ── Inject resolved skill config values ── _inject_skill_config(loaded_skill, parts) @@ -188,11 +315,13 @@ def _build_skill_message( # Skill is from an external dir — use the skill name instead skill_view_target = skill_dir.name parts.append("") - parts.append("[This skill has supporting files you can load with the skill_view tool:]") + parts.append("[This skill has supporting files:]") for sf in supporting: - parts.append(f"- {sf}") + parts.append(f"- {sf} -> {skill_dir / sf}") parts.append( - f'\nTo view any of these, use: skill_view(name="{skill_view_target}", file_path="")' + f'\nLoad any of these with skill_view(name="{skill_view_target}", ' + f'file_path=""), or run scripts directly by absolute path ' + f"(e.g. `node {skill_dir}/scripts/foo.js`)." 
) if user_instruction: @@ -332,6 +461,7 @@ def build_skill_invocation_message( activation_note, user_instruction=user_instruction, runtime_note=runtime_note, + session_id=task_id, ) @@ -370,6 +500,7 @@ def build_preloaded_skills_prompt( loaded_skill, skill_dir, activation_note, + session_id=task_id, ) ) loaded_names.append(skill_name) diff --git a/agent/smart_model_routing.py b/agent/smart_model_routing.py deleted file mode 100644 index 6d482be270..0000000000 --- a/agent/smart_model_routing.py +++ /dev/null @@ -1,195 +0,0 @@ -"""Helpers for optional cheap-vs-strong model routing.""" - -from __future__ import annotations - -import os -import re -from typing import Any, Dict, Optional - -from utils import is_truthy_value - -_COMPLEX_KEYWORDS = { - "debug", - "debugging", - "implement", - "implementation", - "refactor", - "patch", - "traceback", - "stacktrace", - "exception", - "error", - "analyze", - "analysis", - "investigate", - "architecture", - "design", - "compare", - "benchmark", - "optimize", - "optimise", - "review", - "terminal", - "shell", - "tool", - "tools", - "pytest", - "test", - "tests", - "plan", - "planning", - "delegate", - "subagent", - "cron", - "docker", - "kubernetes", -} - -_URL_RE = re.compile(r"https?://|www\.", re.IGNORECASE) - - -def _coerce_bool(value: Any, default: bool = False) -> bool: - return is_truthy_value(value, default=default) - - -def _coerce_int(value: Any, default: int) -> int: - try: - return int(value) - except (TypeError, ValueError): - return default - - -def choose_cheap_model_route(user_message: str, routing_config: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]: - """Return the configured cheap-model route when a message looks simple. - - Conservative by design: if the message has signs of code/tool/debugging/ - long-form work, keep the primary model. - """ - cfg = routing_config or {} - if not _coerce_bool(cfg.get("enabled"), False): - return None - - cheap_model = cfg.get("cheap_model") or {} - if not isinstance(cheap_model, dict): - return None - provider = str(cheap_model.get("provider") or "").strip().lower() - model = str(cheap_model.get("model") or "").strip() - if not provider or not model: - return None - - text = (user_message or "").strip() - if not text: - return None - - max_chars = _coerce_int(cfg.get("max_simple_chars"), 160) - max_words = _coerce_int(cfg.get("max_simple_words"), 28) - - if len(text) > max_chars: - return None - if len(text.split()) > max_words: - return None - if text.count("\n") > 1: - return None - if "```" in text or "`" in text: - return None - if _URL_RE.search(text): - return None - - lowered = text.lower() - words = {token.strip(".,:;!?()[]{}\"'`") for token in lowered.split()} - if words & _COMPLEX_KEYWORDS: - return None - - route = dict(cheap_model) - route["provider"] = provider - route["model"] = model - route["routing_reason"] = "simple_turn" - return route - - -def resolve_turn_route(user_message: str, routing_config: Optional[Dict[str, Any]], primary: Dict[str, Any]) -> Dict[str, Any]: - """Resolve the effective model/runtime for one turn. - - Returns a dict with model/runtime/signature/label fields. 
- """ - route = choose_cheap_model_route(user_message, routing_config) - if not route: - return { - "model": primary.get("model"), - "runtime": { - "api_key": primary.get("api_key"), - "base_url": primary.get("base_url"), - "provider": primary.get("provider"), - "api_mode": primary.get("api_mode"), - "command": primary.get("command"), - "args": list(primary.get("args") or []), - "credential_pool": primary.get("credential_pool"), - }, - "label": None, - "signature": ( - primary.get("model"), - primary.get("provider"), - primary.get("base_url"), - primary.get("api_mode"), - primary.get("command"), - tuple(primary.get("args") or ()), - ), - } - - from hermes_cli.runtime_provider import resolve_runtime_provider - - explicit_api_key = None - api_key_env = str(route.get("api_key_env") or "").strip() - if api_key_env: - explicit_api_key = os.getenv(api_key_env) or None - - try: - runtime = resolve_runtime_provider( - requested=route.get("provider"), - explicit_api_key=explicit_api_key, - explicit_base_url=route.get("base_url"), - ) - except Exception: - return { - "model": primary.get("model"), - "runtime": { - "api_key": primary.get("api_key"), - "base_url": primary.get("base_url"), - "provider": primary.get("provider"), - "api_mode": primary.get("api_mode"), - "command": primary.get("command"), - "args": list(primary.get("args") or []), - "credential_pool": primary.get("credential_pool"), - }, - "label": None, - "signature": ( - primary.get("model"), - primary.get("provider"), - primary.get("base_url"), - primary.get("api_mode"), - primary.get("command"), - tuple(primary.get("args") or ()), - ), - } - - return { - "model": route.get("model"), - "runtime": { - "api_key": runtime.get("api_key"), - "base_url": runtime.get("base_url"), - "provider": runtime.get("provider"), - "api_mode": runtime.get("api_mode"), - "command": runtime.get("command"), - "args": list(runtime.get("args") or []), - "credential_pool": runtime.get("credential_pool"), - }, - "label": f"smart route → {route.get('model')} ({runtime.get('provider')})", - "signature": ( - route.get("model"), - runtime.get("provider"), - runtime.get("base_url"), - runtime.get("api_mode"), - runtime.get("command"), - tuple(runtime.get("args") or ()), - ), - } diff --git a/agent/transports/__init__.py b/agent/transports/__init__.py new file mode 100644 index 0000000000..5752113325 --- /dev/null +++ b/agent/transports/__init__.py @@ -0,0 +1,51 @@ +"""Transport layer types and registry for provider response normalization. + +Usage: + from agent.transports import get_transport + transport = get_transport("anthropic_messages") + result = transport.normalize_response(raw_response) +""" + +from agent.transports.types import NormalizedResponse, ToolCall, Usage, build_tool_call, map_finish_reason # noqa: F401 + +_REGISTRY: dict = {} + + +def register_transport(api_mode: str, transport_cls: type) -> None: + """Register a transport class for an api_mode string.""" + _REGISTRY[api_mode] = transport_cls + + +def get_transport(api_mode: str): + """Get a transport instance for the given api_mode. + + Returns None if no transport is registered for this api_mode. + This allows gradual migration — call sites can check for None + and fall back to the legacy code path. 
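+
+    Illustrative fallback pattern at a call site:
+
+        transport = get_transport(api_mode)
+        if transport is None:
+            ...  # legacy normalization path
+        else:
+            normalized = transport.normalize_response(raw_response)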
+ """ + if not _REGISTRY: + _discover_transports() + cls = _REGISTRY.get(api_mode) + if cls is None: + return None + return cls() + + +def _discover_transports() -> None: + """Import all transport modules to trigger auto-registration.""" + try: + import agent.transports.anthropic # noqa: F401 + except ImportError: + pass + try: + import agent.transports.codex # noqa: F401 + except ImportError: + pass + try: + import agent.transports.chat_completions # noqa: F401 + except ImportError: + pass + try: + import agent.transports.bedrock # noqa: F401 + except ImportError: + pass diff --git a/agent/transports/anthropic.py b/agent/transports/anthropic.py new file mode 100644 index 0000000000..7ffa71a6f9 --- /dev/null +++ b/agent/transports/anthropic.py @@ -0,0 +1,129 @@ +"""Anthropic Messages API transport. + +Delegates to the existing adapter functions in agent/anthropic_adapter.py. +This transport owns format conversion and normalization — NOT client lifecycle. +""" + +from typing import Any, Dict, List, Optional + +from agent.transports.base import ProviderTransport +from agent.transports.types import NormalizedResponse + + +class AnthropicTransport(ProviderTransport): + """Transport for api_mode='anthropic_messages'. + + Wraps the existing functions in anthropic_adapter.py behind the + ProviderTransport ABC. Each method delegates — no logic is duplicated. + """ + + @property + def api_mode(self) -> str: + return "anthropic_messages" + + def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any: + """Convert OpenAI messages to Anthropic (system, messages) tuple. + + kwargs: + base_url: Optional[str] — affects thinking signature handling. + """ + from agent.anthropic_adapter import convert_messages_to_anthropic + + base_url = kwargs.get("base_url") + return convert_messages_to_anthropic(messages, base_url=base_url) + + def convert_tools(self, tools: List[Dict[str, Any]]) -> Any: + """Convert OpenAI tool schemas to Anthropic input_schema format.""" + from agent.anthropic_adapter import convert_tools_to_anthropic + + return convert_tools_to_anthropic(tools) + + def build_kwargs( + self, + model: str, + messages: List[Dict[str, Any]], + tools: Optional[List[Dict[str, Any]]] = None, + **params, + ) -> Dict[str, Any]: + """Build Anthropic messages.create() kwargs. + + Calls convert_messages and convert_tools internally. + + params (all optional): + max_tokens: int + reasoning_config: dict | None + tool_choice: str | None + is_oauth: bool + preserve_dots: bool + context_length: int | None + base_url: str | None + fast_mode: bool + """ + from agent.anthropic_adapter import build_anthropic_kwargs + + return build_anthropic_kwargs( + model=model, + messages=messages, + tools=tools, + max_tokens=params.get("max_tokens", 16384), + reasoning_config=params.get("reasoning_config"), + tool_choice=params.get("tool_choice"), + is_oauth=params.get("is_oauth", False), + preserve_dots=params.get("preserve_dots", False), + context_length=params.get("context_length"), + base_url=params.get("base_url"), + fast_mode=params.get("fast_mode", False), + ) + + def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse: + """Normalize Anthropic response to NormalizedResponse. + + kwargs: + strip_tool_prefix: bool — strip 'mcp_mcp_' prefixes from tool names. 
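+
+        Illustrative call (``resp`` is a raw Anthropic Messages response):
+
+            AnthropicTransport().normalize_response(resp, strip_tool_prefix=True)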
+ """ + from agent.anthropic_adapter import normalize_anthropic_response_v2 + + strip_tool_prefix = kwargs.get("strip_tool_prefix", False) + return normalize_anthropic_response_v2(response, strip_tool_prefix=strip_tool_prefix) + + def validate_response(self, response: Any) -> bool: + """Check Anthropic response structure is valid.""" + if response is None: + return False + content_blocks = getattr(response, "content", None) + if not isinstance(content_blocks, list): + return False + if not content_blocks: + return False + return True + + def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]: + """Extract Anthropic cache_read and cache_creation token counts.""" + usage = getattr(response, "usage", None) + if usage is None: + return None + cached = getattr(usage, "cache_read_input_tokens", 0) or 0 + written = getattr(usage, "cache_creation_input_tokens", 0) or 0 + if cached or written: + return {"cached_tokens": cached, "creation_tokens": written} + return None + + # Promote the adapter's canonical mapping to module level so it's shared + _STOP_REASON_MAP = { + "end_turn": "stop", + "tool_use": "tool_calls", + "max_tokens": "length", + "stop_sequence": "stop", + "refusal": "content_filter", + "model_context_window_exceeded": "length", + } + + def map_finish_reason(self, raw_reason: str) -> str: + """Map Anthropic stop_reason to OpenAI finish_reason.""" + return self._STOP_REASON_MAP.get(raw_reason, "stop") + + +# Auto-register on import +from agent.transports import register_transport # noqa: E402 + +register_transport("anthropic_messages", AnthropicTransport) diff --git a/agent/transports/base.py b/agent/transports/base.py new file mode 100644 index 0000000000..b516967b6a --- /dev/null +++ b/agent/transports/base.py @@ -0,0 +1,89 @@ +"""Abstract base for provider transports. + +A transport owns the data path for one api_mode: + convert_messages → convert_tools → build_kwargs → normalize_response + +It does NOT own: client construction, streaming, credential refresh, +prompt caching, interrupt handling, or retry logic. Those stay on AIAgent. +""" + +from abc import ABC, abstractmethod +from typing import Any, Dict, List, Optional + +from agent.transports.types import NormalizedResponse + + +class ProviderTransport(ABC): + """Base class for provider-specific format conversion and normalization.""" + + @property + @abstractmethod + def api_mode(self) -> str: + """The api_mode string this transport handles (e.g. 'anthropic_messages').""" + ... + + @abstractmethod + def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any: + """Convert OpenAI-format messages to provider-native format. + + Returns provider-specific structure (e.g. (system, messages) for Anthropic, + or the messages list unchanged for chat_completions). + """ + ... + + @abstractmethod + def convert_tools(self, tools: List[Dict[str, Any]]) -> Any: + """Convert OpenAI-format tool definitions to provider-native format. + + Returns provider-specific tool list (e.g. Anthropic input_schema format). + """ + ... + + @abstractmethod + def build_kwargs( + self, + model: str, + messages: List[Dict[str, Any]], + tools: Optional[List[Dict[str, Any]]] = None, + **params, + ) -> Dict[str, Any]: + """Build the complete API call kwargs dict. + + This is the primary entry point — it typically calls convert_messages() + and convert_tools() internally, then adds model-specific config. + + Returns a dict ready to be passed to the provider's SDK client. + """ + ... 
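+
+    # Typical call order for a concrete transport (a sketch — client
+    # construction and the actual API call stay on AIAgent, per the module
+    # docstring):
+    #   api_kwargs = transport.build_kwargs(model, messages, tools, **params)
+    #   raw = client.create(**api_kwargs)
+    #   normalized = transport.normalize_response(raw)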
+ + @abstractmethod + def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse: + """Normalize a raw provider response to the shared NormalizedResponse type. + + This is the only method that returns a transport-layer type. + """ + ... + + def validate_response(self, response: Any) -> bool: + """Optional: check if the raw response is structurally valid. + + Returns True if valid, False if the response should be treated as invalid. + Default implementation always returns True. + """ + return True + + def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]: + """Optional: extract provider-specific cache hit/creation stats. + + Returns dict with 'cached_tokens' and 'creation_tokens', or None. + Default returns None. + """ + return None + + def map_finish_reason(self, raw_reason: str) -> str: + """Optional: map provider-specific stop reason to OpenAI equivalent. + + Default returns the raw reason unchanged. Override for providers + with different stop reason vocabularies. + """ + return raw_reason diff --git a/agent/transports/bedrock.py b/agent/transports/bedrock.py new file mode 100644 index 0000000000..af549e7eae --- /dev/null +++ b/agent/transports/bedrock.py @@ -0,0 +1,154 @@ +"""AWS Bedrock Converse API transport. + +Delegates to the existing adapter functions in agent/bedrock_adapter.py. +Bedrock uses its own boto3 client (not the OpenAI SDK), so the transport +owns format conversion and normalization, while client construction and +boto3 calls stay on AIAgent. +""" + +from typing import Any, Dict, List, Optional + +from agent.transports.base import ProviderTransport +from agent.transports.types import NormalizedResponse, ToolCall, Usage + + +class BedrockTransport(ProviderTransport): + """Transport for api_mode='bedrock_converse'.""" + + @property + def api_mode(self) -> str: + return "bedrock_converse" + + def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any: + """Convert OpenAI messages to Bedrock Converse format.""" + from agent.bedrock_adapter import convert_messages_to_converse + return convert_messages_to_converse(messages) + + def convert_tools(self, tools: List[Dict[str, Any]]) -> Any: + """Convert OpenAI tool schemas to Bedrock Converse toolConfig.""" + from agent.bedrock_adapter import convert_tools_to_converse + return convert_tools_to_converse(tools) + + def build_kwargs( + self, + model: str, + messages: List[Dict[str, Any]], + tools: Optional[List[Dict[str, Any]]] = None, + **params, + ) -> Dict[str, Any]: + """Build Bedrock converse() kwargs. + + Calls convert_messages and convert_tools internally. + + params: + max_tokens: int — output token limit (default 4096) + temperature: float | None + guardrail_config: dict | None — Bedrock guardrails + region: str — AWS region (default 'us-east-1') + """ + from agent.bedrock_adapter import build_converse_kwargs + + region = params.get("region", "us-east-1") + guardrail = params.get("guardrail_config") + + kwargs = build_converse_kwargs( + model=model, + messages=messages, + tools=tools, + max_tokens=params.get("max_tokens", 4096), + temperature=params.get("temperature"), + guardrail_config=guardrail, + ) + # Sentinel keys for dispatch — agent pops these before the boto3 call + kwargs["__bedrock_converse__"] = True + kwargs["__bedrock_region__"] = region + return kwargs + + def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse: + """Normalize Bedrock response to NormalizedResponse. + + Handles two shapes: + 1. 
Raw boto3 dict (from direct converse() calls) + 2. Already-normalized SimpleNamespace with .choices (from dispatch site) + """ + from agent.bedrock_adapter import normalize_converse_response + + # Normalize to OpenAI-compatible SimpleNamespace + if hasattr(response, "choices") and response.choices: + # Already normalized at dispatch site + ns = response + else: + # Raw boto3 dict + ns = normalize_converse_response(response) + + choice = ns.choices[0] + msg = choice.message + finish_reason = choice.finish_reason or "stop" + + tool_calls = None + if msg.tool_calls: + tool_calls = [ + ToolCall( + id=tc.id, + name=tc.function.name, + arguments=tc.function.arguments, + ) + for tc in msg.tool_calls + ] + + usage = None + if hasattr(ns, "usage") and ns.usage: + u = ns.usage + usage = Usage( + prompt_tokens=getattr(u, "prompt_tokens", 0) or 0, + completion_tokens=getattr(u, "completion_tokens", 0) or 0, + total_tokens=getattr(u, "total_tokens", 0) or 0, + ) + + reasoning = getattr(msg, "reasoning", None) or getattr(msg, "reasoning_content", None) + + return NormalizedResponse( + content=msg.content, + tool_calls=tool_calls, + finish_reason=finish_reason, + reasoning=reasoning, + usage=usage, + ) + + def validate_response(self, response: Any) -> bool: + """Check Bedrock response structure. + + After normalize_converse_response, the response has OpenAI-compatible + .choices — same check as chat_completions. + """ + if response is None: + return False + # Raw Bedrock dict response — check for 'output' key + if isinstance(response, dict): + return "output" in response + # Already-normalized SimpleNamespace + if hasattr(response, "choices"): + return bool(response.choices) + return False + + def map_finish_reason(self, raw_reason: str) -> str: + """Map Bedrock stop reason to OpenAI finish_reason. + + The adapter already does this mapping inside normalize_converse_response, + so this is only used for direct access to raw responses. + """ + _MAP = { + "end_turn": "stop", + "tool_use": "tool_calls", + "max_tokens": "length", + "stop_sequence": "stop", + "guardrail_intervened": "content_filter", + "content_filtered": "content_filter", + } + return _MAP.get(raw_reason, "stop") + + +# Auto-register on import +from agent.transports import register_transport # noqa: E402 + +register_transport("bedrock_converse", BedrockTransport) diff --git a/agent/transports/chat_completions.py b/agent/transports/chat_completions.py new file mode 100644 index 0000000000..900f59dcf4 --- /dev/null +++ b/agent/transports/chat_completions.py @@ -0,0 +1,387 @@ +"""OpenAI Chat Completions transport. + +Handles the default api_mode ('chat_completions') used by ~16 OpenAI-compatible +providers (OpenRouter, Nous, NVIDIA, Qwen, Ollama, DeepSeek, xAI, Kimi, etc.). + +Messages and tools are already in OpenAI format — convert_messages and +convert_tools are near-identity. The complexity lives in build_kwargs +which has provider-specific conditionals for max_tokens defaults, +reasoning configuration, temperature handling, and extra_body assembly. +""" + +import copy +from typing import Any, Dict, List, Optional + +from agent.prompt_builder import DEVELOPER_ROLE_MODELS +from agent.transports.base import ProviderTransport +from agent.transports.types import NormalizedResponse, ToolCall, Usage + + +class ChatCompletionsTransport(ProviderTransport): + """Transport for api_mode='chat_completions'. + + The default path for OpenAI-compatible providers. 
+ """ + + @property + def api_mode(self) -> str: + return "chat_completions" + + def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> List[Dict[str, Any]]: + """Messages are already in OpenAI format — sanitize Codex leaks only. + + Strips Codex Responses API fields (``codex_reasoning_items`` on the + message, ``call_id``/``response_item_id`` on tool_calls) that strict + chat-completions providers reject with 400/422. + """ + needs_sanitize = False + for msg in messages: + if not isinstance(msg, dict): + continue + if "codex_reasoning_items" in msg: + needs_sanitize = True + break + tool_calls = msg.get("tool_calls") + if isinstance(tool_calls, list): + for tc in tool_calls: + if isinstance(tc, dict) and ("call_id" in tc or "response_item_id" in tc): + needs_sanitize = True + break + if needs_sanitize: + break + + if not needs_sanitize: + return messages + + sanitized = copy.deepcopy(messages) + for msg in sanitized: + if not isinstance(msg, dict): + continue + msg.pop("codex_reasoning_items", None) + tool_calls = msg.get("tool_calls") + if isinstance(tool_calls, list): + for tc in tool_calls: + if isinstance(tc, dict): + tc.pop("call_id", None) + tc.pop("response_item_id", None) + return sanitized + + def convert_tools(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Tools are already in OpenAI format — identity.""" + return tools + + def build_kwargs( + self, + model: str, + messages: List[Dict[str, Any]], + tools: Optional[List[Dict[str, Any]]] = None, + **params, + ) -> Dict[str, Any]: + """Build chat.completions.create() kwargs. + + This is the most complex transport method — it handles ~16 providers + via params rather than subclasses. + + params: + timeout: float — API call timeout + max_tokens: int | None — user-configured max tokens + ephemeral_max_output_tokens: int | None — one-shot override (error recovery) + max_tokens_param_fn: callable — returns {max_tokens: N} or {max_completion_tokens: N} + reasoning_config: dict | None + request_overrides: dict | None + session_id: str | None + qwen_session_metadata: dict | None — {sessionId, promptId} precomputed + model_lower: str — lowercase model name for pattern matching + # Provider detection flags (all optional, default False) + is_openrouter: bool + is_nous: bool + is_qwen_portal: bool + is_github_models: bool + is_nvidia_nim: bool + is_kimi: bool + is_custom_provider: bool + ollama_num_ctx: int | None + # Provider routing + provider_preferences: dict | None + # Qwen-specific + qwen_prepare_fn: callable | None — runs AFTER codex sanitization + qwen_prepare_inplace_fn: callable | None — in-place variant for deepcopied lists + # Temperature + fixed_temperature: Any — from _fixed_temperature_for_model() + omit_temperature: bool + # Reasoning + supports_reasoning: bool + github_reasoning_extra: dict | None + # Claude on OpenRouter/Nous max output + anthropic_max_output: int | None + # Extra + extra_body_additions: dict | None — pre-built extra_body entries + """ + # Codex sanitization: drop reasoning_items / call_id / response_item_id + sanitized = self.convert_messages(messages) + + # Qwen portal prep AFTER codex sanitization. If sanitize already + # deepcopied, reuse that copy via the in-place variant to avoid a + # second deepcopy. 
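+        # (sanitized is messages     -> let qwen_prepare_fn make its own copy;
+        #  sanitized is a fresh copy -> qwen_prepare_inplace_fn mutates it.)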
+ is_qwen = params.get("is_qwen_portal", False) + if is_qwen: + qwen_prep = params.get("qwen_prepare_fn") + qwen_prep_inplace = params.get("qwen_prepare_inplace_fn") + if sanitized is messages: + if qwen_prep is not None: + sanitized = qwen_prep(sanitized) + else: + # Already deepcopied — transform in place + if qwen_prep_inplace is not None: + qwen_prep_inplace(sanitized) + elif qwen_prep is not None: + sanitized = qwen_prep(sanitized) + + # Developer role swap for GPT-5/Codex models + model_lower = params.get("model_lower", (model or "").lower()) + if ( + sanitized + and isinstance(sanitized[0], dict) + and sanitized[0].get("role") == "system" + and any(p in model_lower for p in DEVELOPER_ROLE_MODELS) + ): + sanitized = list(sanitized) + sanitized[0] = {**sanitized[0], "role": "developer"} + + api_kwargs: Dict[str, Any] = { + "model": model, + "messages": sanitized, + } + + timeout = params.get("timeout") + if timeout is not None: + api_kwargs["timeout"] = timeout + + # Temperature + fixed_temp = params.get("fixed_temperature") + omit_temp = params.get("omit_temperature", False) + if omit_temp: + api_kwargs.pop("temperature", None) + elif fixed_temp is not None: + api_kwargs["temperature"] = fixed_temp + + # Qwen metadata (caller precomputes {sessionId, promptId}) + qwen_meta = params.get("qwen_session_metadata") + if qwen_meta and is_qwen: + api_kwargs["metadata"] = qwen_meta + + # Tools + if tools: + api_kwargs["tools"] = tools + + # max_tokens resolution — priority: ephemeral > user > provider default + max_tokens_fn = params.get("max_tokens_param_fn") + ephemeral = params.get("ephemeral_max_output_tokens") + max_tokens = params.get("max_tokens") + anthropic_max_out = params.get("anthropic_max_output") + is_nvidia_nim = params.get("is_nvidia_nim", False) + is_kimi = params.get("is_kimi", False) + reasoning_config = params.get("reasoning_config") + + if ephemeral is not None and max_tokens_fn: + api_kwargs.update(max_tokens_fn(ephemeral)) + elif max_tokens is not None and max_tokens_fn: + api_kwargs.update(max_tokens_fn(max_tokens)) + elif is_nvidia_nim and max_tokens_fn: + api_kwargs.update(max_tokens_fn(16384)) + elif is_qwen and max_tokens_fn: + api_kwargs.update(max_tokens_fn(65536)) + elif is_kimi and max_tokens_fn: + # Kimi/Moonshot: 32000 matches Kimi CLI's default + api_kwargs.update(max_tokens_fn(32000)) + elif anthropic_max_out is not None: + api_kwargs["max_tokens"] = anthropic_max_out + + # Kimi: top-level reasoning_effort (unless thinking disabled) + if is_kimi: + _kimi_thinking_off = bool( + reasoning_config + and isinstance(reasoning_config, dict) + and reasoning_config.get("enabled") is False + ) + if not _kimi_thinking_off: + _kimi_effort = "medium" + if reasoning_config and isinstance(reasoning_config, dict): + _e = (reasoning_config.get("effort") or "").strip().lower() + if _e in ("low", "medium", "high"): + _kimi_effort = _e + api_kwargs["reasoning_effort"] = _kimi_effort + + # extra_body assembly + extra_body: Dict[str, Any] = {} + + is_openrouter = params.get("is_openrouter", False) + is_nous = params.get("is_nous", False) + is_github_models = params.get("is_github_models", False) + + provider_prefs = params.get("provider_preferences") + if provider_prefs and is_openrouter: + extra_body["provider"] = provider_prefs + + # Kimi extra_body.thinking + if is_kimi: + _kimi_thinking_enabled = True + if reasoning_config and isinstance(reasoning_config, dict): + if reasoning_config.get("enabled") is False: + _kimi_thinking_enabled = False + extra_body["thinking"] = { + 
"type": "enabled" if _kimi_thinking_enabled else "disabled", + } + + # Reasoning + if params.get("supports_reasoning", False): + if is_github_models: + gh_reasoning = params.get("github_reasoning_extra") + if gh_reasoning is not None: + extra_body["reasoning"] = gh_reasoning + else: + if reasoning_config is not None: + rc = dict(reasoning_config) + if is_nous and rc.get("enabled") is False: + pass # omit for Nous when disabled + else: + extra_body["reasoning"] = rc + else: + extra_body["reasoning"] = {"enabled": True, "effort": "medium"} + + if is_nous: + extra_body["tags"] = ["product=hermes-agent"] + + # Ollama num_ctx + ollama_ctx = params.get("ollama_num_ctx") + if ollama_ctx: + options = extra_body.get("options", {}) + options["num_ctx"] = ollama_ctx + extra_body["options"] = options + + # Ollama/custom think=false + if params.get("is_custom_provider", False): + if reasoning_config and isinstance(reasoning_config, dict): + _effort = (reasoning_config.get("effort") or "").strip().lower() + _enabled = reasoning_config.get("enabled", True) + if _effort == "none" or _enabled is False: + extra_body["think"] = False + + if is_qwen: + extra_body["vl_high_resolution_images"] = True + + # Merge any pre-built extra_body additions + additions = params.get("extra_body_additions") + if additions: + extra_body.update(additions) + + if extra_body: + api_kwargs["extra_body"] = extra_body + + # Request overrides last (service_tier etc.) + overrides = params.get("request_overrides") + if overrides: + api_kwargs.update(overrides) + + return api_kwargs + + def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse: + """Normalize OpenAI ChatCompletion to NormalizedResponse. + + For chat_completions, this is near-identity — the response is already + in OpenAI format. extra_content on tool_calls (Gemini thought_signature) + is preserved via ToolCall.provider_data. reasoning_details (OpenRouter + unified format) and reasoning_content (DeepSeek/Moonshot) are also + preserved for downstream replay. + """ + choice = response.choices[0] + msg = choice.message + finish_reason = choice.finish_reason or "stop" + + tool_calls = None + if msg.tool_calls: + tool_calls = [] + for tc in msg.tool_calls: + # Preserve provider-specific extras on the tool call. + # Gemini 3 thinking models attach extra_content with + # thought_signature — without replay on the next turn the API + # rejects the request with 400. + tc_provider_data: Dict[str, Any] = {} + extra = getattr(tc, "extra_content", None) + if extra is None and hasattr(tc, "model_extra"): + extra = (tc.model_extra or {}).get("extra_content") + if extra is not None: + if hasattr(extra, "model_dump"): + try: + extra = extra.model_dump() + except Exception: + pass + tc_provider_data["extra_content"] = extra + tool_calls.append(ToolCall( + id=tc.id, + name=tc.function.name, + arguments=tc.function.arguments, + provider_data=tc_provider_data or None, + )) + + usage = None + if hasattr(response, "usage") and response.usage: + u = response.usage + usage = Usage( + prompt_tokens=getattr(u, "prompt_tokens", 0) or 0, + completion_tokens=getattr(u, "completion_tokens", 0) or 0, + total_tokens=getattr(u, "total_tokens", 0) or 0, + ) + + # Preserve reasoning fields separately. DeepSeek/Moonshot use + # ``reasoning_content``; others use ``reasoning``. Downstream code + # (_extract_reasoning, thinking-prefill retry) reads both distinctly, + # so keep them apart in provider_data rather than merging. 
+ reasoning = getattr(msg, "reasoning", None) + reasoning_content = getattr(msg, "reasoning_content", None) + + provider_data: Dict[str, Any] = {} + if reasoning_content: + provider_data["reasoning_content"] = reasoning_content + rd = getattr(msg, "reasoning_details", None) + if rd: + provider_data["reasoning_details"] = rd + + return NormalizedResponse( + content=msg.content, + tool_calls=tool_calls, + finish_reason=finish_reason, + reasoning=reasoning, + usage=usage, + provider_data=provider_data or None, + ) + + def validate_response(self, response: Any) -> bool: + """Check that response has valid choices.""" + if response is None: + return False + if not hasattr(response, "choices") or response.choices is None: + return False + if not response.choices: + return False + return True + + def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]: + """Extract OpenRouter/OpenAI cache stats from prompt_tokens_details.""" + usage = getattr(response, "usage", None) + if usage is None: + return None + details = getattr(usage, "prompt_tokens_details", None) + if details is None: + return None + cached = getattr(details, "cached_tokens", 0) or 0 + written = getattr(details, "cache_write_tokens", 0) or 0 + if cached or written: + return {"cached_tokens": cached, "creation_tokens": written} + return None + + +# Auto-register on import +from agent.transports import register_transport # noqa: E402 + +register_transport("chat_completions", ChatCompletionsTransport) diff --git a/agent/transports/codex.py b/agent/transports/codex.py new file mode 100644 index 0000000000..ec48352193 --- /dev/null +++ b/agent/transports/codex.py @@ -0,0 +1,217 @@ +"""OpenAI Responses API (Codex) transport. + +Delegates to the existing adapter functions in agent/codex_responses_adapter.py. +This transport owns format conversion and normalization — NOT client lifecycle, +streaming, or the _run_codex_stream() call path. +""" + +from typing import Any, Dict, List, Optional + +from agent.transports.base import ProviderTransport +from agent.transports.types import NormalizedResponse, ToolCall, Usage + + +class ResponsesApiTransport(ProviderTransport): + """Transport for api_mode='codex_responses'. + + Wraps the functions extracted into codex_responses_adapter.py (PR 1). + """ + + @property + def api_mode(self) -> str: + return "codex_responses" + + def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any: + """Convert OpenAI chat messages to Responses API input items.""" + from agent.codex_responses_adapter import _chat_messages_to_responses_input + return _chat_messages_to_responses_input(messages) + + def convert_tools(self, tools: List[Dict[str, Any]]) -> Any: + """Convert OpenAI tool schemas to Responses API function definitions.""" + from agent.codex_responses_adapter import _responses_tools + return _responses_tools(tools) + + def build_kwargs( + self, + model: str, + messages: List[Dict[str, Any]], + tools: Optional[List[Dict[str, Any]]] = None, + **params, + ) -> Dict[str, Any]: + """Build Responses API kwargs. + + Calls convert_messages and convert_tools internally. 
+ + params: + instructions: str — system prompt (extracted from messages[0] if not given) + reasoning_config: dict | None — {effort, enabled} + session_id: str | None — used for prompt_cache_key + xAI conv header + max_tokens: int | None — max_output_tokens + request_overrides: dict | None — extra kwargs merged in + provider: str | None — provider name for backend-specific logic + base_url: str | None — endpoint URL + base_url_hostname: str | None — hostname for backend detection + is_github_responses: bool — Copilot/GitHub models backend + is_codex_backend: bool — chatgpt.com/backend-api/codex + is_xai_responses: bool — xAI/Grok backend + github_reasoning_extra: dict | None — Copilot reasoning params + """ + from agent.codex_responses_adapter import ( + _chat_messages_to_responses_input, + _responses_tools, + ) + + from run_agent import DEFAULT_AGENT_IDENTITY + + instructions = params.get("instructions", "") + payload_messages = messages + if not instructions: + if messages and messages[0].get("role") == "system": + instructions = str(messages[0].get("content") or "").strip() + payload_messages = messages[1:] + if not instructions: + instructions = DEFAULT_AGENT_IDENTITY + + is_github_responses = params.get("is_github_responses", False) + is_codex_backend = params.get("is_codex_backend", False) + is_xai_responses = params.get("is_xai_responses", False) + + # Resolve reasoning effort + reasoning_effort = "medium" + reasoning_enabled = True + reasoning_config = params.get("reasoning_config") + if reasoning_config and isinstance(reasoning_config, dict): + if reasoning_config.get("enabled") is False: + reasoning_enabled = False + elif reasoning_config.get("effort"): + reasoning_effort = reasoning_config["effort"] + + _effort_clamp = {"minimal": "low"} + reasoning_effort = _effort_clamp.get(reasoning_effort, reasoning_effort) + + kwargs = { + "model": model, + "instructions": instructions, + "input": _chat_messages_to_responses_input(payload_messages), + "tools": _responses_tools(tools), + "tool_choice": "auto", + "parallel_tool_calls": True, + "store": False, + } + + session_id = params.get("session_id") + if not is_github_responses and session_id: + kwargs["prompt_cache_key"] = session_id + + if reasoning_enabled and is_xai_responses: + kwargs["include"] = ["reasoning.encrypted_content"] + elif reasoning_enabled: + if is_github_responses: + github_reasoning = params.get("github_reasoning_extra") + if github_reasoning is not None: + kwargs["reasoning"] = github_reasoning + else: + kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"} + kwargs["include"] = ["reasoning.encrypted_content"] + elif not is_github_responses and not is_xai_responses: + kwargs["include"] = [] + + request_overrides = params.get("request_overrides") + if request_overrides: + kwargs.update(request_overrides) + + max_tokens = params.get("max_tokens") + if max_tokens is not None and not is_codex_backend: + kwargs["max_output_tokens"] = max_tokens + + if is_xai_responses and session_id: + kwargs["extra_headers"] = {"x-grok-conv-id": session_id} + + return kwargs + + def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse: + """Normalize Codex Responses API response to NormalizedResponse.""" + from agent.codex_responses_adapter import ( + _normalize_codex_response, + _extract_responses_message_text, + _extract_responses_reasoning_text, + ) + + # _normalize_codex_response returns (SimpleNamespace, finish_reason_str) + msg, finish_reason = _normalize_codex_response(response) + + tool_calls = 
None + if msg and msg.tool_calls: + tool_calls = [] + for tc in msg.tool_calls: + provider_data = {} + if hasattr(tc, "call_id") and tc.call_id: + provider_data["call_id"] = tc.call_id + if hasattr(tc, "response_item_id") and tc.response_item_id: + provider_data["response_item_id"] = tc.response_item_id + tool_calls.append(ToolCall( + id=tc.id if hasattr(tc, "id") else (tc.function.name if hasattr(tc, "function") else None), + name=tc.function.name if hasattr(tc, "function") else getattr(tc, "name", ""), + arguments=tc.function.arguments if hasattr(tc, "function") else getattr(tc, "arguments", "{}"), + provider_data=provider_data or None, + )) + + # Extract reasoning items for provider_data + provider_data = {} + if msg and hasattr(msg, "codex_reasoning_items") and msg.codex_reasoning_items: + provider_data["codex_reasoning_items"] = msg.codex_reasoning_items + if msg and hasattr(msg, "reasoning_details") and msg.reasoning_details: + provider_data["reasoning_details"] = msg.reasoning_details + + return NormalizedResponse( + content=msg.content if msg else None, + tool_calls=tool_calls, + finish_reason=finish_reason or "stop", + reasoning=msg.reasoning if msg and hasattr(msg, "reasoning") else None, + usage=None, # Codex usage is extracted separately in normalize_usage() + provider_data=provider_data or None, + ) + + def validate_response(self, response: Any) -> bool: + """Check Codex Responses API response has valid output structure. + + Returns True only if response.output is a non-empty list. + Does NOT check output_text fallback — the caller handles that + with diagnostic logging for stream backfill recovery. + """ + if response is None: + return False + output = getattr(response, "output", None) + if not isinstance(output, list) or not output: + return False + return True + + def preflight_kwargs(self, api_kwargs: Any, *, allow_stream: bool = False) -> dict: + """Validate and sanitize Codex API kwargs before the call. + + Normalizes input items, strips unsupported fields, validates structure. + """ + from agent.codex_responses_adapter import _preflight_codex_api_kwargs + return _preflight_codex_api_kwargs(api_kwargs, allow_stream=allow_stream) + + def map_finish_reason(self, raw_reason: str) -> str: + """Map Codex response.status to OpenAI finish_reason. + + Codex uses response.status ('completed', 'incomplete') + + response.incomplete_details.reason for granular mapping. + This method handles the simple status string; the caller + should check incomplete_details separately for 'max_output_tokens'. + """ + _MAP = { + "completed": "stop", + "incomplete": "length", + "failed": "stop", + "cancelled": "stop", + } + return _MAP.get(raw_reason, "stop") + + +# Auto-register on import +from agent.transports import register_transport # noqa: E402 + +register_transport("codex_responses", ResponsesApiTransport) diff --git a/agent/transports/types.py b/agent/transports/types.py new file mode 100644 index 0000000000..2b048fcaa4 --- /dev/null +++ b/agent/transports/types.py @@ -0,0 +1,100 @@ +"""Shared types for normalized provider responses. + +These dataclasses define the canonical shape that all provider adapters +normalize responses to. The shared surface is intentionally minimal — +only fields that every downstream consumer reads are top-level. +Protocol-specific state goes in ``provider_data`` dicts (response-level +and per-tool-call) so that protocol-aware code paths can access it +without polluting the shared type. 
+""" + +from __future__ import annotations + +import json +from dataclasses import dataclass, field +from typing import Any, Dict, List, Optional + + +@dataclass +class ToolCall: + """A normalized tool call from any provider. + + ``id`` is the protocol's canonical identifier — what gets used in + ``tool_call_id`` / ``tool_use_id`` when constructing tool result + messages. May be ``None`` when the provider omits it; the agent + fills it via ``_deterministic_call_id()`` before storing in history. + + ``provider_data`` carries per-tool-call protocol metadata that only + protocol-aware code reads: + + * Codex: ``{"call_id": "call_XXX", "response_item_id": "fc_XXX"}`` + * Gemini: ``{"extra_content": {"google": {"thought_signature": "..."}}}`` + * Others: ``None`` + """ + + id: Optional[str] + name: str + arguments: str # JSON string + provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False) + + +@dataclass +class Usage: + """Token usage from an API response.""" + + prompt_tokens: int = 0 + completion_tokens: int = 0 + total_tokens: int = 0 + cached_tokens: int = 0 + + +@dataclass +class NormalizedResponse: + """Normalized API response from any provider. + + Shared fields are truly cross-provider — every caller can rely on + them without branching on api_mode. Protocol-specific state goes in + ``provider_data`` so that only protocol-aware code paths read it. + + Response-level ``provider_data`` examples: + + * Anthropic: ``{"reasoning_details": [...]}`` + * Codex: ``{"codex_reasoning_items": [...]}`` + * Others: ``None`` + """ + + content: Optional[str] + tool_calls: Optional[List[ToolCall]] + finish_reason: str # "stop", "tool_calls", "length", "content_filter" + reasoning: Optional[str] = None + usage: Optional[Usage] = None + provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False) + + +# --------------------------------------------------------------------------- +# Factory helpers +# --------------------------------------------------------------------------- + +def build_tool_call( + id: Optional[str], + name: str, + arguments: Any, + **provider_fields: Any, +) -> ToolCall: + """Build a ``ToolCall``, auto-serialising *arguments* if it's a dict. + + Any extra keyword arguments are collected into ``provider_data``. + """ + args_str = json.dumps(arguments) if isinstance(arguments, dict) else str(arguments) + pd = dict(provider_fields) if provider_fields else None + return ToolCall(id=id, name=name, arguments=args_str, provider_data=pd) + + +def map_finish_reason(reason: Optional[str], mapping: Dict[str, str]) -> str: + """Translate a provider-specific stop reason to the normalised set. + + Falls back to ``"stop"`` for unknown or ``None`` reasons. 
+
+    """
+    if reason is None:
+        return "stop"
+    return mapping.get(reason, "stop")
diff --git a/agent/usage_pricing.py b/agent/usage_pricing.py
index 29c75b172a..3554c5b991 100644
--- a/agent/usage_pricing.py
+++ b/agent/usage_pricing.py
@@ -6,6 +6,7 @@ from decimal import Decimal
 from typing import Any, Dict, Literal, Optional
 
 from agent.model_metadata import fetch_endpoint_model_metadata, fetch_model_metadata
+from utils import base_url_host_matches
 
 DEFAULT_PRICING = {"input": 0.0, "output": 0.0}
 
@@ -393,7 +394,7 @@ def resolve_billing_route(
     if provider_name == "openai-codex":
         return BillingRoute(provider="openai-codex", model=model, base_url=base_url or "", billing_mode="subscription_included")
 
-    if provider_name == "openrouter" or "openrouter.ai" in base:
+    if provider_name == "openrouter" or base_url_host_matches(base_url or "", "openrouter.ai"):
         return BillingRoute(provider="openrouter", model=model, base_url=base_url or "", billing_mode="official_models_api")
 
     if provider_name == "anthropic":
         return BillingRoute(provider="anthropic", model=model.split("/")[-1], base_url=base_url or "", billing_mode="official_docs_snapshot")
diff --git a/batch_runner.py b/batch_runner.py
index 1a65f473ff..7413ad59f4 100644
--- a/batch_runner.py
+++ b/batch_runner.py
@@ -444,6 +444,7 @@ def _process_batch_worker(args: Tuple) -> Dict[str, Any]:
             if not reasoning.get("has_any_reasoning", True):
                 print(f"   🚫 Prompt {prompt_index} discarded (no reasoning in any turn)")
                 discarded_no_reasoning += 1
+                completed_in_batch.append(prompt_index)
                 continue
 
             # Get and normalize tool stats for consistent schema across all entries
@@ -1189,12 +1190,12 @@ def main(
     """
     # Handle list distributions
     if list_distributions:
-        from toolset_distributions import list_distributions as get_all_dists, print_distribution_info
-        
+        from toolset_distributions import list_distributions as get_all_dists, print_distribution_info
+
         print("📊 Available Toolset Distributions")
         print("=" * 70)
-        
-        all_dists = get_all_dists()
+
+        all_dists = get_all_dists()
         for dist_name in sorted(all_dists.keys()):
             print_distribution_info(dist_name)
 
diff --git a/cli-config.yaml.example b/cli-config.yaml.example
index 20b54b7887..e8e3d30af6 100644
--- a/cli-config.yaml.example
+++ b/cli-config.yaml.example
@@ -63,7 +63,38 @@ model:
   # Leave unset to use the model's native output ceiling (recommended).
   # Set only if you want to deliberately limit individual response length.
   #
-  # max_tokens: 8192
+# max_tokens: 8192
+
+# Named provider overrides (optional)
+# Use this for per-provider request timeouts, non-stream stale timeouts,
+# and per-model exceptions.
+# Applies to the primary turn client on every api_mode (OpenAI-wire, native
+# Anthropic, and Anthropic-compatible providers), the fallback chain, and
+# client rebuilds during credential rotation. For OpenAI-wire chat
+# completions (streaming and non-streaming) the configured value is also
+# used as the per-request ``timeout=`` kwarg so it wins over the legacy
+# HERMES_API_TIMEOUT env var (which still applies when no config is set).
+# ``stale_timeout_seconds`` controls the non-streaming stale-call detector and
+# wins over the legacy HERMES_API_CALL_STALE_TIMEOUT env var. Leaving these
+# unset keeps the legacy defaults (HERMES_API_TIMEOUT=1800s,
+# HERMES_API_CALL_STALE_TIMEOUT=300s, native Anthropic 900s).
+#
+# Not currently wired for AWS Bedrock (bedrock_converse + AnthropicBedrock
+# SDK paths) — those use boto3 with its own timeout configuration.
+# +# providers: +# ollama-local: +# request_timeout_seconds: 300 # Longer timeout for local cold-starts +# stale_timeout_seconds: 900 # Explicitly re-enable stale detection on local endpoints +# anthropic: +# request_timeout_seconds: 30 # Fast-fail cloud requests +# models: +# claude-opus-4.6: +# timeout_seconds: 600 # Longer timeout for extended-thinking Opus calls +# openai-codex: +# models: +# gpt-5.4: +# stale_timeout_seconds: 1800 # Longer non-stream stale timeout for slow large-context turns # ============================================================================= # OpenRouter Provider Routing (only applies when using OpenRouter) @@ -91,20 +122,6 @@ model: # # Data policy: "allow" (default) or "deny" to exclude providers that may store data # # data_collection: "deny" -# ============================================================================= -# Smart Model Routing (optional) -# ============================================================================= -# Use a cheaper model for short/simple turns while keeping your main model for -# more complex requests. Disabled by default. -# -# smart_model_routing: -# enabled: true -# max_simple_chars: 160 -# max_simple_words: 28 -# cheap_model: -# provider: openrouter -# model: google/gemini-2.5-flash - # ============================================================================= # Git Worktree Isolation # ============================================================================= @@ -357,6 +374,18 @@ compression: # web_extract: # provider: "auto" # model: "" +# +# # Session search — summarizes matching past sessions +# session_search: +# provider: "auto" +# model: "" +# timeout: 30 +# max_concurrency: 3 # Limit parallel summaries to reduce request-burst 429s +# extra_body: {} # Provider-specific OpenAI-compatible request fields +# # Example for providers that support request-body +# # reasoning controls: +# # extra_body: +# # enable_thinking: false # ============================================================================= # Persistent Memory @@ -741,10 +770,12 @@ code_execution: # Subagent Delegation # ============================================================================= # The delegate_task tool spawns child agents with isolated context. -# Supports single tasks and batch mode (up to 3 parallel). +# Supports single tasks and batch mode (default 3 parallel, configurable). delegation: max_iterations: 50 # Max tool-calling turns per child (default: 50) - default_toolsets: ["terminal", "file", "web"] # Default toolsets for subagents + # max_concurrent_children: 3 # Max parallel child agents (default: 3) + # max_spawn_depth: 1 # Tree depth cap (1-3, default: 1 = flat). Raise to 2 or 3 to allow orchestrator children to spawn their own workers. + # orchestrator_enabled: true # Kill switch for role="orchestrator" children (default: true). # model: "google/gemini-3-flash-preview" # Override model for subagents (empty = inherit parent) # provider: "openrouter" # Override provider for subagents (empty = inherit parent) # # Resolves full credentials (base_url, api_key) automatically. @@ -888,3 +919,39 @@ display: # # Names and usernames are NOT affected (user-chosen, publicly visible). # # Routing/delivery still uses the original values internally. # redact_pii: false + +# ============================================================================= +# Shell-script hooks +# ============================================================================= +# Register shell scripts as plugin-hook callbacks. 
Each entry is executed as +# a subprocess (shell=False, shlex.split) with a JSON payload on stdin. On +# stdout the script may return JSON that either blocks the tool call or +# injects context into the next LLM call. +# +# Valid events (mirror hermes_cli.plugins.VALID_HOOKS): +# pre_tool_call, post_tool_call, pre_llm_call, post_llm_call, +# pre_api_request, post_api_request, on_session_start, on_session_end, +# on_session_finalize, on_session_reset, subagent_stop +# +# First-use consent: each (event, command) pair prompts once on a TTY, then +# is persisted to ~/.hermes/shell-hooks-allowlist.json. Non-interactive +# runs (gateway, cron) need --accept-hooks, HERMES_ACCEPT_HOOKS=1, or the +# hooks_auto_accept key below. +# +# See website/docs/user-guide/features/hooks.md for the full JSON wire +# protocol and worked examples. +# +# hooks: +# pre_tool_call: +# - matcher: "terminal" +# command: "~/.hermes/agent-hooks/block-rm-rf.sh" +# timeout: 10 +# post_tool_call: +# - matcher: "write_file|patch" +# command: "~/.hermes/agent-hooks/auto-format.sh" +# pre_llm_call: +# - command: "~/.hermes/agent-hooks/inject-cwd-context.sh" +# subagent_stop: +# - command: "~/.hermes/agent-hooks/log-orchestration.sh" +# +# hooks_auto_accept: false diff --git a/cli.py b/cli.py index c9ce95e9f2..9d87ff3562 100644 --- a/cli.py +++ b/cli.py @@ -19,12 +19,14 @@ import shutil import sys import json import re +import concurrent.futures import base64 import atexit import tempfile import time import uuid import textwrap +from urllib.parse import unquote, urlparse from contextlib import contextmanager from pathlib import Path from datetime import datetime @@ -65,6 +67,7 @@ from agent.usage_pricing import ( format_duration_compact, format_token_count_compact, ) +from agent.account_usage import fetch_account_usage, render_account_usage_lines from hermes_cli.banner import _format_context_length, format_banner_version_label _COMMAND_SPINNER_FRAMES = ("⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏") @@ -74,6 +77,7 @@ _COMMAND_SPINNER_FRAMES = ("⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧ # User-managed env files should override stale shell exports on restart. 
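# An illustrative sketch of the override-on-load rule described above: a
# value parsed from the user's env file wins over whatever the shell still
# has exported. `load_env_file` is a hypothetical stand-in for
# hermes_cli.env_loader.load_hermes_dotenv, whose real parsing rules may
# differ.
def load_env_file(path):
    import os
    for raw in path.read_text(encoding="utf-8").splitlines():
        line = raw.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, _, value = line.partition("=")
        # Unconditional assignment: the file beats a stale shell export.
        os.environ[key.strip()] = value.strip()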
from hermes_constants import get_hermes_home, display_hermes_home from hermes_cli.env_loader import load_hermes_dotenv +from utils import base_url_host_matches _hermes_home = get_hermes_home() _project_env = Path(__file__).parent / '.env' @@ -310,12 +314,6 @@ def load_cli_config() -> Dict[str, Any]: "enabled": True, # Auto-compress when approaching context limit "threshold": 0.50, # Compress at 50% of model's context limit }, - "smart_model_routing": { - "enabled": False, - "max_simple_chars": 160, - "max_simple_words": 28, - "cheap_model": {}, - }, "agent": { "max_turns": 90, # Default max tool-calling iterations (shared with subagents) "verbose": False, @@ -373,7 +371,6 @@ def load_cli_config() -> Dict[str, Any]: }, "delegation": { "max_iterations": 45, # Max tool-calling turns per child agent - "default_toolsets": ["terminal", "file", "web"], # Default toolsets for subagents "model": "", # Subagent model override (empty = inherit parent model) "provider": "", # Subagent provider override (empty = inherit parent provider) "base_url": "", # Direct OpenAI-compatible endpoint for subagents @@ -534,7 +531,6 @@ def load_cli_config() -> Dict[str, Any]: if _file_has_terminal_config or env_var not in os.environ: val = terminal_config[config_key] if isinstance(val, list): - import json os.environ[env_var] = json.dumps(val) else: os.environ[env_var] = str(val) @@ -918,6 +914,32 @@ def _cleanup_worktree(info: Dict[str, str] = None) -> None: print(f"\033[32m✓ Worktree cleaned up: {wt_path}\033[0m") +def _run_state_db_auto_maintenance(session_db) -> None: + """Call ``SessionDB.maybe_auto_prune_and_vacuum`` using current config. + + Reads the ``sessions:`` section from config.yaml via + :func:`hermes_cli.config.load_config` (the authoritative loader that + deep-merges DEFAULT_CONFIG, so unmigrated configs still get default + values). Honours ``auto_prune`` / ``retention_days`` / + ``vacuum_after_prune`` / ``min_interval_hours``, and delegates to the + DB. Never raises — maintenance must never block interactive startup. + """ + if session_db is None: + return + try: + from hermes_cli.config import load_config as _load_full_config + cfg = (_load_full_config().get("sessions") or {}) + if not cfg.get("auto_prune", False): + return + session_db.maybe_auto_prune_and_vacuum( + retention_days=int(cfg.get("retention_days", 90)), + min_interval_hours=int(cfg.get("min_interval_hours", 24)), + vacuum=bool(cfg.get("vacuum_after_prune", True)), + ) + except Exception as exc: + logger.debug("state.db auto-maintenance skipped: %s", exc) + + def _prune_stale_worktrees(repo_root: str, max_age_hours: int = 24) -> None: """Remove stale worktrees and orphaned branches on startup. @@ -1147,6 +1169,41 @@ def _rich_text_from_ansi(text: str) -> _RichText: return _RichText.from_ansi(text or "") +def _strip_markdown_syntax(text: str) -> str: + """Best-effort markdown marker removal for plain-text display.""" + plain = _rich_text_from_ansi(text or "").plain + plain = re.sub(r"^\s{0,3}(?:[-*_]\s*){3,}$", "", plain, flags=re.MULTILINE) + plain = re.sub(r"^\s{0,3}#{1,6}\s+", "", plain, flags=re.MULTILINE) + # Preserve blockquotes, lists, and checkboxes because they carry structure. + plain = re.sub(r"(```+|~~~+)", "", plain) + plain = re.sub(r"`([^`]*)`", r"\1", plain) + plain = re.sub(r"!\[([^\]]*)\]\([^\)]*\)", r"\1", plain) + plain = re.sub(r"\[([^\]]+)\]\([^\)]*\)", r"\1", plain) + plain = re.sub(r"\*\*\*([^*]+)\*\*\*", r"\1", plain) + plain = re.sub(r"(? 
Path | None: if (token.startswith('"') and token.endswith('"')) or (token.startswith("'") and token.endswith("'")): token = token[1:-1].strip() + token = token.replace('\\ ', ' ') if not token: return None - expanded = os.path.expandvars(os.path.expanduser(token)) + expanded = token + if token.startswith("file://"): + try: + parsed = urlparse(token) + if parsed.scheme == "file": + expanded = unquote(parsed.path or "") + if parsed.netloc and os.name == "nt": + expanded = f"//{parsed.netloc}{expanded}" + except Exception: + expanded = token + expanded = os.path.expandvars(os.path.expanduser(expanded)) if os.name != "nt": normalized = expanded.replace("\\", "/") if len(normalized) >= 3 and normalized[1] == ":" and normalized[2] == "/" and normalized[0].isalpha(): @@ -1330,6 +1398,7 @@ def _detect_file_drop(user_input: str) -> "dict | None": or stripped.startswith("~") or stripped.startswith("./") or stripped.startswith("../") + or stripped.startswith("file://") or (len(stripped) >= 3 and stripped[1] == ":" and stripped[2] in ("\\", "/") and stripped[0].isalpha()) or stripped.startswith('"/') or stripped.startswith('"~') @@ -1340,8 +1409,25 @@ def _detect_file_drop(user_input: str) -> "dict | None": if not starts_like_path: return None + direct_path = _resolve_attachment_path(stripped) + if direct_path is not None: + return { + "path": direct_path, + "is_image": direct_path.suffix.lower() in _IMAGE_EXTENSIONS, + "remainder": "", + } + first_token, remainder = _split_path_input(stripped) drop_path = _resolve_attachment_path(first_token) + if drop_path is None and " " in stripped and stripped[0] not in {"'", '"'}: + space_positions = [idx for idx, ch in enumerate(stripped) if ch == " "] + for pos in reversed(space_positions): + candidate = stripped[:pos].rstrip() + resolved = _resolve_attachment_path(candidate) + if resolved is not None: + drop_path = resolved + remainder = stripped[pos + 1 :].strip() + break if drop_path is None: return None @@ -1724,10 +1810,30 @@ class HermesCLI: # streaming: stream tokens to the terminal as they arrive (display.streaming in config.yaml) self.streaming_enabled = CLI_CONFIG["display"].get("streaming", False) + self.final_response_markdown = str( + CLI_CONFIG["display"].get("final_response_markdown", "strip") + ).strip().lower() or "strip" + if self.final_response_markdown not in {"render", "strip", "raw"}: + self.final_response_markdown = "strip" # Inline diff previews for write actions (display.inline_diffs in config.yaml) self._inline_diffs_enabled = CLI_CONFIG["display"].get("inline_diffs", True) + # Submitted multiline user-message preview (display.user_message_preview in config.yaml) + _ump = CLI_CONFIG["display"].get("user_message_preview", {}) + if not isinstance(_ump, dict): + _ump = {} + try: + _ump_first_lines = int(_ump.get("first_lines", 2)) + except (TypeError, ValueError): + _ump_first_lines = 2 + try: + _ump_last_lines = int(_ump.get("last_lines", 2)) + except (TypeError, ValueError): + _ump_last_lines = 2 + self.user_message_preview_first_lines = max(1, _ump_first_lines) + self.user_message_preview_last_lines = max(0, _ump_last_lines) + # Streaming display state self._stream_buf = "" # Partial line buffer for line-buffered rendering self._stream_started = False # True once first delta arrives @@ -1785,7 +1891,7 @@ class HermesCLI: # Match key to resolved base_url: OpenRouter URL → prefer OPENROUTER_API_KEY, # custom endpoint → prefer OPENAI_API_KEY (issue #560). # Note: _ensure_runtime_credentials() re-resolves this before first use. 
- if self.base_url and "openrouter.ai" in self.base_url: + if self.base_url and base_url_host_matches(self.base_url, "openrouter.ai"): self.api_key = api_key or os.getenv("OPENROUTER_API_KEY") or os.getenv("OPENAI_API_KEY") else: self.api_key = api_key or os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY") @@ -1810,7 +1916,7 @@ class HermesCLI: mcp_names = set((CLI_CONFIG.get("mcp_servers") or {}).keys()) invalid = [t for t in toolsets if not validate_toolset(t) and t not in mcp_names] if invalid: - self.console.print(f"[bold red]Warning: Unknown toolsets: {', '.join(invalid)}[/]") + self._console_print(f"[bold red]Warning: Unknown toolsets: {', '.join(invalid)}[/]") # Filesystem checkpoints: CLI flag > config cp_cfg = CLI_CONFIG.get("checkpoints", {}) @@ -1857,8 +1963,9 @@ class HermesCLI: fb = [fb] if fb.get("provider") and fb.get("model") else [] self._fallback_model = fb - # Optional cheap-vs-strong routing for simple turns - self._smart_model_routing = CLI_CONFIG.get("smart_model_routing", {}) or {} + # Signature of the currently-initialised agent's runtime. Used to + # rebuild the agent when provider / model / base_url changes across + # turns (e.g. after /model or credential rotation). self._active_agent_route_signature = None # Agent will be initialized on first use @@ -1869,6 +1976,10 @@ class HermesCLI: self.conversation_history: List[Dict[str, Any]] = [] self.session_start = datetime.now() self._resumed = False + # Per-prompt elapsed timer — started at the beginning of each chat turn, + # frozen when the agent thread completes, displayed in the status bar. + self._prompt_start_time: Optional[float] = None # time.time() when turn started + self._prompt_duration: float = 0.0 # frozen duration of last completed turn # Initialize SQLite session store early so /title works before first message self._session_db = None try: @@ -1876,7 +1987,13 @@ class HermesCLI: self._session_db = SessionDB() except Exception as e: logger.warning("Failed to initialize SessionDB — session will NOT be indexed for search: %s", e) - + + # Opportunistic state.db maintenance — runs at most once per + # min_interval_hours, tracked via state_meta in state.db itself so + # it's shared across all Hermes processes for this HERMES_HOME. + # Never blocks startup on failure. + _run_state_db_auto_maintenance(self._session_db) + # Deferred title: stored in memory until the session is created in the DB self._pending_title: Optional[str] = None @@ -1945,8 +2062,7 @@ class HermesCLI: def _invalidate(self, min_interval: float = 0.25) -> None: """Throttled UI repaint — prevents terminal blinking on slow/SSH connections.""" - import time as _time - now = _time.monotonic() + now = time.monotonic() if hasattr(self, "_app") and self._app and (now - self._last_invalidate) >= min_interval: self._last_invalidate = now self._app.invalidate() @@ -1967,6 +2083,44 @@ class HermesCLI: filled = round((safe_percent / 100) * width) return f"[{('█' * filled) + ('░' * max(0, width - filled))}]" + @staticmethod + def _format_prompt_elapsed(prompt_start_time: Optional[float], prompt_duration: float, live: bool = False) -> str: + """Format per-prompt elapsed time for the status bar. + + Always returns a string — shows 0s on fresh start before first turn. + Keeps seconds visible at all scales so it increments smoothly: + 59s → 1m → 1m 1s → ... → 1m 59s → 2m → 2m 1s → ... + 59m 59s → 1h → 1h 0m 1s → ... + 23h 59m 59s → 1d → 1d 0h 1m → ... + + Emoji prefix: ⏱ when turn is live, ⏲ when frozen or fresh start. 
+ Uses width-1 (no variation selector) glyphs so the status bar stays + aligned in monospace terminals. + """ + if prompt_start_time is None and prompt_duration == 0.0: + return "⏲ 0s" + elapsed = time.time() - prompt_start_time if prompt_start_time is not None else prompt_duration + elapsed = max(0.0, elapsed) + + days = int(elapsed // 86400) + remaining = elapsed % 86400 + hours = int(remaining // 3600) + remaining = remaining % 3600 + minutes = int(remaining // 60) + seconds = int(remaining % 60) + + if days > 0: + time_str = f"{days}d {hours}h {minutes}m" + elif hours > 0: + time_str = f"{hours}h {minutes}m {seconds}s" if seconds else f"{hours}h {minutes}m" + elif minutes > 0: + time_str = f"{minutes}m {seconds}s" if seconds else f"{minutes}m" + else: + time_str = f"{int(elapsed)}s" + + emoji = "⏱" if live else "⏲" + return f"{emoji} {time_str}" + def _get_status_bar_snapshot(self) -> Dict[str, Any]: # Prefer the agent's model name — it updates on fallback. # self.model reflects the originally configured model and never @@ -1985,6 +2139,11 @@ class HermesCLI: "model_name": model_name, "model_short": model_short, "duration": format_duration_compact(elapsed_seconds), + "prompt_elapsed": self._format_prompt_elapsed( + getattr(self, "_prompt_start_time", None), + getattr(self, "_prompt_duration", 0.0), + live=getattr(self, "_prompt_start_time", None) is not None, + ), "context_tokens": 0, "context_length": None, "context_percent": None, @@ -2121,8 +2280,7 @@ class HermesCLI: return "" t0 = getattr(self, "_tool_start_time", 0) or 0 if t0 > 0: - import time as _time - elapsed = _time.monotonic() - t0 + elapsed = time.monotonic() - t0 if elapsed >= 60: _m, _s = int(elapsed // 60), int(elapsed % 60) elapsed_str = f"{_m}m {_s}s" @@ -2176,6 +2334,9 @@ class HermesCLI: parts = [f"⚕ {snapshot['model_short']}", context_label, percent_label] parts.append(duration_label) + prompt_elapsed = snapshot.get("prompt_elapsed") + if prompt_elapsed: + parts.append(prompt_elapsed) return self._trim_status_bar_text(" │ ".join(parts), width) except Exception: return f"⚕ {self.model if getattr(self, 'model', None) else 'Hermes'}" @@ -2234,8 +2395,13 @@ class HermesCLI: (bar_style, percent_label), ("class:status-bar-dim", " │ "), ("class:status-bar-dim", duration_label), - ("class:status-bar", " "), ] + # Position 7: per-prompt elapsed timer (live or frozen) + prompt_elapsed = snapshot.get("prompt_elapsed") + if prompt_elapsed: + frags.append(("class:status-bar-dim", " │ ")) + frags.append(("class:status-bar-dim", prompt_elapsed)) + frags.append(("class:status-bar", " ")) total_width = sum(self._status_bar_display_width(text) for _, text in frags) if total_width > width: @@ -2261,7 +2427,7 @@ class HermesCLI: normalized_model = normalize_model_for_provider(current_model, resolved_provider) if normalized_model and normalized_model != current_model: if not self._model_is_default: - self.console.print( + self._console_print( f"[yellow]⚠️ Normalized model '{current_model}' to '{normalized_model}' for {resolved_provider}.[/]" ) self.model = normalized_model @@ -2277,7 +2443,7 @@ class HermesCLI: canonical = normalize_copilot_model_id(current_model, api_key=self.api_key) if canonical and canonical != current_model: if not self._model_is_default: - self.console.print( + self._console_print( f"[yellow]⚠️ Normalized Copilot model '{current_model}' to '{canonical}'.[/]" ) self.model = canonical @@ -2299,7 +2465,7 @@ class HermesCLI: canonical = normalize_opencode_model_id(resolved_provider, current_model) if canonical and 
canonical != current_model: if not self._model_is_default: - self.console.print( + self._console_print( f"[yellow]⚠️ Stripped provider prefix from '{current_model}'; using '{canonical}' for {resolved_provider}.[/]" ) self.model = canonical @@ -2321,7 +2487,7 @@ class HermesCLI: if "/" in current_model: slug = current_model.split("/", 1)[1] if not self._model_is_default: - self.console.print( + self._console_print( f"[yellow]⚠️ Stripped provider prefix from '{current_model}'; " f"using '{slug}' for OpenAI Codex.[/]" ) @@ -2369,9 +2535,6 @@ class HermesCLI: def _emit_reasoning_preview(self, reasoning_text: str) -> None: """Render a buffered reasoning preview as a single [thinking] block.""" - import re - import textwrap - preview_text = reasoning_text.strip() if not preview_text: return @@ -2454,6 +2617,59 @@ class HermesCLI: if flush_text: self._emit_reasoning_preview(flush_text) + def _format_submitted_user_message_preview(self, user_input: str) -> str: + """Format the submitted user-message scrollback preview.""" + lines = user_input.split("\n") + if len(lines) <= 1: + return f"[bold {_accent_hex()}]●[/] [bold]{_escape(user_input)}[/]" + + first_lines = int(getattr(self, "user_message_preview_first_lines", 2)) + last_lines = int(getattr(self, "user_message_preview_last_lines", 2)) + first_lines = max(1, first_lines) + last_lines = max(0, last_lines) + head = lines[:first_lines] + remaining_after_head = max(0, len(lines) - len(head)) + tail_count = min(last_lines, remaining_after_head) + tail = lines[-tail_count:] if tail_count else [] + + hidden_middle_count = len(lines) - len(head) - len(tail) + if hidden_middle_count < 0: + hidden_middle_count = 0 + tail = [] + + preview_lines = [ + f"[bold {_accent_hex()}]●[/] [bold]{_escape(head[0])}[/]" + ] + preview_lines.extend(f"[bold]{_escape(line)}[/]" for line in head[1:]) + + if hidden_middle_count > 0: + noun = "line" if hidden_middle_count == 1 else "lines" + preview_lines.append(f"[dim]... (+{hidden_middle_count} more {noun})[/]") + + preview_lines.extend(f"[bold]{_escape(line)}[/]" for line in tail) + return "\n".join(preview_lines) + + def _expand_paste_references(self, text: str | None) -> str: + """Expand [Pasted text #N -> file] placeholders into file contents.""" + if not isinstance(text, str) or "[Pasted text #" not in text: + return text or "" + paste_ref_re = re.compile(r'\[Pasted text #\d+: \d+ lines \u2192 (.+?)\]') + + def _expand_ref(match): + path = Path(match.group(1)) + return path.read_text(encoding="utf-8") if path.exists() else match.group(0) + + return paste_ref_re.sub(_expand_ref, text) + + def _print_user_message_preview(self, user_input: str) -> None: + """Render a user message using the normal chat scrollback style.""" + ChatConsole().print(f"[{_accent_hex()}]{'─' * 40}[/]") + text = str(user_input or "") + if "\n" in text: + ChatConsole().print(self._format_submitted_user_message_preview(text)) + else: + ChatConsole().print(f"[bold {_accent_hex()}]●[/] [bold]{_escape(text)}[/]") + def _stream_reasoning_delta(self, text: str) -> None: """Stream reasoning/thinking tokens into a dim box above the response. 
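The multiline submit preview added above reduces to a small head/tail windowing rule: always keep at least the first line, never let head and tail overlap, and summarize the hidden middle. A minimal standalone sketch of the same logic with the Rich markup omitted — `preview_window` is a hypothetical extraction, not a function in this patch:

def preview_window(lines, first=2, last=2):
    # Mirror _format_submitted_user_message_preview: the head takes priority,
    # and the tail only gets whatever lines remain after the head.
    first, last = max(1, first), max(0, last)
    head = lines[:first]
    tail_count = min(last, max(0, len(lines) - len(head)))
    tail = lines[len(lines) - tail_count:] if tail_count else []
    hidden = len(lines) - len(head) - len(tail)
    marker = [f"... (+{hidden} more {'line' if hidden == 1 else 'lines'})"] if hidden else []
    return head + marker + tail

# preview_window([f"l{i}" for i in range(7)])
# -> ['l0', 'l1', '... (+3 more lines)', 'l5', 'l6']

Because the head is clamped to at least one line, the opening of the message always survives truncation, which is what makes the scrollback preview scannable.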
@@ -2697,6 +2913,8 @@ class HermesCLI: _tc = getattr(self, "_stream_text_ansi", "") while "\n" in self._stream_buf: line, self._stream_buf = self._stream_buf.split("\n", 1) + if self.final_response_markdown == "strip": + line = _strip_markdown_syntax(line) _cprint(f"{_STREAM_PAD}{_tc}{line}{_RST}" if _tc else f"{_STREAM_PAD}{line}") def _flush_stream(self) -> None: @@ -2714,7 +2932,8 @@ class HermesCLI: if self._stream_buf: _tc = getattr(self, "_stream_text_ansi", "") - _cprint(f"{_STREAM_PAD}{_tc}{self._stream_buf}{_RST}" if _tc else f"{_STREAM_PAD}{self._stream_buf}") + line = _strip_markdown_syntax(self._stream_buf) if self.final_response_markdown == "strip" else self._stream_buf + _cprint(f"{_STREAM_PAD}{_tc}{line}{_RST}" if _tc else f"{_STREAM_PAD}{line}") self._stream_buf = "" # Close the response box @@ -2757,9 +2976,7 @@ class HermesCLI: def _command_spinner_frame(self) -> str: """Return the current spinner frame for slow slash commands.""" - import time as _time - - frame_idx = int(_time.monotonic() * 10) % len(_COMMAND_SPINNER_FRAMES) + frame_idx = int(time.monotonic() * 10) % len(_COMMAND_SPINNER_FRAMES) return _COMMAND_SPINNER_FRAMES[frame_idx] @contextmanager @@ -2776,6 +2993,39 @@ class HermesCLI: self._command_status = "" self._invalidate(min_interval=0.0) + def _open_external_editor(self, buffer=None) -> bool: + """Open the active input buffer in an external editor.""" + app = getattr(self, "_app", None) + if not app: + _cprint(f"{_DIM}External editor is only available inside the interactive CLI.{_RST}") + return False + if self._command_running: + _cprint(f"{_DIM}Wait for the current command to finish before opening the editor.{_RST}") + return False + if self._sudo_state or self._secret_state or self._approval_state or self._clarify_state: + _cprint(f"{_DIM}Finish the active prompt before opening the editor.{_RST}") + return False + target_buffer = buffer or getattr(app, "current_buffer", None) + if target_buffer is None: + _cprint(f"{_DIM}No active input buffer is available for the external editor.{_RST}") + return False + try: + existing_text = getattr(target_buffer, "text", "") + expanded_text = self._expand_paste_references(existing_text) + if expanded_text != existing_text and hasattr(target_buffer, "text"): + self._skip_paste_collapse = True + target_buffer.text = expanded_text + if hasattr(target_buffer, "cursor_position"): + target_buffer.cursor_position = len(expanded_text) + # Set skip flag (again) so the text-change event fired when the + # editor closes does not re-collapse the returned content. + self._skip_paste_collapse = True + target_buffer.open_in_editor(validate_and_handle=False) + return True + except Exception as exc: + _cprint(f"{_DIM}Failed to open external editor: {exc}{_RST}") + return False + def _ensure_runtime_credentials(self) -> bool: """ Ensure runtime credentials are resolved before agent use. @@ -2883,24 +3133,36 @@ class HermesCLI: return True def _resolve_turn_agent_config(self, user_message: str) -> dict: - """Resolve model/runtime overrides for a single user turn.""" - from agent.smart_model_routing import resolve_turn_route + """Build the effective model/runtime config for a single user turn. + + Always uses the session's primary model/provider. If the user has + toggled `/fast` on and the current model supports Priority + Processing / Anthropic fast mode, attach `request_overrides` so the + API call is marked accordingly. 
+ """ from hermes_cli.models import resolve_fast_mode_overrides - route = resolve_turn_route( - user_message, - self._smart_model_routing, - { - "model": self.model, - "api_key": self.api_key, - "base_url": self.base_url, - "provider": self.provider, - "api_mode": self.api_mode, - "command": self.acp_command, - "args": list(self.acp_args or []), - "credential_pool": getattr(self, "_credential_pool", None), - }, - ) + runtime = { + "api_key": self.api_key, + "base_url": self.base_url, + "provider": self.provider, + "api_mode": self.api_mode, + "command": self.acp_command, + "args": list(self.acp_args or []), + "credential_pool": getattr(self, "_credential_pool", None), + } + route = { + "model": self.model, + "runtime": runtime, + "signature": ( + self.model, + runtime["provider"], + runtime["base_url"], + runtime["api_mode"], + runtime["command"], + tuple(runtime["args"]), + ), + } service_tier = getattr(self, "service_tier", None) if not service_tier: @@ -2908,13 +3170,13 @@ class HermesCLI: return route try: - overrides = resolve_fast_mode_overrides(route.get("model")) + overrides = resolve_fast_mode_overrides(route["model"]) except Exception: overrides = None route["request_overrides"] = overrides return route - def _init_agent(self, *, model_override: str = None, runtime_override: dict = None, route_label: str = None, request_overrides: dict | None = None) -> bool: + def _init_agent(self, *, model_override: str = None, runtime_override: dict = None, request_overrides: dict | None = None) -> bool: """ Initialize the agent on first use. When resuming a session, restores conversation history from SQLite. @@ -3070,7 +3332,7 @@ class HermesCLI: use_compact = self.compact or term_width < 80 if use_compact: - self.console.print(_build_compact_banner()) + self._console_print(_build_compact_banner()) self._show_status() else: # Get tools for display @@ -3095,25 +3357,25 @@ class HermesCLI: # Warn about very low context lengths (common with local servers) if ctx_len and ctx_len <= 8192: - self.console.print() - self.console.print( + self._console_print() + self._console_print( f"[yellow]⚠️ Context length is only {ctx_len:,} tokens — " f"this is likely too low for agent use with tools.[/]" ) - self.console.print( + self._console_print( "[dim] Hermes needs 16k–32k minimum. Tool schemas + system prompt alone use ~4k–8k.[/]" ) base_url = getattr(self, "base_url", "") or "" if "11434" in base_url or "ollama" in base_url.lower(): - self.console.print( + self._console_print( "[dim] Ollama fix: OLLAMA_CONTEXT_LENGTH=32768 ollama serve[/]" ) elif "1234" in base_url: - self.console.print( + self._console_print( "[dim] LM Studio fix: Set context length in model settings → reload model[/]" ) else: - self.console.print( + self._console_print( "[dim] Fix: Set model.context_length in config.yaml, or increase your server's context setting[/]" ) @@ -3122,20 +3384,20 @@ class HermesCLI: model_name = getattr(self, "model", "") or "" if is_nous_hermes_non_agentic(model_name): - self.console.print() - self.console.print( + self._console_print() + self._console_print( "[bold yellow]⚠ Nous Research Hermes 3 & 4 models are NOT agentic and are not " "designed for use with Hermes Agent.[/]" ) - self.console.print( + self._console_print( "[dim] They lack tool-calling capabilities required for agent workflows. 
" "Consider using an agentic model (Claude, GPT, Gemini, DeepSeek, etc.).[/]" ) - self.console.print( + self._console_print( "[dim] Switch with: /model sonnet or /model gpt5[/]" ) - self.console.print() + self._console_print() def _preload_resumed_session(self) -> bool: """Load a resumed session's history from the DB early (before first chat). @@ -3153,10 +3415,10 @@ class HermesCLI: session_meta = self._session_db.get_session(self.session_id) if not session_meta: - self.console.print( + self._console_print( f"[bold red]Session not found: {self.session_id}[/]" ) - self.console.print( + self._console_print( "[dim]Use a session ID from a previous CLI run " "(hermes sessions list).[/]" ) @@ -3171,7 +3433,7 @@ class HermesCLI: if session_meta.get("title"): title_part = f' "{session_meta["title"]}"' accent_color = _accent_hex() - self.console.print( + self._console_print( f"[{accent_color}]↻ Resumed session [bold]{self.session_id}[/bold]" f"{title_part} " f"({msg_count} user message{'s' if msg_count != 1 else ''}, " @@ -3179,7 +3441,7 @@ class HermesCLI: ) else: accent_color = _accent_hex() - self.console.print( + self._console_print( f"[{accent_color}]Session {self.session_id} found but has no " f"messages. Starting fresh.[/]" ) @@ -3354,7 +3616,7 @@ class HermesCLI: padding=(0, 1), style=_history_text_c, ) - self.console.print(panel) + self._console_print(panel) def _try_attach_clipboard_image(self) -> bool: """Check clipboard for an image and attach it if found. @@ -3725,7 +3987,6 @@ class HermesCLI: image later with ``vision_analyze`` if needed. """ import asyncio as _asyncio - import json as _json from tools.vision_tools import vision_analyze_tool analysis_prompt = ( @@ -3745,7 +4006,7 @@ class HermesCLI: result_json = _asyncio.run( vision_analyze_tool(image_url=str(img_path), user_prompt=analysis_prompt) ) - result = _json.loads(result_json) + result = json.loads(result_json) if result.get("success"): description = result.get("analysis", "") enriched_parts.append( @@ -3790,14 +4051,14 @@ class HermesCLI: api_key_missing = [u for u in unavailable if u["missing_vars"]] if api_key_missing: - self.console.print() - self.console.print("[yellow]⚠️ Some tools disabled (missing API keys):[/]") + self._console_print() + self._console_print("[yellow]⚠️ Some tools disabled (missing API keys):[/]") for item in api_key_missing: tools_str = ", ".join(item["tools"][:2]) # Show first 2 tools if len(item["tools"]) > 2: tools_str += f", +{len(item['tools'])-2} more" - self.console.print(f" [dim]• {item['name']}[/] [dim italic]({', '.join(item['missing_vars'])})[/]") - self.console.print("[dim] Run 'hermes setup' to configure[/]") + self._console_print(f" [dim]• {item['name']}[/] [dim italic]({', '.join(item['missing_vars'])})[/]") + self._console_print("[dim] Run 'hermes setup' to configure[/]") except Exception: pass # Don't crash on import errors @@ -3835,7 +4096,7 @@ class HermesCLI: if self._provider_source: provider_info += f" [dim {separator_color}]·[/] [dim]auth: {self._provider_source}[/]" - self.console.print( + self._console_print( f" {api_indicator} [{accent_color}]{model_short}[/] " f"[dim {separator_color}]·[/] [bold {label_color}]{tool_count} tools[/]" f"{toolsets_info}{provider_info}" @@ -3892,7 +4153,7 @@ class HermesCLI: f"Tokens: {total_tokens:,}", f"Agent Running: {'Yes' if is_running else 'No'}", ]) - self.console.print("\n".join(lines), highlight=False, markup=False) + self._console_print("\n".join(lines), highlight=False, markup=False) def _fast_command_available(self) -> bool: try: 
@@ -3941,6 +4202,7 @@ class HermesCLI: _cprint(f"\n {_DIM}Tip: Just type your message to chat with Hermes!{_RST}") _cprint(f" {_DIM}Multi-line: Alt+Enter for a new line{_RST}") + _cprint(f" {_DIM}Draft editor: Ctrl+G{_RST}") if _is_termux_environment(): _cprint(f" {_DIM}Attach image: /image {_termux_example_image_path()} or start your prompt with a local image path{_RST}\n") else: @@ -3999,8 +4261,37 @@ class HermesCLI: """ import shlex from argparse import Namespace + from contextlib import redirect_stdout + from io import StringIO from hermes_cli.tools_config import tools_disable_enable_command + def _run_capture(ns: Namespace) -> None: + """Run tools_disable_enable_command, routing its ANSI-colored + print() output through _cprint when inside the interactive TUI + so escapes aren't mangled by patch_stdout's StdoutProxy into + garbled '?[32m...?[0m' text. + + Outside the TUI (standalone mode, tests), call straight through + so real stdout / pytest capture works as expected. + """ + # Standalone/tests, run as usual + if getattr(self, "_app", None) is None: + tools_disable_enable_command(ns) + return + + # Buffer reports isatty()=True so color() in hermes_cli/colors.py + # still emits ANSI escapes. StringIO.isatty() is False, which + # would otherwise strip all colors before we re-render them. + class _TTYBuf(StringIO): + def isatty(self) -> bool: + return True + + buf = _TTYBuf() + with redirect_stdout(buf): + tools_disable_enable_command(ns) + for line in buf.getvalue().splitlines(): + _cprint(line) + try: parts = shlex.split(cmd) except ValueError: @@ -4012,8 +4303,7 @@ class HermesCLI: return if subcommand == "list": - tools_disable_enable_command( - Namespace(tools_action="list", platform="cli")) + _run_capture(Namespace(tools_action="list", platform="cli")) return names = parts[2:] @@ -4030,8 +4320,7 @@ class HermesCLI: label = ", ".join(names) _cprint(f"{_ACCENT}{verb} {label}...{_RST}") - tools_disable_enable_command( - Namespace(tools_action=subcommand, names=names, platform="cli")) + _run_capture(Namespace(tools_action=subcommand, names=names, platform="cli")) # Reset session so the new tool config is picked up from a clean state from hermes_cli.tools_config import _get_platform_tools @@ -4758,7 +5047,7 @@ class HermesCLI: pass cache_enabled = ( - ("openrouter" in (result.base_url or "").lower() and "claude" in result.new_model.lower()) + (base_url_host_matches(result.base_url or "", "openrouter.ai") and "claude" in result.new_model.lower()) or result.api_mode == "anthropic_messages" ) if cache_enabled: @@ -4986,7 +5275,7 @@ class HermesCLI: # Cache notice cache_enabled = ( - ("openrouter" in (result.base_url or "").lower() and "claude" in result.new_model.lower()) + (base_url_host_matches(result.base_url or "", "openrouter.ai") and "claude" in result.new_model.lower()) or result.api_mode == "anthropic_messages" ) if cache_enabled: @@ -5017,6 +5306,30 @@ class HermesCLI: except Exception: return False + def _should_handle_steer_command_inline(self, text: str, has_images: bool = False) -> bool: + """Return True when /steer should be dispatched immediately while the agent is running. + + /steer MUST bypass the normal _pending_input → process_loop path when + the agent is active, because process_loop is blocked inside + self.chat() for the duration of the run. 
By the time the queued + command is pulled from _pending_input, _agent_running has already + flipped back to False, and process_command() takes the idle + fallback — delivering the steer as a next-turn message instead of + injecting it mid-run. Dispatching inline on the UI thread calls + agent.steer() directly, which is thread-safe (uses _pending_steer_lock). + """ + if not text or has_images or not _looks_like_slash_command(text): + return False + if not getattr(self, "_agent_running", False): + return False + try: + from hermes_cli.commands import resolve_command + base = text.split(None, 1)[0].lower().lstrip('/') + cmd = resolve_command(base) + return bool(cmd and cmd.name == "steer") + except Exception: + return False + def _show_model_and_providers(self): """Show current model + provider and list all authenticated providers. @@ -5090,8 +5403,15 @@ class HermesCLI: print(" To change model or provider, use: hermes model") + def _output_console(self): + """Use prompt_toolkit-safe Rich rendering once the TUI is live.""" + if getattr(self, "_app", None): + return ChatConsole() + return self.console - + def _console_print(self, *args, **kwargs): + """Print through the active command-safe console.""" + self._output_console().print(*args, **kwargs) @staticmethod def _resolve_personality_prompt(value) -> str: @@ -5111,14 +5431,14 @@ class HermesCLI: from agent.google_oauth import get_valid_access_token, GoogleOAuthError, load_credentials from agent.google_code_assist import retrieve_user_quota, CodeAssistError except ImportError as exc: - self.console.print(f" [red]Gemini modules unavailable: {exc}[/]") + self._console_print(f" [red]Gemini modules unavailable: {exc}[/]") return try: access_token = get_valid_access_token() except GoogleOAuthError as exc: - self.console.print(f" [yellow]{exc}[/]") - self.console.print(" Run [bold]/model[/] and pick 'Google Gemini (OAuth)' to sign in.") + self._console_print(f" [yellow]{exc}[/]") + self._console_print(" Run [bold]/model[/] and pick 'Google Gemini (OAuth)' to sign in.") return creds = load_credentials() @@ -5127,18 +5447,18 @@ class HermesCLI: try: buckets = retrieve_user_quota(access_token, project_id=project_id) except CodeAssistError as exc: - self.console.print(f" [red]Quota lookup failed:[/] {exc}") + self._console_print(f" [red]Quota lookup failed:[/] {exc}") return if not buckets: - self.console.print(" [dim]No quota buckets reported (account may be on legacy/unmetered tier).[/]") + self._console_print(" [dim]No quota buckets reported (account may be on legacy/unmetered tier).[/]") return # Sort for stable display, group by model buckets.sort(key=lambda b: (b.model_id, b.token_type)) - self.console.print() - self.console.print(f" [bold]Gemini Code Assist quota[/] (project: {project_id or '(auto / free-tier)'})") - self.console.print() + self._console_print() + self._console_print(f" [bold]Gemini Code Assist quota[/] (project: {project_id or '(auto / free-tier)'})") + self._console_print() for b in buckets: pct = max(0.0, min(1.0, b.remaining_fraction)) width = 20 @@ -5148,8 +5468,8 @@ class HermesCLI: header = b.model_id if b.token_type: header += f" [{b.token_type}]" - self.console.print(f" {header:40s} {bar} {pct_str}") - self.console.print() + self._console_print(f" {header:40s} {bar} {pct_str}") + self._console_print() def _handle_personality_command(self, cmd: str): """Handle the /personality command to set predefined personalities.""" @@ -5280,7 +5600,7 @@ class HermesCLI: print(" /cron list") print(' /cron add "every 2h" "Check 
server status" [--skill blogwatcher]') print(' /cron edit --schedule "every 4h" --prompt "New task"') - print(" /cron edit --skill blogwatcher --skill find-nearby") + print(" /cron edit --skill blogwatcher --skill maps") print(" /cron edit --remove-skill blogwatcher") print(" /cron edit --clear-skills") print(" /cron pause ") @@ -5597,7 +5917,7 @@ class HermesCLI: _tip_color = get_active_skin().get_color("banner_dim", "#B8860B") except Exception: _tip_color = "#B8860B" - self.console.print(f"[dim {_tip_color}]✦ Tip: {_tip}[/]") + self._console_print(f"[dim {_tip_color}]✦ Tip: {_tip}[/]") except Exception: pass elif canonical == "history": @@ -5691,7 +6011,7 @@ class HermesCLI: elif canonical == "statusbar": self._status_bar_visible = not self._status_bar_visible state = "visible" if self._status_bar_visible else "hidden" - self.console.print(f" Status bar {state}") + self._console_print(f" Status bar {state}") elif canonical == "verbose": self._toggle_verbose() elif canonical == "yolo": @@ -5814,15 +6134,15 @@ class HermesCLI: ) output = result.stdout.strip() or result.stderr.strip() if output: - self.console.print(_rich_text_from_ansi(output)) + self._console_print(_rich_text_from_ansi(output)) else: - self.console.print("[dim]Command returned no output[/]") + self._console_print("[dim]Command returned no output[/]") except subprocess.TimeoutExpired: - self.console.print("[bold red]Quick command timed out (30s)[/]") + self._console_print("[bold red]Quick command timed out (30s)[/]") except Exception as e: - self.console.print(f"[bold red]Quick command error: {e}[/]") + self._console_print(f"[bold red]Quick command error: {e}[/]") else: - self.console.print(f"[bold red]Quick command '{base_cmd}' has no command defined[/]") + self._console_print(f"[bold red]Quick command '{base_cmd}' has no command defined[/]") elif qcmd.get("type") == "alias": target = qcmd.get("target", "").strip() if target: @@ -5831,9 +6151,9 @@ class HermesCLI: aliased_command = f"{target} {user_args}".strip() return self.process_command(aliased_command) else: - self.console.print(f"[bold red]Quick command '{base_cmd}' has no target defined[/]") + self._console_print(f"[bold red]Quick command '{base_cmd}' has no target defined[/]") else: - self.console.print(f"[bold red]Quick command '{base_cmd}' has unsupported type (supported: 'exec', 'alias')[/]") + self._console_print(f"[bold red]Quick command '{base_cmd}' has unsupported type (supported: 'exec', 'alias')[/]") # Check for plugin-registered slash commands elif base_cmd.lstrip("/") in _get_plugin_cmd_handler_names(): from hermes_cli.plugins import get_plugin_command_handler @@ -6012,8 +6332,7 @@ class HermesCLI: # with the output (fixes #2718). 
if self._app: self._app.invalidate() - import time as _tmod - _tmod.sleep(0.05) # brief pause for refresh + time.sleep(0.05) # brief pause for refresh print() ChatConsole().print(f"[{_accent_hex()}]{'─' * 40}[/]") _cprint(f" ✅ Background task #{task_num} complete") @@ -6033,7 +6352,7 @@ class HermesCLI: _chat_console = ChatConsole() _chat_console.print(Panel( - _rich_text_from_ansi(response), + _render_final_assistant_content(response, mode=self.final_response_markdown), title=f"[{_resp_color} bold]{label} (background #{task_num})[/]", title_align="left", border_style=_resp_color, @@ -6053,8 +6372,7 @@ class HermesCLI: # Same TUI refresh pattern as success path (#2718) if self._app: self._app.invalidate() - import time as _tmod - _tmod.sleep(0.05) + time.sleep(0.05) print() _cprint(f" ❌ Background task #{task_num} failed: {e}") finally: @@ -6158,7 +6476,7 @@ class HermesCLI: _resp_color = "#4F6D4A" ChatConsole().print(Panel( - _rich_text_from_ansi(response), + _render_final_assistant_content(response, mode=self.final_response_markdown), title=f"[{_resp_color} bold]⚕ /btw[/]", title_align="left", border_style=_resp_color, @@ -6274,7 +6592,6 @@ class HermesCLI: _launched = self._try_launch_chrome_debug(_port, _plat.system()) if _launched: # Wait for the port to come up - import time as _time for _wait in range(10): try: s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) @@ -6284,7 +6601,7 @@ class HermesCLI: _already_open = True break except (OSError, socket.timeout): - _time.sleep(0.5) + time.sleep(0.5) if _already_open: print(f" ✓ Chrome launched and listening on port {_port}") else: @@ -6650,6 +6967,18 @@ class HermesCLI: focus_topic=focus_topic or None, ) self.conversation_history = compressed + # _compress_context ends the old session and creates a new child + # session on the agent (run_agent.py::_compress_context). Sync the + # CLI's session_id so /status, /resume, exit summary, and title + # generation all point at the live continuation session, not the + # ended parent. Without this, subsequent end_session() calls target + # the already-closed parent and the child is orphaned. + if ( + getattr(self.agent, "session_id", None) + and self.agent.session_id != self.session_id + ): + self.session_id = self.agent.session_id + self._pending_title = None new_tokens = estimate_messages_tokens_rough(self.conversation_history) summary = summarize_manual_compression( original_history, @@ -6752,6 +7081,27 @@ class HermesCLI: if cost_result.status == "unknown": print(f" Note: Pricing unknown for {agent.model}") + # Account limits -- fetched off-thread with a hard timeout so slow + # provider APIs don't hang the prompt. 
+    provider = getattr(agent, "provider", None) or getattr(self, "provider", None)
+    base_url = getattr(agent, "base_url", None) or getattr(self, "base_url", None)
+    api_key = getattr(agent, "api_key", None) or getattr(self, "api_key", None)
+    account_snapshot = None
+    if provider:
+        # Plain executor rather than a `with` block: __exit__ would call
+        # shutdown(wait=True) and block on the very fetch the timeout is
+        # meant to abandon.
+        _pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
+        try:
+            account_snapshot = _pool.submit(
+                fetch_account_usage, provider,
+                base_url=base_url, api_key=api_key,
+            ).result(timeout=10.0)
+        except Exception:  # includes concurrent.futures.TimeoutError
+            account_snapshot = None
+        finally:
+            _pool.shutdown(wait=False, cancel_futures=True)
+    account_lines = [f"  {line}" for line in render_account_usage_lines(account_snapshot)]
+    if account_lines:
+        print()
+        for line in account_lines:
+            print(line)
+
     if self.verbose:
         logging.getLogger().setLevel(logging.DEBUG)
         for noisy in ('openai', 'openai._base_client', 'httpx', 'httpcore', 'asyncio', 'hpack', 'grpc', 'modal'):
@@ -6802,7 +7152,6 @@
         known state. When a change is detected, triggers _reload_mcp()
         and informs the user so they know the tool list has been refreshed.
         """
-        import time
         import yaml as _yaml
 
         CONFIG_WATCH_INTERVAL = 5.0  # seconds between config.yaml stat() calls
@@ -6894,7 +7243,6 @@
 
             # Refresh the agent's tool list so the model can call new tools
             if self.agent is not None:
-                from model_tools import get_tool_definitions
 
                 self.agent.tools = get_tool_definitions(
                     enabled_toolsets=self.agent.enabled_toolsets if hasattr(self.agent, "enabled_toolsets") else None,
@@ -6977,7 +7325,6 @@
         full history of tool calls (not just the current one in the spinner).
         """
         if event_type == "tool.completed":
-            import time as _time
             self._tool_start_time = 0.0
             # Print stacked scrollback line for "all" / "new" modes
             if function_name and self.tool_progress_mode in ("all", "new"):
@@ -7006,7 +7353,6 @@
         if event_type != "tool.started":
             return
         if function_name and not function_name.startswith("_"):
-            import time as _time
             from agent.display import get_tool_emoji
             emoji = get_tool_emoji(function_name)
             label = preview or function_name
...
             if _pl > 0 and len(label) > _pl:
                 label = label[:_pl - 3] + "..."
self._spinner_text = f"{emoji} {label}" - self._tool_start_time = _time.monotonic() + self._tool_start_time = time.monotonic() # Store args for stacked scrollback line on completion self._pending_tool_info.setdefault(function_name, []).append( function_args if function_args is not None else {} @@ -7132,11 +7478,12 @@ class HermesCLI: self._voice_stop_and_transcribe() # Audio cue: single beep BEFORE starting stream (avoid CoreAudio conflict) - try: - from tools.voice_mode import play_beep - play_beep(frequency=880, count=1) - except Exception: - pass + if self._voice_beeps_enabled(): + try: + from tools.voice_mode import play_beep + play_beep(frequency=880, count=1) + except Exception: + pass try: self._voice_recorder.start(on_silence_stop=_on_silence) @@ -7184,11 +7531,12 @@ class HermesCLI: wav_path = self._voice_recorder.stop() # Audio cue: double beep after stream stopped (no CoreAudio conflict) - try: - from tools.voice_mode import play_beep - play_beep(frequency=660, count=2) - except Exception: - pass + if self._voice_beeps_enabled(): + try: + from tools.voice_mode import play_beep + play_beep(frequency=660, count=2) + except Exception: + pass if wav_path is None: _cprint(f"{_DIM}No speech detected.{_RST}") @@ -7271,7 +7619,6 @@ class HermesCLI: try: from tools.tts_tool import text_to_speech_tool from tools.voice_mode import play_audio_file - import re # Strip markdown and non-speech content for cleaner TTS tts_text = text[:4000] if len(text) > 4000 else text @@ -7339,6 +7686,17 @@ class HermesCLI: _cprint(f"Unknown voice subcommand: {subcommand}") _cprint("Usage: /voice [on|off|tts|status]") + def _voice_beeps_enabled(self) -> bool: + """Return whether CLI voice mode should play record start/stop beeps.""" + try: + from hermes_cli.config import load_config + voice_cfg = load_config().get("voice", {}) + if isinstance(voice_cfg, dict): + return bool(voice_cfg.get("beep_enabled", True)) + except Exception: + pass + return True + def _enable_voice_mode(self): """Enable voice mode after checking requirements.""" if self._voice_mode: @@ -7648,7 +8006,9 @@ class HermesCLI: return selected = state.get("selected", 0) - choices = state.get("choices") or [] + choices = state.get("choices") + if not isinstance(choices, list): + choices = [] if not (0 <= selected < len(choices)): return @@ -7740,8 +8100,18 @@ class HermesCLI: choice_wrapped: list[tuple[int, str]] = [] for i, choice in enumerate(choices): label = choice_labels.get(choice, choice) - prefix = '❯ ' if i == selected else ' ' - for wrapped in _wrap_panel_text(f"{prefix}{label}", inner_text_width, subsequent_indent=" "): + # Show number prefix for quick selection (1-9 for items 1-9, 0 for 10th item) + if i < 9: + num_prefix = str(i + 1) + elif i == 9: + num_prefix = '0' + else: + num_prefix = ' ' # No number for items beyond 10th + if i == selected: + prefix = f'❯ {num_prefix}. ' + else: + prefix = f' {num_prefix}. ' + for wrapped in _wrap_panel_text(f"{prefix}{label}", inner_text_width, subsequent_indent=" "): choice_wrapped.append((i, wrapped)) # Budget vertical space so HSplit never clips the command or choices. 
@@ -7904,7 +8274,6 @@ class HermesCLI: if not self._init_agent( model_override=turn_route["model"], runtime_override=turn_route["runtime"], - route_label=turn_route["label"], request_overrides=turn_route.get("request_overrides"), ): return None @@ -8033,6 +8402,17 @@ class HermesCLI: def run_agent(): nonlocal result + # Set callbacks inside the agent thread so thread-local storage + # in terminal_tool is populated for this thread. The main thread + # registration (run() line ~9046) is invisible here because + # _callback_tls is threading.local(). Matches the pattern used + # by acp_adapter/server.py for ACP sessions. + set_sudo_password_callback(self._sudo_password_callback) + set_approval_callback(self._approval_callback) + try: + set_secret_capture_callback(self._secret_capture_callback) + except Exception: + pass agent_message = _voice_prefix + message if _voice_prefix else message # Prepend pending model switch note so the model knows about the switch _msn = getattr(self, '_pending_model_switch_note', None) @@ -8058,10 +8438,23 @@ class HermesCLI: "failed": True, "error": _summary, } + finally: + # Clear thread-local callbacks so a reused thread doesn't + # hold stale references to a disposed CLI instance. + try: + set_sudo_password_callback(None) + set_approval_callback(None) + set_secret_capture_callback(None) + except Exception: + pass # Start agent in background thread (daemon so it cannot keep the # process alive when the user closes the terminal tab — SIGHUP # exits the main thread and daemon threads are reaped automatically). + # Start per-prompt elapsed timer — frozen after the agent thread + # finishes; reset on the next turn. + self._prompt_start_time = time.time() + self._prompt_duration = 0.0 agent_thread = threading.Thread(target=run_agent, daemon=True) agent_thread.start() @@ -8091,8 +8484,7 @@ class HermesCLI: try: _dbg = _hermes_home / "interrupt_debug.log" with open(_dbg, "a") as _f: - import time as _t - _f.write(f"{_t.strftime('%H:%M:%S')} interrupt fired: msg={str(interrupt_msg)[:60]!r}, " + _f.write(f"{time.strftime('%H:%M:%S')} interrupt fired: msg={str(interrupt_msg)[:60]!r}, " f"children={len(self.agent._active_children)}, " f"parent._interrupt={self.agent._interrupt_requested}\n") for _ci, _ch in enumerate(self.agent._active_children): @@ -8139,6 +8531,12 @@ class HermesCLI: # but guard against edge cases. agent_thread.join(timeout=30) + # Freeze per-prompt elapsed timer once the agent thread has + # exited (or been abandoned as a daemon after interrupt). + if self._prompt_start_time is not None: + self._prompt_duration = max(0.0, time.time() - self._prompt_start_time) + self._prompt_start_time = None + # Proactively clean up async clients whose event loop is dead. # The agent thread may have created AsyncOpenAI clients bound # to a per-thread event loop; if that loop is now closed, those @@ -8162,13 +8560,26 @@ class HermesCLI: # buffer so tool/status lines render ABOVE our response box. # The flush pushes data into the renderer queue; the short # sleep lets the renderer actually paint it before we draw. - import time as _time sys.stdout.flush() - _time.sleep(0.15) + time.sleep(0.15) # Update history with full conversation self.conversation_history = result.get("messages", self.conversation_history) if result else self.conversation_history + # If auto-compression fired mid-turn, the agent created a new + # continuation session and mutated self.agent.session_id. 
Sync + # the CLI's session_id so /status, /resume, title generation, + # and the exit summary all target the live child session rather + # than the ended parent. Mirrors the gateway's post-run sync + # (gateway/run.py around line 9983). + if ( + self.agent + and getattr(self.agent, "session_id", None) + and self.agent.session_id != self.session_id + ): + self.session_id = self.agent.session_id + self._pending_title = None + # Get the final response response = result.get("final_response", "") if result else "" @@ -8258,7 +8669,7 @@ class HermesCLI: else: _chat_console = ChatConsole() _chat_console.print(Panel( - _rich_text_from_ansi(response), + _render_final_assistant_content(response, mode=self.final_response_markdown), title=f"[{_resp_color} bold]{label}[/]", title_align="left", border_style=_resp_color, @@ -8603,7 +9014,7 @@ class HermesCLI: except Exception: _welcome_text = "Welcome to Hermes Agent! Type your message or /help for commands." _welcome_color = "#FFF8DC" - self.console.print(f"[{_welcome_color}]{_welcome_text}[/]") + self._console_print(f"[{_welcome_color}]{_welcome_text}[/]") # Show a random tip to help users discover features try: from hermes_cli.tips import get_random_tip @@ -8612,16 +9023,16 @@ class HermesCLI: _tip_color = _welcome_skin.get_color("banner_dim", "#B8860B") except Exception: _tip_color = "#B8860B" - self.console.print(f"[dim {_tip_color}]✦ Tip: {_tip}[/]") + self._console_print(f"[dim {_tip_color}]✦ Tip: {_tip}[/]") except Exception: pass # Tips are non-critical — never break startup if self.preloaded_skills and not self._startup_skills_line_shown: skills_label = ", ".join(self.preloaded_skills) - self.console.print( + self._console_print( f"[bold {_accent_hex()}]Activated skills:[/] {skills_label}" ) self._startup_skills_line_shown = True - self.console.print() + self._console_print() # State for async operation self._agent_running = False @@ -8787,6 +9198,17 @@ class HermesCLI: event.app.current_buffer.reset(append_to_history=True) return + # Handle /steer while the agent is running immediately on the + # UI thread. Queuing through _pending_input would deadlock the + # steer until after the agent loop finishes (process_loop is + # blocked inside self.chat()), which turns /steer into a + # post-run next-turn message — defeating mid-run injection. + # agent.steer() is thread-safe (holds _pending_steer_lock). + if self._should_handle_steer_command_inline(text, has_images=has_images): + self.process_command(text) + event.app.current_buffer.reset(append_to_history=True) + return + # Snapshot and clear attached images images = list(self._attached_images) self._attached_images.clear() @@ -8805,8 +9227,7 @@ class HermesCLI: try: _dbg = _hermes_home / "interrupt_debug.log" with open(_dbg, "a") as _f: - import time as _t - _f.write(f"{_t.strftime('%H:%M:%S')} ENTER: queued interrupt msg={str(payload)[:60]!r}, " + _f.write(f"{time.strftime('%H:%M:%S')} ENTER: queued interrupt msg={str(payload)[:60]!r}, " f"agent_running={self._agent_running}\n") except Exception: pass @@ -8824,6 +9245,16 @@ class HermesCLI: """Ctrl+Enter (c-j) inserts a newline. 
Most terminals send c-j for Ctrl+Enter."""
        event.current_buffer.insert_text('\n')
 
+    @kb.add(
+        'c-g',
+        filter=Condition(
+            lambda: not self._clarify_state and not self._approval_state and not self._sudo_state and not self._secret_state
+        ),
+    )
+    def handle_open_in_editor(event):
+        """Ctrl+G opens the current draft in an external editor."""
+        cli_ref._open_external_editor(event.current_buffer)
+
     @kb.add('tab', eager=True)
     def handle_tab(event):
         """Tab: accept completion, auto-suggestion, or start completions.
@@ -8875,6 +9306,29 @@ class HermesCLI:
                 self._clarify_state["selected"] = min(max_idx, self._clarify_state["selected"] + 1)
                 event.app.invalidate()
 
+        # Number keys for quick clarify selection (1-9, 0 for 10th item)
+        def _make_clarify_number_handler(idx):
+            def handler(event):
+                if self._clarify_state and not self._clarify_freetext:
+                    choices = self._clarify_state.get("choices") or []
+                    # Map index to choice (treating "Other" as the last option)
+                    if idx < len(choices):
+                        # Select a numbered choice
+                        self._clarify_state["response_queue"].put(choices[idx])
+                        self._clarify_state = None
+                        self._clarify_freetext = False
+                        event.app.invalidate()
+                    elif idx == len(choices):
+                        # Select "Other" option
+                        self._clarify_freetext = True
+                        event.app.invalidate()
+            return handler
+
+        for _num in range(10):
+            # 1-9 select items 0-8, 0 selects item 9 (10th item)
+            _idx = 9 if _num == 0 else _num - 1
+            kb.add(str(_num), filter=Condition(lambda: bool(self._clarify_state) and not self._clarify_freetext))(_make_clarify_number_handler(_idx))
+
         # --- Dangerous command approval: arrow-key navigation ---
 
         @kb.add('up', filter=Condition(lambda: bool(self._approval_state)))
@@ -8916,6 +9370,20 @@ class HermesCLI:
                 event.app.current_buffer.reset()
                 event.app.invalidate()
 
+        # Number keys for quick approval selection (1-9, 0 for 10th item)
+        def _make_approval_number_handler(idx):
+            def handler(event):
+                if self._approval_state and idx < len(self._approval_state["choices"]):
+                    self._approval_state["selected"] = idx
+                    self._handle_approval_selection()
+                    event.app.invalidate()
+            return handler
+
+        for _num in range(10):
+            # 1-9 select items 0-8, 0 selects item 9 (10th item)
+            _idx = 9 if _num == 0 else _num - 1
+            kb.add(str(_num), filter=Condition(lambda: bool(self._approval_state)))(_make_approval_number_handler(_idx))
+
         # --- History navigation: up/down browse history in normal input mode ---
         # The TextArea is multiline, so by default up/down only move the cursor.
         # Buffer.auto_up/auto_down handle both: cursor movement when multi-line,
@@ -8944,8 +9412,7 @@ class HermesCLI:
         2. Interrupt the running agent (first press)
         3. Force exit (second press within 2s, or when idle)
         """
-        import time as _time
-        now = _time.time()
+        now = time.time()
 
        # Cancel active voice recording.
# Run cancel() in a background thread to prevent blocking the @@ -9053,12 +9520,11 @@ class HermesCLI: @kb.add('c-z') def handle_ctrl_z(event): """Handle Ctrl+Z - suspend process to background (Unix only).""" - import sys if sys.platform == 'win32': _cprint(f"\n{_DIM}Suspend (Ctrl+Z) is not supported on Windows.{_RST}") event.app.invalidate() return - import os, signal as _sig + import signal as _sig from prompt_toolkit.application import run_in_terminal from hermes_cli.skin_engine import get_active_skin agent_name = get_active_skin().get_branding("agent_name", "Hermes Agent") @@ -9275,6 +9741,7 @@ class HermesCLI: _prev_text_len = [0] _prev_newline_count = [0] _paste_just_collapsed = [False] + self._skip_paste_collapse = False def _on_text_changed(buf): """Detect large pastes and collapse them to a file reference. @@ -9294,8 +9761,9 @@ class HermesCLI: text = buf.text chars_added = len(text) - _prev_text_len[0] _prev_text_len[0] = len(text) - if _paste_just_collapsed[0]: + if _paste_just_collapsed[0] or self._skip_paste_collapse: _paste_just_collapsed[0] = False + self._skip_paste_collapse = False _prev_newline_count[0] = text.count('\n') return line_count = text.count('\n') @@ -9304,12 +9772,10 @@ class HermesCLI: is_paste = chars_added > 1 or newlines_added >= 4 if line_count >= 5 and is_paste and not text.startswith('/'): _paste_counter[0] += 1 - # Save to temp file paste_dir = _hermes_home / "pastes" paste_dir.mkdir(parents=True, exist_ok=True) paste_file = paste_dir / f"paste_{_paste_counter[0]}_{datetime.now().strftime('%H%M%S')}.txt" paste_file.write_text(text, encoding="utf-8") - # Replace buffer with compact reference _paste_just_collapsed[0] = True buf.text = f"[Pasted text #{_paste_counter[0]}: {line_count + 1} lines \u2192 {paste_file}]" buf.cursor_position = len(buf.text) @@ -9372,31 +9838,29 @@ class HermesCLI: # extra instructions (sudo countdown, approval navigation, clarify). # The agent-running interrupt hint is now an inline placeholder above. 
def get_hint_text(): - import time as _time - if cli_ref._sudo_state: - remaining = max(0, int(cli_ref._sudo_deadline - _time.monotonic())) + remaining = max(0, int(cli_ref._sudo_deadline - time.monotonic())) return [ ('class:hint', ' password hidden · Enter to skip'), ('class:clarify-countdown', f' ({remaining}s)'), ] if cli_ref._secret_state: - remaining = max(0, int(cli_ref._secret_deadline - _time.monotonic())) + remaining = max(0, int(cli_ref._secret_deadline - time.monotonic())) return [ ('class:hint', ' secret hidden · Enter to skip'), ('class:clarify-countdown', f' ({remaining}s)'), ] if cli_ref._approval_state: - remaining = max(0, int(cli_ref._approval_deadline - _time.monotonic())) + remaining = max(0, int(cli_ref._approval_deadline - time.monotonic())) return [ ('class:hint', ' ↑/↓ to select, Enter to confirm'), ('class:clarify-countdown', f' ({remaining}s)'), ] if cli_ref._clarify_state: - remaining = max(0, int(cli_ref._clarify_deadline - _time.monotonic())) + remaining = max(0, int(cli_ref._clarify_deadline - time.monotonic())) countdown = f' ({remaining}s)' if cli_ref._clarify_deadline else '' if cli_ref._clarify_freetext: return [ @@ -9488,14 +9952,32 @@ class HermesCLI: selected = state.get("selected", 0) preview_lines = _wrap_panel_text(question, 60) for i, choice in enumerate(choices): - prefix = "❯ " if i == selected and not cli_ref._clarify_freetext else " " - preview_lines.extend(_wrap_panel_text(f"{prefix}{choice}", 60, subsequent_indent=" ")) + # Show number prefix for quick selection (1-9 for items 1-9, 0 for 10th item) + if i < 9: + num_prefix = str(i + 1) + elif i == 9: + num_prefix = '0' + else: + num_prefix = ' ' + if i == selected and not cli_ref._clarify_freetext: + prefix = f"❯ {num_prefix}. " + else: + prefix = f" {num_prefix}. " + preview_lines.extend(_wrap_panel_text(f"{prefix}{choice}", 60, subsequent_indent=" ")) + # "Other" option in preview + other_num = len(choices) + 1 + if other_num < 10: + other_num_prefix = str(other_num) + elif other_num == 10: + other_num_prefix = '0' + else: + other_num_prefix = ' ' other_label = ( - "❯ Other (type below)" if cli_ref._clarify_freetext - else "❯ Other (type your answer)" if selected == len(choices) - else " Other (type your answer)" + f"❯ {other_num_prefix}. Other (type below)" if cli_ref._clarify_freetext + else f"❯ {other_num_prefix}. Other (type your answer)" if selected == len(choices) + else f" {other_num_prefix}. Other (type your answer)" ) - preview_lines.extend(_wrap_panel_text(other_label, 60, subsequent_indent=" ")) + preview_lines.extend(_wrap_panel_text(other_label, 60, subsequent_indent=" ")) box_width = _panel_box_width("Hermes needs your input", preview_lines) inner_text_width = max(8, box_width - 2) @@ -9503,18 +9985,35 @@ class HermesCLI: choice_wrapped: list[tuple[int, str]] = [] if choices: for i, choice in enumerate(choices): - prefix = '❯ ' if i == selected and not cli_ref._clarify_freetext else ' ' - for wrapped in _wrap_panel_text(f"{prefix}{choice}", inner_text_width, subsequent_indent=" "): + # Show number prefix for quick selection (1-9 for items 1-9, 0 for 10th item) + if i < 9: + num_prefix = str(i + 1) + elif i == 9: + num_prefix = '0' + else: + num_prefix = ' ' + if i == selected and not cli_ref._clarify_freetext: + prefix = f'❯ {num_prefix}. ' + else: + prefix = f' {num_prefix}. 
' + for wrapped in _wrap_panel_text(f"{prefix}{choice}", inner_text_width, subsequent_indent=" "): choice_wrapped.append((i, wrapped)) # Trailing Other row(s) other_idx = len(choices) - if selected == other_idx and not cli_ref._clarify_freetext: - other_label_mand = '❯ Other (type your answer)' - elif cli_ref._clarify_freetext: - other_label_mand = '❯ Other (type below)' + other_num = other_idx + 1 + if other_num < 10: + other_num_prefix = str(other_num) + elif other_num == 10: + other_num_prefix = '0' else: - other_label_mand = ' Other (type your answer)' - other_wrapped = _wrap_panel_text(other_label_mand, inner_text_width, subsequent_indent=" ") + other_num_prefix = ' ' + if selected == other_idx and not cli_ref._clarify_freetext: + other_label_mand = f'❯ {other_num_prefix}. Other (type your answer)' + elif cli_ref._clarify_freetext: + other_label_mand = f'❯ {other_num_prefix}. Other (type below)' + else: + other_label_mand = f' {other_num_prefix}. Other (type your answer)' + other_wrapped = _wrap_panel_text(other_label_mand, inner_text_width, subsequent_indent=" ") elif cli_ref._clarify_freetext: # Freetext-only mode: the guidance line takes the place of choices. other_wrapped = _wrap_panel_text( @@ -9579,6 +10078,15 @@ class HermesCLI: # "Other" option (trailing row(s), only shown when choices exist) other_idx = len(choices) + # Calculate number prefix for "Other" option + other_num = other_idx + 1 + if other_num < 10: + other_num_prefix = str(other_num) + elif other_num == 10: + other_num_prefix = '0' + else: + other_num_prefix = ' ' + if selected == other_idx and not cli_ref._clarify_freetext: other_style = 'class:clarify-selected' elif cli_ref._clarify_freetext: @@ -9686,7 +10194,8 @@ class HermesCLI: if stage == "provider": title = "⚙ Model Picker — Select Provider" choices = [] - for p in state.get("providers") or []: + _providers = state.get("providers") + for p in _providers if isinstance(_providers, list) else []: count = p.get("total_models", len(p.get("models", []))) label = f"{p['name']} ({count} model{'s' if count != 1 else ''})" if p.get("is_current"): @@ -9943,22 +10452,20 @@ class HermesCLI: app._on_resize = _resize_clear_ghosts def spinner_loop(): - import time as _time - last_idle_refresh = 0.0 while not self._should_exit: if not self._app: - _time.sleep(0.1) + time.sleep(0.1) continue if self._command_running: self._invalidate(min_interval=0.1) - _time.sleep(0.1) + time.sleep(0.1) else: - now = _time.monotonic() + now = time.monotonic() if now - last_idle_refresh >= 1.0: last_idle_refresh = now self._invalidate(min_interval=1.0) - _time.sleep(0.2) + time.sleep(0.2) spinner_thread = threading.Thread(target=spinner_loop, daemon=True) spinner_thread.start() @@ -10027,49 +10534,12 @@ class HermesCLI: continue # Expand paste references back to full content - import re as _re - _paste_ref_re = _re.compile(r'\[Pasted text #\d+: \d+ lines \u2192 (.+?)\]') + _paste_ref_re = re.compile(r'\[Pasted text #\d+: \d+ lines \u2192 (.+?)\]') paste_refs = list(_paste_ref_re.finditer(user_input)) if isinstance(user_input, str) else [] if paste_refs: - def _expand_ref(m): - p = Path(m.group(1)) - return p.read_text(encoding="utf-8") if p.exists() else m.group(0) - expanded = _paste_ref_re.sub(_expand_ref, user_input) - total_lines = expanded.count('\n') + 1 - n_pastes = len(paste_refs) - _user_bar = f"[{_accent_hex()}]{'─' * 40}[/]" - print() - ChatConsole().print(_user_bar) - # Show any surrounding user text alongside the paste summary - split_parts = _paste_ref_re.split(user_input) - 
visible_user_text = " ".join( - split_parts[i].strip() for i in range(0, len(split_parts), 2) if split_parts[i].strip() - ) - if visible_user_text: - ChatConsole().print( - f"[bold {_accent_hex()}]\u25cf[/] [bold]{_escape(visible_user_text)}[/] " - f"[dim]({n_pastes} pasted block{'s' if n_pastes > 1 else ''}, {total_lines} lines total)[/]" - ) - else: - ChatConsole().print( - f"[bold {_accent_hex()}]\u25cf[/] [bold]{_escape(f'[Pasted text: {total_lines} lines]')}[/]" - ) - user_input = expanded - else: - _user_bar = f"[{_accent_hex()}]{'─' * 40}[/]" - if '\n' in user_input: - first_line = user_input.split('\n')[0] - line_count = user_input.count('\n') + 1 - print() - ChatConsole().print(_user_bar) - ChatConsole().print( - f"[bold {_accent_hex()}]●[/] [bold]{_escape(first_line)}[/] " - f"[dim](+{line_count - 1} lines)[/]" - ) - else: - print() - ChatConsole().print(_user_bar) - ChatConsole().print(f"[bold {_accent_hex()}]●[/] [bold]{_escape(user_input)}[/]") + user_input = self._expand_paste_references(user_input) + print() + self._print_user_message_preview(user_input) # Show image attachment count if submit_images: @@ -10156,13 +10626,12 @@ class HermesCLI: try: if getattr(self, "agent", None) and getattr(self, "_agent_running", False): self.agent.interrupt(f"received signal {signum}") - import time as _t try: _grace = float(os.getenv("HERMES_SIGTERM_GRACE", "1.5")) except (TypeError, ValueError): _grace = 1.5 if _grace > 0: - _t.sleep(_grace) + time.sleep(_grace) except Exception: pass # never block signal handling raise KeyboardInterrupt() @@ -10195,8 +10664,7 @@ class HermesCLI: # uv-managed Python, fd 0 can be invalid or unregisterable with the # asyncio selector, causing "KeyError: '0 is not registered'" (#6393). try: - import os as _os - _os.fstat(0) + os.fstat(0) except OSError: print( "Error: stdin (fd 0) is not available.\n" @@ -10489,13 +10957,12 @@ def main( _agent = getattr(cli, "agent", None) if _agent is not None: _agent.interrupt(f"received signal {signum}") - import time as _t try: _grace = float(os.getenv("HERMES_SIGTERM_GRACE", "1.5")) except (TypeError, ValueError): _grace = 1.5 if _grace > 0: - _t.sleep(_grace) + time.sleep(_grace) except Exception: pass # never block signal handling raise KeyboardInterrupt() @@ -10528,7 +10995,6 @@ def main( if cli._init_agent( model_override=turn_route["model"], runtime_override=turn_route["runtime"], - route_label=turn_route["label"], request_overrides=turn_route.get("request_overrides"), ): cli.agent.quiet_mode = True @@ -10542,6 +11008,15 @@ def main( user_message=effective_query, conversation_history=cli.conversation_history, ) + # Sync session_id if mid-run compression created a + # continuation session. The exit line below reports + # session_id to stderr for automation wrappers; without + # this sync it would point at the ended parent. + if ( + getattr(cli.agent, "session_id", None) + and cli.agent.session_id != cli.session_id + ): + cli.session_id = cli.agent.session_id response = result.get("final_response", "") if isinstance(result, dict) else str(result) if response: print(response) diff --git a/cron/jobs.py b/cron/jobs.py index 06d782888f..8fb3f868a9 100644 --- a/cron/jobs.py +++ b/cron/jobs.py @@ -9,6 +9,7 @@ import copy import json import logging import tempfile +import threading import os import re import uuid @@ -34,6 +35,11 @@ except ImportError: HERMES_DIR = get_hermes_home().resolve() CRON_DIR = HERMES_DIR / "cron" JOBS_FILE = CRON_DIR / "jobs.json" + +# In-process lock protecting load_jobs→modify→save_jobs cycles. 
+# Required when tick() runs jobs in parallel threads — without this, +# concurrent mark_job_run / advance_next_run calls can clobber each other. +_jobs_file_lock = threading.Lock() OUTPUT_DIR = CRON_DIR / "output" ONESHOT_GRACE_SECONDS = 120 @@ -594,43 +600,44 @@ def mark_job_run(job_id: str, success: bool, error: Optional[str] = None, ``delivery_error`` is tracked separately from the agent error — a job can succeed (agent produced output) but fail delivery (platform down). """ - jobs = load_jobs() - for i, job in enumerate(jobs): - if job["id"] == job_id: - now = _hermes_now().isoformat() - job["last_run_at"] = now - job["last_status"] = "ok" if success else "error" - job["last_error"] = error if not success else None - # Track delivery failures separately — cleared on successful delivery - job["last_delivery_error"] = delivery_error - - # Increment completed count - if job.get("repeat"): - job["repeat"]["completed"] = job["repeat"].get("completed", 0) + 1 + with _jobs_file_lock: + jobs = load_jobs() + for i, job in enumerate(jobs): + if job["id"] == job_id: + now = _hermes_now().isoformat() + job["last_run_at"] = now + job["last_status"] = "ok" if success else "error" + job["last_error"] = error if not success else None + # Track delivery failures separately — cleared on successful delivery + job["last_delivery_error"] = delivery_error - # Check if we've hit the repeat limit - times = job["repeat"].get("times") - completed = job["repeat"]["completed"] - if times is not None and times > 0 and completed >= times: - # Remove the job (limit reached) - jobs.pop(i) - save_jobs(jobs) - return - - # Compute next run - job["next_run_at"] = compute_next_run(job["schedule"], now) + # Increment completed count + if job.get("repeat"): + job["repeat"]["completed"] = job["repeat"].get("completed", 0) + 1 + + # Check if we've hit the repeat limit + times = job["repeat"].get("times") + completed = job["repeat"]["completed"] + if times is not None and times > 0 and completed >= times: + # Remove the job (limit reached) + jobs.pop(i) + save_jobs(jobs) + return + + # Compute next run + job["next_run_at"] = compute_next_run(job["schedule"], now) - # If no next run (one-shot completed), disable - if job["next_run_at"] is None: - job["enabled"] = False - job["state"] = "completed" - elif job.get("state") != "paused": - job["state"] = "scheduled" + # If no next run (one-shot completed), disable + if job["next_run_at"] is None: + job["enabled"] = False + job["state"] = "completed" + elif job.get("state") != "paused": + job["state"] = "scheduled" - save_jobs(jobs) - return + save_jobs(jobs) + return - logger.warning("mark_job_run: job_id %s not found, skipping save", job_id) + logger.warning("mark_job_run: job_id %s not found, skipping save", job_id) def advance_next_run(job_id: str) -> bool: @@ -645,20 +652,21 @@ def advance_next_run(job_id: str) -> bool: Returns True if next_run_at was advanced, False otherwise. 
""" - jobs = load_jobs() - for job in jobs: - if job["id"] == job_id: - kind = job.get("schedule", {}).get("kind") - if kind not in ("cron", "interval"): + with _jobs_file_lock: + jobs = load_jobs() + for job in jobs: + if job["id"] == job_id: + kind = job.get("schedule", {}).get("kind") + if kind not in ("cron", "interval"): + return False + now = _hermes_now().isoformat() + new_next = compute_next_run(job["schedule"], now) + if new_next and new_next != job.get("next_run_at"): + job["next_run_at"] = new_next + save_jobs(jobs) + return True return False - now = _hermes_now().isoformat() - new_next = compute_next_run(job["schedule"], now) - if new_next and new_next != job.get("next_run_at"): - job["next_run_at"] = new_next - save_jobs(jobs) - return True - return False - return False + return False def get_due_jobs() -> List[Dict[str, Any]]: diff --git a/cron/scheduler.py b/cron/scheduler.py index 8938063c7f..61d5537d90 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -252,7 +252,11 @@ def _send_media_via_adapter(adapter, chat_id: str, media_files: list, metadata: coro = adapter.send_document(chat_id=chat_id, file_path=media_path, metadata=metadata) future = asyncio.run_coroutine_threadsafe(coro, loop) - result = future.result(timeout=30) + try: + result = future.result(timeout=30) + except TimeoutError: + future.cancel() + raise if result and not getattr(result, "success", True): logger.warning( "Job '%s': media send failed for %s: %s", @@ -382,7 +386,11 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option runtime_adapter.send(chat_id, text_to_send, metadata=send_metadata), loop, ) - send_result = future.result(timeout=60) + try: + send_result = future.result(timeout=60) + except TimeoutError: + future.cancel() + raise if send_result and not getattr(send_result, "success", True): err = getattr(send_result, "error", "unknown") logger.warning( @@ -422,7 +430,6 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option # prevent "coroutine was never awaited" RuntimeWarning, then retry in a # fresh thread that has no running loop. coro.close() - import concurrent.futures with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool: future = pool.submit(asyncio.run, _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files)) result = future.result(timeout=30) @@ -564,15 +571,53 @@ def _run_job_script(script_path: str) -> tuple[bool, str]: return False, f"Script execution failed: {exc}" -def _build_job_prompt(job: dict) -> str: - """Build the effective prompt for a cron job, optionally loading one or more skills first.""" +def _parse_wake_gate(script_output: str) -> bool: + """Parse the last non-empty stdout line of a cron job's pre-check script + as a wake gate. + + The convention (ported from nanoclaw #1232): if the last stdout line is + JSON like ``{"wakeAgent": false}``, the agent is skipped entirely — no + LLM run, no delivery. Any other output (non-JSON, missing flag, gate + absent, or ``wakeAgent: true``) means wake the agent normally. + + Returns True if the agent should wake, False to skip. 
+ """ + if not script_output: + return True + stripped_lines = [line for line in script_output.splitlines() if line.strip()] + if not stripped_lines: + return True + last_line = stripped_lines[-1].strip() + try: + gate = json.loads(last_line) + except (json.JSONDecodeError, ValueError): + return True + if not isinstance(gate, dict): + return True + return gate.get("wakeAgent", True) is not False + + +def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str: + """Build the effective prompt for a cron job, optionally loading one or more skills first. + + Args: + job: The cron job dict. + prerun_script: Optional ``(success, stdout)`` from a script that has + already been executed by the caller (e.g. for a wake-gate check). + When provided, the script is not re-executed and the cached + result is used for prompt injection. When omitted, the script + (if any) runs inline as before. + """ prompt = job.get("prompt", "") skills = job.get("skills") # Run data-collection script if configured, inject output as context. script_path = job.get("script") if script_path: - success, script_output = _run_job_script(script_path) + if prerun_script is not None: + success, script_output = prerun_script + else: + success, script_output = _run_job_script(script_path) if success: if script_output: prompt = ( @@ -674,7 +719,30 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: job_id = job["id"] job_name = job["name"] - prompt = _build_job_prompt(job) + + # Wake-gate: if this job has a pre-check script, run it BEFORE building + # the prompt so a ``{"wakeAgent": false}`` response can short-circuit + # the whole agent run. We pass the result into _build_job_prompt so + # the script is only executed once. + prerun_script = None + script_path = job.get("script") + if script_path: + prerun_script = _run_job_script(script_path) + _ran_ok, _script_output = prerun_script + if _ran_ok and not _parse_wake_gate(_script_output): + logger.info( + "Job '%s' (ID: %s): wakeAgent=false, skipping agent run", + job_name, job_id, + ) + silent_doc = ( + f"# Cron Job: {job_name}\n\n" + f"**Job ID:** {job_id}\n" + f"**Run Time:** {_hermes_now().strftime('%Y-%m-%d %H:%M:%S')}\n\n" + "Script gate returned `wakeAgent=false` — agent skipped.\n" + ) + return True, silent_doc, SILENT_MARKER, None + + prompt = _build_job_prompt(job, prerun_script=prerun_script) origin = _resolve_origin(job) _cron_session_id = f"cron_{job_id}_{_hermes_now().strftime('%Y%m%d_%H%M%S')}" @@ -686,14 +754,17 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: # scheduler process — every job this process runs is a cron job. os.environ["HERMES_CRON_SESSION"] = "1" + # Use ContextVars for per-job session/delivery state so parallel jobs + # don't clobber each other's targets (os.environ is process-global). + from gateway.session_context import set_session_vars, clear_session_vars, _VAR_MAP + + _ctx_tokens = set_session_vars( + platform=origin["platform"] if origin else "", + chat_id=str(origin["chat_id"]) if origin else "", + chat_name=origin.get("chat_name", "") if origin else "", + ) + try: - # Inject origin context so the agent's send_message tool knows the chat. - # Must be INSIDE the try block so the finally cleanup always runs. 
- if origin: - os.environ["HERMES_SESSION_PLATFORM"] = origin["platform"] - os.environ["HERMES_SESSION_CHAT_ID"] = str(origin["chat_id"]) - if origin.get("chat_name"): - os.environ["HERMES_SESSION_CHAT_NAME"] = origin["chat_name"] # Re-read .env and config.yaml fresh every run so provider/key # changes take effect without a gateway restart. from dotenv import load_dotenv @@ -704,10 +775,10 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: delivery_target = _resolve_delivery_target(job) if delivery_target: - os.environ["HERMES_CRON_AUTO_DELIVER_PLATFORM"] = delivery_target["platform"] - os.environ["HERMES_CRON_AUTO_DELIVER_CHAT_ID"] = str(delivery_target["chat_id"]) + _VAR_MAP["HERMES_CRON_AUTO_DELIVER_PLATFORM"].set(delivery_target["platform"]) + _VAR_MAP["HERMES_CRON_AUTO_DELIVER_CHAT_ID"].set(str(delivery_target["chat_id"])) if delivery_target.get("thread_id") is not None: - os.environ["HERMES_CRON_AUTO_DELIVER_THREAD_ID"] = str(delivery_target["thread_id"]) + _VAR_MAP["HERMES_CRON_AUTO_DELIVER_THREAD_ID"].set(str(delivery_target["thread_id"])) model = job.get("model") or os.getenv("HERMES_MODEL") or "" @@ -746,14 +817,13 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: prefill_messages = None prefill_file = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "") or _cfg.get("prefill_messages_file", "") if prefill_file: - import json as _json pfpath = Path(prefill_file).expanduser() if not pfpath.is_absolute(): pfpath = _hermes_home / pfpath if pfpath.exists(): try: with open(pfpath, "r", encoding="utf-8") as _pf: - prefill_messages = _json.load(_pf) + prefill_messages = json.load(_pf) if not isinstance(prefill_messages, list): prefill_messages = None except Exception as e: @@ -765,7 +835,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: # Provider routing pr = _cfg.get("provider_routing", {}) - smart_routing = _cfg.get("smart_model_routing", {}) or {} from hermes_cli.runtime_provider import ( resolve_runtime_provider, @@ -782,24 +851,9 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: message = format_runtime_provider_error(exc) raise RuntimeError(message) from exc - from agent.smart_model_routing import resolve_turn_route - turn_route = resolve_turn_route( - prompt, - smart_routing, - { - "model": model, - "api_key": runtime.get("api_key"), - "base_url": runtime.get("base_url"), - "provider": runtime.get("provider"), - "api_mode": runtime.get("api_mode"), - "command": runtime.get("command"), - "args": list(runtime.get("args") or []), - }, - ) - fallback_model = _cfg.get("fallback_providers") or _cfg.get("fallback_model") or None credential_pool = None - runtime_provider = str(turn_route["runtime"].get("provider") or "").strip().lower() + runtime_provider = str(runtime.get("provider") or "").strip().lower() if runtime_provider: try: from agent.credential_pool import load_pool @@ -816,13 +870,13 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: logger.debug("Job '%s': failed to load credential pool for %s: %s", job_id, runtime_provider, e) agent = AIAgent( - model=turn_route["model"], - api_key=turn_route["runtime"].get("api_key"), - base_url=turn_route["runtime"].get("base_url"), - provider=turn_route["runtime"].get("provider"), - api_mode=turn_route["runtime"].get("api_mode"), - acp_command=turn_route["runtime"].get("command"), - acp_args=turn_route["runtime"].get("args"), + model=model, + api_key=runtime.get("api_key"), + base_url=runtime.get("base_url"), + provider=runtime.get("provider"), + 
api_mode=runtime.get("api_mode"), + acp_command=runtime.get("command"), + acp_args=runtime.get("args"), max_iterations=max_iterations, reasoning_config=reasoning_config, prefill_messages=prefill_messages, @@ -967,16 +1021,8 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: return False, output, "", error_msg finally: - # Clean up injected env vars so they don't leak to other jobs - for key in ( - "HERMES_SESSION_PLATFORM", - "HERMES_SESSION_CHAT_ID", - "HERMES_SESSION_CHAT_NAME", - "HERMES_CRON_AUTO_DELIVER_PLATFORM", - "HERMES_CRON_AUTO_DELIVER_CHAT_ID", - "HERMES_CRON_AUTO_DELIVER_THREAD_ID", - ): - os.environ.pop(key, None) + # Clean up ContextVar session/delivery state for this job. + clear_session_vars(_ctx_tokens) if _session_db: try: _session_db.end_session(_cron_session_id, "cron_complete") @@ -1029,15 +1075,41 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int: if verbose: logger.info("%s - %s job(s) due", _hermes_now().strftime('%H:%M:%S'), len(due_jobs)) - executed = 0 + # Advance next_run_at for all recurring jobs FIRST, under the file lock, + # before any execution begins. This preserves at-most-once semantics. for job in due_jobs: - try: - # For recurring jobs (cron/interval), advance next_run_at to the - # next future occurrence BEFORE execution. This way, if the - # process crashes mid-run, the job won't re-fire on restart. - # One-shot jobs are left alone so they can retry on restart. - advance_next_run(job["id"]) + advance_next_run(job["id"]) + # Resolve max parallel workers: env var > config.yaml > unbounded. + # Set HERMES_CRON_MAX_PARALLEL=1 to restore old serial behaviour. + _max_workers: Optional[int] = None + try: + _env_par = os.getenv("HERMES_CRON_MAX_PARALLEL", "").strip() + if _env_par: + _max_workers = int(_env_par) or None + except (ValueError, TypeError): + logger.warning("Invalid HERMES_CRON_MAX_PARALLEL value; defaulting to unbounded") + if _max_workers is None: + try: + _ucfg = load_config() or {} + _cfg_par = ( + _ucfg.get("cron", {}) if isinstance(_ucfg, dict) else {} + ).get("max_parallel_jobs") + if _cfg_par is not None: + _max_workers = int(_cfg_par) or None + except Exception: + pass + + if verbose: + logger.info( + "Running %d job(s) in parallel (max_workers=%s)", + len(due_jobs), + _max_workers if _max_workers else "unbounded", + ) + + def _process_job(job: dict) -> bool: + """Run one due job end-to-end: execute, save, deliver, mark.""" + try: success, output, final_response, error = run_job(job) output_file = save_job_output(job["id"], output) @@ -1069,13 +1141,23 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int: error = "Agent completed but produced empty response (model error, timeout, or misconfiguration)" mark_job_run(job["id"], success, error, delivery_error=delivery_error) - executed += 1 + return True except Exception as e: logger.error("Error processing job %s: %s", job['id'], e) mark_job_run(job["id"], False, str(e)) + return False - return executed + # Run all due jobs concurrently, each in its own ContextVar copy + # so session/delivery state stays isolated per-thread. 
+ with concurrent.futures.ThreadPoolExecutor(max_workers=_max_workers) as _tick_pool: + _futures = [] + for job in due_jobs: + _ctx = contextvars.copy_context() + _futures.append(_tick_pool.submit(_ctx.run, _process_job, job)) + _results = [f.result() for f in _futures] + + return sum(_results) finally: if fcntl: fcntl.flock(lock_fd, fcntl.LOCK_UN) diff --git a/docs/acp-setup.md b/docs/acp-setup.md deleted file mode 100644 index 8da4e2a215..0000000000 --- a/docs/acp-setup.md +++ /dev/null @@ -1,228 +0,0 @@ -# Hermes Agent — ACP (Agent Client Protocol) Setup Guide - -Hermes Agent supports the **Agent Client Protocol (ACP)**, allowing it to run as -a coding agent inside your editor. ACP lets your IDE send tasks to Hermes, and -Hermes responds with file edits, terminal commands, and explanations — all shown -natively in the editor UI. - ---- - -## Prerequisites - -- Hermes Agent installed and configured (`hermes setup` completed) -- An API key / provider set up in `~/.hermes/.env` or via `hermes login` -- Python 3.11+ - -Install the ACP extra: - -```bash -pip install -e ".[acp]" -``` - ---- - -## VS Code Setup - -### 1. Install the ACP Client extension - -Open VS Code and install **ACP Client** from the marketplace: - -- Press `Ctrl+Shift+X` (or `Cmd+Shift+X` on macOS) -- Search for **"ACP Client"** -- Click **Install** - -Or install from the command line: - -```bash -code --install-extension anysphere.acp-client -``` - -### 2. Configure settings.json - -Open your VS Code settings (`Ctrl+,` → click the `{}` icon for JSON) and add: - -```json -{ - "acpClient.agents": [ - { - "name": "hermes-agent", - "registryDir": "/path/to/hermes-agent/acp_registry" - } - ] -} -``` - -Replace `/path/to/hermes-agent` with the actual path to your Hermes Agent -installation (e.g. `~/.hermes/hermes-agent`). - -Alternatively, if `hermes` is on your PATH, the ACP Client can discover it -automatically via the registry directory. - -### 3. Restart VS Code - -After configuring, restart VS Code. You should see **Hermes Agent** appear in -the ACP agent picker in the chat/agent panel. - ---- - -## Zed Setup - -Zed has built-in ACP support. - -### 1. Configure Zed settings - -Open Zed settings (`Cmd+,` on macOS or `Ctrl+,` on Linux) and add to your -`settings.json`: - -```json -{ - "agent_servers": { - "hermes-agent": { - "type": "custom", - "command": "hermes", - "args": ["acp"], - }, - }, -} -``` - -### 2. Restart Zed - -Hermes Agent will appear in the agent panel. Select it and start a conversation. - ---- - -## JetBrains Setup (IntelliJ, PyCharm, WebStorm, etc.) - -### 1. Install the ACP plugin - -- Open **Settings** → **Plugins** → **Marketplace** -- Search for **"ACP"** or **"Agent Client Protocol"** -- Install and restart the IDE - -### 2. Configure the agent - -- Open **Settings** → **Tools** → **ACP Agents** -- Click **+** to add a new agent -- Set the registry directory to your `acp_registry/` folder: - `/path/to/hermes-agent/acp_registry` -- Click **OK** - -### 3. Use the agent - -Open the ACP panel (usually in the right sidebar) and select **Hermes Agent**. - ---- - -## What You Will See - -Once connected, your editor provides a native interface to Hermes Agent: - -### Chat Panel -A conversational interface where you can describe tasks, ask questions, and -give instructions. Hermes responds with explanations and actions. - -### File Diffs -When Hermes edits files, you see standard diffs in the editor. 
You can: -- **Accept** individual changes -- **Reject** changes you don't want -- **Review** the full diff before applying - -### Terminal Commands -When Hermes needs to run shell commands (builds, tests, installs), the editor -shows them in an integrated terminal. Depending on your settings: -- Commands may run automatically -- Or you may be prompted to **approve** each command - -### Approval Flow -For potentially destructive operations, the editor will prompt you for -approval before Hermes proceeds. This includes: -- File deletions -- Shell commands -- Git operations - ---- - -## Configuration - -Hermes Agent under ACP uses the **same configuration** as the CLI: - -- **API keys / providers**: `~/.hermes/.env` -- **Agent config**: `~/.hermes/config.yaml` -- **Skills**: `~/.hermes/skills/` -- **Sessions**: `~/.hermes/state.db` - -You can run `hermes setup` to configure providers, or edit `~/.hermes/.env` -directly. - -### Changing the model - -Edit `~/.hermes/config.yaml`: - -```yaml -model: openrouter/nous/hermes-3-llama-3.1-70b -``` - -Or set the `HERMES_MODEL` environment variable. - -### Toolsets - -ACP sessions use the curated `hermes-acp` toolset by default. It is designed for editor workflows and intentionally excludes things like messaging delivery, cronjob management, and audio-first UX features. - ---- - -## Troubleshooting - -### Agent doesn't appear in the editor - -1. **Check the registry path** — make sure the `acp_registry/` directory path - in your editor settings is correct and contains `agent.json`. -2. **Check `hermes` is on PATH** — run `which hermes` in a terminal. If not - found, you may need to activate your virtualenv or add it to PATH. -3. **Restart the editor** after changing settings. - -### Agent starts but errors immediately - -1. Run `hermes doctor` to check your configuration. -2. Check that you have a valid API key: `hermes status` -3. Try running `hermes acp` directly in a terminal to see error output. - -### "Module not found" errors - -Make sure you installed the ACP extra: - -```bash -pip install -e ".[acp]" -``` - -### Slow responses - -- ACP streams responses, so you should see incremental output. If the agent - appears stuck, check your network connection and API provider status. -- Some providers have rate limits. Try switching to a different model/provider. - -### Permission denied for terminal commands - -If the editor blocks terminal commands, check your ACP Client extension -settings for auto-approval or manual-approval preferences. - -### Logs - -Hermes logs are written to stderr when running in ACP mode. Check: -- VS Code: **Output** panel → select **ACP Client** or **Hermes Agent** -- Zed: **View** → **Toggle Terminal** and check the process output -- JetBrains: **Event Log** or the ACP tool window - -You can also enable verbose logging: - -```bash -HERMES_LOG_LEVEL=DEBUG hermes acp -``` - ---- - -## Further Reading - -- [ACP Specification](https://github.com/anysphere/acp) -- [Hermes Agent Documentation](https://github.com/NousResearch/hermes-agent) -- Run `hermes --help` for all CLI options diff --git a/docs/honcho-integration-spec.html b/docs/honcho-integration-spec.html deleted file mode 100644 index 455fb84f23..0000000000 --- a/docs/honcho-integration-spec.html +++ /dev/null @@ -1,698 +0,0 @@ - - - - - -honcho-integration-spec - - - - - - - -
-# honcho-integration-spec
-
-Comparison of Hermes Agent vs. openclaw-honcho — and a porting spec for bringing Hermes patterns into other Honcho integrations.
-
-hermes-agent / openclaw-honcho · Python + TypeScript · 2026-03-09
-
-## Overview
-
-Two independent Honcho integrations have been built for two different agent runtimes: Hermes Agent (Python, baked into the runner) and openclaw-honcho (TypeScript plugin via hook/tool API). Both use the same Honcho peer paradigm — dual peer model, session.context(), peer.chat() — but they made different tradeoffs at every layer.
-
-This document maps those tradeoffs and defines a porting spec: a set of Hermes-originated patterns, each stated as an integration-agnostic interface, that any Honcho integration can adopt regardless of runtime or language.
-
-> **Scope:** Both integrations work correctly today. This spec is about the delta — patterns in Hermes that are worth propagating and patterns in openclaw-honcho that Hermes should eventually adopt. The spec is additive, not prescriptive.
-
-## Architecture comparison
-
-### Hermes: baked-in runner
-
-Honcho is initialised directly inside AIAgent.__init__. There is no plugin boundary. Session management, context injection, async prefetch, and CLI surface are all first-class concerns of the runner. Context is injected once per session (baked into _cached_system_prompt) and never re-fetched mid-session — this maximises prefix cache hits at the LLM provider.
-
-```mermaid
-%%{init: {'theme': 'dark', 'themeVariables': { 'primaryColor': '#1f3150', 'primaryTextColor': '#c9d1d9', 'primaryBorderColor': '#3d6ea5', 'lineColor': '#3d6ea5', 'secondaryColor': '#162030', 'tertiaryColor': '#11151c' }}}%%
-flowchart TD
-    U["user message"] --> P["_honcho_prefetch() (reads cache — no HTTP)"]
-    P --> SP["_build_system_prompt() (first turn only, cached)"]
-    SP --> LLM["LLM call"]
-    LLM --> R["response"]
-    R --> FP["_honcho_fire_prefetch() (daemon threads, turn end)"]
-    FP --> C1["prefetch_context() thread"]
-    FP --> C2["prefetch_dialectic() thread"]
-    C1 --> CACHE["_context_cache / _dialectic_cache"]
-    C2 --> CACHE
-
-    style U fill:#162030,stroke:#3d6ea5,color:#c9d1d9
-    style P fill:#1f3150,stroke:#3d6ea5,color:#c9d1d9
-    style SP fill:#1f3150,stroke:#3d6ea5,color:#c9d1d9
-    style LLM fill:#162030,stroke:#3d6ea5,color:#c9d1d9
-    style R fill:#162030,stroke:#3d6ea5,color:#c9d1d9
-    style FP fill:#2a1a40,stroke:#bc8cff,color:#c9d1d9
-    style C1 fill:#2a1a40,stroke:#bc8cff,color:#c9d1d9
-    style C2 fill:#2a1a40,stroke:#bc8cff,color:#c9d1d9
-    style CACHE fill:#11151c,stroke:#484f58,color:#6e7681
-```
-
-### openclaw-honcho: hook-based plugin
-
-The plugin registers hooks against OpenClaw's event bus. Context is fetched synchronously inside before_prompt_build on every turn. Message capture happens in agent_end. The multi-agent hierarchy is tracked via subagent_spawned. This model is correct but every turn pays a blocking Honcho round-trip before the LLM call can begin.
-
-```mermaid
-%%{init: {'theme': 'dark', 'themeVariables': { 'primaryColor': '#1f3150', 'primaryTextColor': '#c9d1d9', 'primaryBorderColor': '#3d6ea5', 'lineColor': '#3d6ea5', 'secondaryColor': '#162030', 'tertiaryColor': '#11151c' }}}%%
-flowchart TD
-    U2["user message"] --> BPB["before_prompt_build (BLOCKING HTTP — every turn)"]
-    BPB --> CTX["session.context()"]
-    CTX --> SP2["system prompt assembled"]
-    SP2 --> LLM2["LLM call"]
-    LLM2 --> R2["response"]
-    R2 --> AE["agent_end hook"]
-    AE --> SAVE["session.addMessages() / session.setMetadata()"]
-
-    style U2 fill:#162030,stroke:#3d6ea5,color:#c9d1d9
-    style BPB fill:#3a1515,stroke:#f47067,color:#c9d1d9
-    style CTX fill:#3a1515,stroke:#f47067,color:#c9d1d9
-    style SP2 fill:#1f3150,stroke:#3d6ea5,color:#c9d1d9
-    style LLM2 fill:#162030,stroke:#3d6ea5,color:#c9d1d9
-    style R2 fill:#162030,stroke:#3d6ea5,color:#c9d1d9
-    style AE fill:#162030,stroke:#3d6ea5,color:#c9d1d9
-    style SAVE fill:#11151c,stroke:#484f58,color:#6e7681
-```
-
-## Diff table
-
-| Dimension | Hermes Agent | openclaw-honcho |
-| --- | --- | --- |
-| Context injection timing | Once per session (cached). Zero HTTP on response path after turn 1. | Every turn, blocking. Fresh context per turn but adds latency. |
-| Prefetch strategy | Daemon threads fire at turn end; consumed next turn from cache. | None. Blocking call at prompt-build time. |
-| Dialectic (peer.chat) | Prefetched async; result injected into system prompt next turn. | On-demand via honcho_recall / honcho_analyze tools. |
-| Reasoning level | Dynamic: scales with message length. Floor = config default. Cap = "high". | Fixed per tool: recall=minimal, analyze=medium. |
-| Memory mode | user_memory_mode / agent_memory_mode: hybrid / honcho / local. | None. Always writes to Honcho. |
-| Write frequency | async (background queue), turn, session, N turns. | After every agent_end (no control). |
-| AI peer identity | observe_me=True, seed_ai_identity(), get_ai_representation(), SOUL.md → AI peer. | Agent files uploaded to agent peer at setup. No ongoing self-observation seeding. |
-| Context scope | User peer + AI peer representation, both injected. | User peer (owner) representation + conversation summary. peerPerspective on context call. |
-| Session naming | per-directory / global / manual map / title-based. | Derived from platform session key. |
-| Multi-agent | Single-agent only. | Parent observer hierarchy via subagent_spawned. |
-| Tool surface | Single query_user_context tool (on-demand dialectic). | 6 tools: session, profile, search, context (fast) + recall, analyze (LLM). |
-| Platform metadata | Not stripped. | Explicitly stripped before Honcho storage. |
-| Message dedup | None (sends on every save cycle). | lastSavedIndex in session metadata prevents re-sending. |
-| CLI surface in prompt | Management commands injected into system prompt. Agent knows its own CLI. | Not injected. |
-| AI peer name in identity | Replaces "Hermes Agent" in DEFAULT_AGENT_IDENTITY when configured. | Not implemented. |
-| QMD / local file search | Not implemented. | Passthrough tools when QMD backend configured. |
-| Workspace metadata | Not implemented. | agentPeerMap in workspace metadata tracks agent→peer ID. |
-
-## Hermes patterns to port
-
-Six patterns from Hermes are worth adopting in any Honcho integration. They are described below as integration-agnostic interfaces — the implementation will differ per runtime, but the contract is the same.
-
-**Patterns Hermes contributes**
-
-- Async prefetch (zero-latency)
-- Dynamic reasoning level
-- Per-peer memory modes
-- AI peer identity formation
-- Session naming strategies
-- CLI surface injection
-
-**Patterns openclaw contributes back**
-
-- lastSavedIndex dedup
-- Platform metadata stripping
-- Multi-agent observer hierarchy
-- peerPerspective on context()
-- Tiered tool surface (fast/LLM)
-- Workspace agentPeerMap
- - -
-

Spec: async prefetch

- -

Problem

-

Calling session.context() and peer.chat() synchronously before each LLM call adds 200–800ms of Honcho round-trip latency to every turn. Users experience this as the agent "thinking slowly."

- -

Pattern

-

Fire both calls as non-blocking background work at the end of each turn. Store results in a per-session cache keyed by session ID. At the start of the next turn, pop from cache — the HTTP is already done. First turn is cold (empty cache); all subsequent turns are zero-latency on the response path.

- -

Interface contract

-
// TypeScript (openclaw / nanobot plugin shape)
-
-interface AsyncPrefetch {
-  // Fire context + dialectic fetches at turn end. Non-blocking.
-  firePrefetch(sessionId: string, userMessage: string): void;
-
-  // Pop cached results at turn start. Returns empty if cache is cold.
-  popContextResult(sessionId: string): ContextResult | null;
-  popDialecticResult(sessionId: string): string | null;
-}
-
-type ContextResult = {
-  representation: string;
-  card: string[];
-  aiRepresentation?: string;  // AI peer context if enabled
-  summary?: string;            // conversation summary if fetched
-};
- -

Implementation notes

-
    -
  • Python: threading.Thread(daemon=True). Write to dict[session_id, result] — GIL makes this safe for simple writes.
  • -
  • TypeScript: Promise stored in Map<string, Promise<ContextResult>>. Await at pop time. If not resolved yet, skip (return null) — do not block.
  • -
  • The pop is destructive: clears the cache entry after reading so stale data never accumulates.
  • -
  • Prefetch should also fire on first turn (even though it won't be consumed until turn 2) — this ensures turn 2 is never cold.
  • -
- -

openclaw-honcho adoption

-

Move session.context() from before_prompt_build to a post-agent_end background task. Store result in state.contextCache. In before_prompt_build, read from cache instead of calling Honcho. If cache is empty (turn 1), inject nothing — the prompt is still valid without Honcho context on the first turn.

---

## Spec: dynamic reasoning level

### Problem

Honcho's dialectic endpoint supports reasoning levels from minimal to max. A fixed level per tool wastes budget on simple queries and under-serves complex ones.

### Pattern

Select the reasoning level dynamically based on the user's message. Use the configured default as a floor. Bump by message length. Cap auto-selection at high — never select max automatically.

### Interface contract

```typescript
// Shared helper — identical logic in any language
const LEVELS = ["minimal", "low", "medium", "high", "max"];

function dynamicReasoningLevel(
  query: string,
  configDefault: string = "low"
): string {
  const baseIdx = Math.max(0, LEVELS.indexOf(configDefault));
  const n = query.length;
  const bump = n < 120 ? 0 : n < 400 ? 1 : 2;
  return LEVELS[Math.min(baseIdx + bump, 3)]; // cap at "high" (idx 3)
}
```
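Example selections — the repeated strings are illustrative stand-ins for real messages of those lengths:

```typescript
dynamicReasoningLevel("fix this typo");             // "low"    — short query, no bump
dynamicReasoningLevel("x".repeat(200));             // "medium" — 120–400 chars, +1
dynamicReasoningLevel("x".repeat(500));             // "high"   — >400 chars, +2
dynamicReasoningLevel("x".repeat(500), "minimal");  // "medium" — recall-style floor
```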

### Config key

Add a `dialecticReasoningLevel` config field (string, default `"low"`). This sets the floor. Users can raise or lower it. The dynamic bump always applies on top.

### openclaw-honcho adoption

Apply in honcho_recall and honcho_analyze: replace the fixed reasoningLevel with the dynamic selector. honcho_recall should use floor "minimal" and honcho_analyze floor "medium" — both still bump with message length.

---

## Spec: per-peer memory modes

### Problem

Users want independent control over whether user context and agent context are written locally, to Honcho, or both. A single memoryMode shorthand is not granular enough.

### Pattern

Three modes per peer: hybrid (write both local + Honcho), honcho (Honcho only, disable local files), local (local files only, skip Honcho sync for this peer). Two orthogonal axes: user peer and agent peer.

### Config schema

```jsonc
// ~/.openclaw/openclaw.json  (or ~/.nanobot/config.json)
{
  "plugins": {
    "openclaw-honcho": {
      "config": {
        "apiKey": "...",
        "memoryMode": "hybrid",        // shorthand: both peers
        "userMemoryMode": "honcho",    // override for user peer
        "agentMemoryMode": "hybrid"    // override for agent peer
      }
    }
  }
}
```

### Resolution order

1. Per-peer field (`userMemoryMode` / `agentMemoryMode`) — wins if present.
2. Shorthand `memoryMode` — applies to both peers as default.
3. Hardcoded default: `"hybrid"`.
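A sketch of that chain, assuming a plain config object with the field names from the schema above:

```typescript
type MemoryMode = "hybrid" | "honcho" | "local";

interface MemoryConfig {
  memoryMode?: MemoryMode;
  userMemoryMode?: MemoryMode;
  agentMemoryMode?: MemoryMode;
}

function resolveMemoryMode(cfg: MemoryConfig, peer: "user" | "agent"): MemoryMode {
  const perPeer = peer === "user" ? cfg.userMemoryMode : cfg.agentMemoryMode;
  return perPeer ?? cfg.memoryMode ?? "hybrid"; // per-peer → shorthand → default
}
```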

### Effect on Honcho sync

- `userMemoryMode=local`: skip adding user peer messages to Honcho.
- `agentMemoryMode=local`: skip adding assistant peer messages to Honcho.
- Both `local`: skip `session.addMessages()` entirely.
- `userMemoryMode=honcho`: disable local USER.md writes.
- `agentMemoryMode=honcho`: disable local MEMORY.md / SOUL.md writes.
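These rules reduce to two gates per peer. A sketch using `resolveMemoryMode()` from the previous snippet, where `cfg` is the resolved plugin config:

```typescript
const syncUserToHoncho  = resolveMemoryMode(cfg, "user")  !== "local";
const syncAgentToHoncho = resolveMemoryMode(cfg, "agent") !== "local";
const skipAddMessages   = !syncUserToHoncho && !syncAgentToHoncho;

const writeLocalUserFiles  = resolveMemoryMode(cfg, "user")  !== "honcho"; // USER.md
const writeLocalAgentFiles = resolveMemoryMode(cfg, "agent") !== "honcho"; // MEMORY.md / SOUL.md
```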
---

## Spec: AI peer identity formation

### Problem

Honcho builds the user's representation organically by observing what the user says. The same mechanism exists for the AI peer — but only if observe_me=True is set for the agent peer. Without it, the agent peer accumulates nothing and Honcho's AI-side model never forms.


Additionally, existing persona files (SOUL.md, IDENTITY.md) should seed the AI peer's Honcho representation at first activation, rather than waiting for it to emerge from scratch.

### Part A: observe_me=True for agent peer

```typescript
// TypeScript — in session.addPeers() call
await session.addPeers([
  [ownerPeer.id, { observeMe: true,  observeOthers: false }],
  [agentPeer.id, { observeMe: true,  observeOthers: true  }], // was false
]);
```

This is a one-line change but foundational. Without it, Honcho's AI peer representation stays empty regardless of what the agent says.

### Part B: seedAiIdentity()

```typescript
async function seedAiIdentity(
  agentPeer: Peer,
  content: string,
  source: string
): Promise<boolean> {
  const wrapped = [
    `<ai_identity_seed>`,
    `<source>${source}</source>`,
    ``,
    content.trim(),
    `</ai_identity_seed>`,
  ].join("\n");

  await agentPeer.addMessage("assistant", wrapped);
  return true;
}
```

### Part C: migrate agent files at setup

During openclaw honcho setup, upload agent-self files (SOUL.md, IDENTITY.md, AGENTS.md, BOOTSTRAP.md) to the agent peer using seedAiIdentity() instead of session.uploadFile(). This routes the content through Honcho's observation pipeline rather than the file store.
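A sketch of that migration step — `readAgentFile()` is a hypothetical helper that returns the file's contents or `null` when it doesn't exist:

```typescript
const AGENT_SELF_FILES = ["SOUL.md", "IDENTITY.md", "AGENTS.md", "BOOTSTRAP.md"];

for (const name of AGENT_SELF_FILES) {
  const content = await readAgentFile(name);
  if (content) {
    // Routed through the observation pipeline, not the file store.
    await seedAiIdentity(agentPeer, content, name);
  }
}
```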

### Part D: AI peer name in identity

When the agent has a configured name (non-default), inject it into the agent's self-identity prefix. In OpenClaw this means adding to the injected system prompt section:

```typescript
// In context hook return value
return {
  systemPrompt: [
    agentName ? `You are ${agentName}.` : "",
    "## User Memory Context",
    ...sections,
  ].filter(Boolean).join("\n\n")
};
```

### CLI surface: honcho identity subcommand

```
openclaw honcho identity <file>    # seed from file
openclaw honcho identity --show    # show current AI peer representation
```

---

## Spec: session naming strategies

### Problem

When Honcho is used across multiple projects or directories, a single global session means every project shares the same context. Per-directory sessions provide isolation without requiring users to name sessions manually.

### Strategies

| Strategy | Session key | When to use |
|---|---|---|
| `per-directory` | basename of CWD | Default. Each project gets its own session. |
| `global` | fixed string `"global"` | Single cross-project session. |
| manual map | user-configured per path | `sessions` config map overrides directory basename. |
| title-based | sanitized session title | When agent supports named sessions; title set mid-conversation. |
### Config schema

```jsonc
{
  "sessionStrategy": "per-directory",   // "per-directory" | "global"
  "sessionPeerPrefix": false,           // prepend peer name to session key
  "sessions": {                         // manual overrides
    "/home/user/projects/foo": "foo-project"
  }
}
```

### CLI surface

```
openclaw honcho sessions              # list all mappings
openclaw honcho map <name>            # map cwd to session name
openclaw honcho map                   # no-arg = list mappings
```

Resolution order: manual map wins → session title → directory basename → platform key.
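A sketch of that resolution chain under the config schema above; `sessionTitle` and `platformKey` are hypothetical inputs supplied by the host runtime:

```typescript
function resolveSessionKey(
  cwd: string,
  cfg: { sessionStrategy?: string; sessions?: Record<string, string> },
  sessionTitle?: string,
  platformKey?: string
): string {
  const mapped = cfg.sessions?.[cwd];
  if (mapped) return mapped;                             // manual map wins
  if (sessionTitle) return sanitizeTitle(sessionTitle);  // title-based
  if (cfg.sessionStrategy === "global") return "global";
  const basename = cwd.split("/").filter(Boolean).pop(); // per-directory
  return basename ?? platformKey ?? "global";            // platform key fallback
}

function sanitizeTitle(title: string): string {
  return title.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/(^-+|-+$)/g, "");
}
```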

---

## Spec: CLI surface injection

### Problem

When a user asks "how do I change my memory settings?" or "what Honcho commands are available?" the agent either hallucinates or says it doesn't know. The agent should know its own management interface.

### Pattern

When Honcho is active, append a compact command reference to the system prompt. The agent can cite these commands directly instead of guessing.

```typescript
// In context hook, append to systemPrompt
const honchoSection = [
  "# Honcho memory integration",
  `Active. Session: ${sessionKey}. Mode: ${mode}.`,
  "Management commands:",
  "  openclaw honcho status                     — show config + connection",
  "  openclaw honcho mode [hybrid|honcho|local] — show or set memory mode",
  "  openclaw honcho sessions                   — list session mappings",
  "  openclaw honcho map <name>                 — map directory to session",
  "  openclaw honcho identity [file] [--show]   — seed or show AI identity",
  "  openclaw honcho setup                      — full interactive wizard",
].join("\n");
```
Keep it compact. This section is injected every turn. Keep it under 300 chars of context. List commands, not explanations — the agent can explain them on request.

---

## openclaw-honcho checklist

Ordered by impact. Each item maps to a spec section above.

- [ ] **Async prefetch** — move `session.context()` out of `before_prompt_build` into a post-`agent_end` background Promise. Pop from cache at prompt build.
- [ ] **observe_me=True for agent peer** — one-line change in the `session.addPeers()` config for the agent peer.
- [ ] **Dynamic reasoning level** — add the `dynamicReasoningLevel()` helper; apply in `honcho_recall` and `honcho_analyze`. Add `dialecticReasoningLevel` to the config schema.
- [ ] **Per-peer memory modes** — add `userMemoryMode` / `agentMemoryMode` to config; gate Honcho sync and local writes accordingly.
- [ ] **seedAiIdentity()** — add helper; apply during setup migration for SOUL.md / IDENTITY.md instead of `session.uploadFile()`.
- [ ] **Session naming strategies** — add `sessionStrategy`, `sessions` map, and `sessionPeerPrefix` to config; implement the resolution function.
- [ ] **CLI surface injection** — append the command reference to the `before_prompt_build` return value when Honcho is active.
- [ ] **honcho identity subcommand** — add the `openclaw honcho identity` CLI command.
- [ ] **AI peer name injection** — if an `aiPeer` name is configured, prepend it to the injected system prompt.
- [ ] **honcho mode / honcho sessions / honcho map** — CLI parity with Hermes.

Already done in openclaw-honcho (do not re-implement): `lastSavedIndex` dedup, platform metadata stripping, multi-agent parent observer hierarchy, `peerPerspective` on `context()`, tiered tool surface (fast/LLM), workspace `agentPeerMap`, QMD passthrough, self-hosted Honcho support.

---

## nanobot-honcho checklist

nanobot-honcho is a greenfield integration. Start from openclaw-honcho's architecture (hook-based, dual peer) and apply all Hermes patterns from day one rather than retrofitting. Priority order:

### Phase 1 — core correctness

- [ ] Dual peer model (owner + agent peer), both with `observe_me=True`
- [ ] Message capture at turn end with `lastSavedIndex` dedup (see the sketch after this list)
- [ ] Platform metadata stripping before Honcho storage
- [ ] Async prefetch from day one — do not implement blocking context injection
- [ ] Legacy file migration at first activation (USER.md → owner peer, SOUL.md → `seedAiIdentity()`)
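A sketch of the `lastSavedIndex` dedup named above. The session interface mirrors only the calls this document references; `toHonchoMessage` and the message type are illustrative assumptions:

```typescript
interface HonchoSessionLike {
  // Assumed surface — matches the calls referenced in this document.
  getMetadata(): Promise<Record<string, unknown> | null>;
  setMetadata(meta: Record<string, unknown>): Promise<void>;
  addMessages(messages: unknown[]): Promise<void>;
}

type TurnMessage = { role: string; content: string };
declare function toHonchoMessage(m: TurnMessage): unknown;

async function syncTurn(session: HonchoSessionLike, transcript: TurnMessage[]) {
  const meta = (await session.getMetadata()) ?? {};
  const lastSaved =
    typeof meta.lastSavedIndex === "number" ? meta.lastSavedIndex : -1;

  const fresh = transcript.slice(lastSaved + 1);
  if (fresh.length === 0) return; // nothing new since last save — never re-send

  await session.addMessages(fresh.map(toHonchoMessage));
  await session.setMetadata({ ...meta, lastSavedIndex: transcript.length - 1 });
}
```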

### Phase 2 — configuration

- [ ] Config schema: `apiKey`, `workspaceId`, `baseUrl`, `memoryMode`, `userMemoryMode`, `agentMemoryMode`, `dialecticReasoningLevel`, `sessionStrategy`, `sessions`
- [ ] Per-peer memory mode gating
- [ ] Dynamic reasoning level
- [ ] Session naming strategies

### Phase 3 — tools and CLI

- [ ] Tool surface: `honcho_profile`, `honcho_recall`, `honcho_analyze`, `honcho_search`, `honcho_context`
- [ ] CLI: `setup`, `status`, `sessions`, `map`, `mode`, `identity`
- [ ] CLI surface injection into system prompt
- [ ] AI peer name wired into agent identity
- - - - - diff --git a/docs/honcho-integration-spec.md b/docs/honcho-integration-spec.md deleted file mode 100644 index 7731a262d9..0000000000 --- a/docs/honcho-integration-spec.md +++ /dev/null @@ -1,377 +0,0 @@ -# honcho-integration-spec - -Comparison of Hermes Agent vs. openclaw-honcho — and a porting spec for bringing Hermes patterns into other Honcho integrations. - ---- - -## Overview - -Two independent Honcho integrations have been built for two different agent runtimes: **Hermes Agent** (Python, baked into the runner) and **openclaw-honcho** (TypeScript plugin via hook/tool API). Both use the same Honcho peer paradigm — dual peer model, `session.context()`, `peer.chat()` — but they made different tradeoffs at every layer. - -This document maps those tradeoffs and defines a porting spec: a set of Hermes-originated patterns, each stated as an integration-agnostic interface, that any Honcho integration can adopt regardless of runtime or language. - -> **Scope** Both integrations work correctly today. This spec is about the delta — patterns in Hermes that are worth propagating and patterns in openclaw-honcho that Hermes should eventually adopt. The spec is additive, not prescriptive. - ---- - -## Architecture comparison - -### Hermes: baked-in runner - -Honcho is initialised directly inside `AIAgent.__init__`. There is no plugin boundary. Session management, context injection, async prefetch, and CLI surface are all first-class concerns of the runner. Context is injected once per session (baked into `_cached_system_prompt`) and never re-fetched mid-session — this maximises prefix cache hits at the LLM provider. - -Turn flow: - -``` -user message - → _honcho_prefetch() (reads cache — no HTTP) - → _build_system_prompt() (first turn only, cached) - → LLM call - → response - → _honcho_fire_prefetch() (daemon threads, turn end) - → prefetch_context() thread ──┐ - → prefetch_dialectic() thread ─┴→ _context_cache / _dialectic_cache -``` - -### openclaw-honcho: hook-based plugin - -The plugin registers hooks against OpenClaw's event bus. Context is fetched synchronously inside `before_prompt_build` on every turn. Message capture happens in `agent_end`. The multi-agent hierarchy is tracked via `subagent_spawned`. This model is correct but every turn pays a blocking Honcho round-trip before the LLM call can begin. - -Turn flow: - -``` -user message - → before_prompt_build (BLOCKING HTTP — every turn) - → session.context() - → system prompt assembled - → LLM call - → response - → agent_end hook - → session.addMessages() - → session.setMetadata() -``` - ---- - -## Diff table - -| Dimension | Hermes Agent | openclaw-honcho | -|---|---|---| -| **Context injection timing** | Once per session (cached). Zero HTTP on response path after turn 1. | Every turn, blocking. Fresh context per turn but adds latency. | -| **Prefetch strategy** | Daemon threads fire at turn end; consumed next turn from cache. | None. Blocking call at prompt-build time. | -| **Dialectic (peer.chat)** | Prefetched async; result injected into system prompt next turn. | On-demand via `honcho_recall` / `honcho_analyze` tools. | -| **Reasoning level** | Dynamic: scales with message length. Floor = config default. Cap = "high". | Fixed per tool: recall=minimal, analyze=medium. | -| **Memory modes** | `user_memory_mode` / `agent_memory_mode`: hybrid / honcho / local. | None. Always writes to Honcho. | -| **Write frequency** | async (background queue), turn, session, N turns. | After every agent_end (no control). 
| -| **AI peer identity** | `observe_me=True`, `seed_ai_identity()`, `get_ai_representation()`, SOUL.md → AI peer. | Agent files uploaded to agent peer at setup. No ongoing self-observation. | -| **Context scope** | User peer + AI peer representation, both injected. | User peer (owner) representation + conversation summary. `peerPerspective` on context call. | -| **Session naming** | per-directory / global / manual map / title-based. | Derived from platform session key. | -| **Multi-agent** | Single-agent only. | Parent observer hierarchy via `subagent_spawned`. | -| **Tool surface** | Single `query_user_context` tool (on-demand dialectic). | 6 tools: session, profile, search, context (fast) + recall, analyze (LLM). | -| **Platform metadata** | Not stripped. | Explicitly stripped before Honcho storage. | -| **Message dedup** | None. | `lastSavedIndex` in session metadata prevents re-sending. | -| **CLI surface in prompt** | Management commands injected into system prompt. Agent knows its own CLI. | Not injected. | -| **AI peer name in identity** | Replaces "Hermes Agent" in DEFAULT_AGENT_IDENTITY when configured. | Not implemented. | -| **QMD / local file search** | Not implemented. | Passthrough tools when QMD backend configured. | -| **Workspace metadata** | Not implemented. | `agentPeerMap` in workspace metadata tracks agent→peer ID. | - ---- - -## Patterns - -Six patterns from Hermes are worth adopting in any Honcho integration. Each is described as an integration-agnostic interface. - -**Hermes contributes:** -- Async prefetch (zero-latency) -- Dynamic reasoning level -- Per-peer memory modes -- AI peer identity formation -- Session naming strategies -- CLI surface injection - -**openclaw-honcho contributes back (Hermes should adopt):** -- `lastSavedIndex` dedup -- Platform metadata stripping -- Multi-agent observer hierarchy -- `peerPerspective` on `context()` -- Tiered tool surface (fast/LLM) -- Workspace `agentPeerMap` - ---- - -## Spec: async prefetch - -### Problem - -Calling `session.context()` and `peer.chat()` synchronously before each LLM call adds 200–800ms of Honcho round-trip latency to every turn. - -### Pattern - -Fire both calls as non-blocking background work at the **end** of each turn. Store results in a per-session cache keyed by session ID. At the **start** of the next turn, pop from cache — the HTTP is already done. First turn is cold (empty cache); all subsequent turns are zero-latency on the response path. - -### Interface contract - -```typescript -interface AsyncPrefetch { - // Fire context + dialectic fetches at turn end. Non-blocking. - firePrefetch(sessionId: string, userMessage: string): void; - - // Pop cached results at turn start. Returns empty if cache is cold. - popContextResult(sessionId: string): ContextResult | null; - popDialecticResult(sessionId: string): string | null; -} - -type ContextResult = { - representation: string; - card: string[]; - aiRepresentation?: string; // AI peer context if enabled - summary?: string; // conversation summary if fetched -}; -``` - -### Implementation notes - -- **Python:** `threading.Thread(daemon=True)`. Write to `dict[session_id, result]` — GIL makes this safe for simple writes. -- **TypeScript:** `Promise` stored in `Map>`. Await at pop time. If not resolved yet, return null — do not block. -- The pop is destructive: clears the cache entry after reading so stale data never accumulates. -- Prefetch should also fire on first turn (even though it won't be consumed until turn 2). 
- -### openclaw-honcho adoption - -Move `session.context()` from `before_prompt_build` to a post-`agent_end` background task. Store result in `state.contextCache`. In `before_prompt_build`, read from cache instead of calling Honcho. If cache is empty (turn 1), inject nothing — the prompt is still valid without Honcho context on the first turn. - ---- - -## Spec: dynamic reasoning level - -### Problem - -Honcho's dialectic endpoint supports reasoning levels from `minimal` to `max`. A fixed level per tool wastes budget on simple queries and under-serves complex ones. - -### Pattern - -Select the reasoning level dynamically based on the user's message. Use the configured default as a floor. Bump by message length. Cap auto-selection at `high` — never select `max` automatically. - -### Logic - -``` -< 120 chars → default (typically "low") -120–400 chars → one level above default (cap at "high") -> 400 chars → two levels above default (cap at "high") -``` - -### Config key - -Add `dialecticReasoningLevel` (string, default `"low"`). This sets the floor. The dynamic bump always applies on top. - -### openclaw-honcho adoption - -Apply in `honcho_recall` and `honcho_analyze`: replace fixed `reasoningLevel` with the dynamic selector. `honcho_recall` uses floor `"minimal"`, `honcho_analyze` uses floor `"medium"` — both still bump with message length. - ---- - -## Spec: per-peer memory modes - -### Problem - -Users want independent control over whether user context and agent context are written locally, to Honcho, or both. - -### Modes - -| Mode | Effect | -|---|---| -| `hybrid` | Write to both local files and Honcho (default) | -| `honcho` | Honcho only — disable corresponding local file writes | -| `local` | Local files only — skip Honcho sync for this peer | - -### Config schema - -```json -{ - "memoryMode": "hybrid", - "userMemoryMode": "honcho", - "agentMemoryMode": "hybrid" -} -``` - -Resolution order: per-peer field wins → shorthand `memoryMode` → default `"hybrid"`. - -### Effect on Honcho sync - -- `userMemoryMode=local`: skip adding user peer messages to Honcho -- `agentMemoryMode=local`: skip adding assistant peer messages to Honcho -- Both local: skip `session.addMessages()` entirely -- `userMemoryMode=honcho`: disable local USER.md writes -- `agentMemoryMode=honcho`: disable local MEMORY.md / SOUL.md writes - ---- - -## Spec: AI peer identity formation - -### Problem - -Honcho builds the user's representation organically by observing what the user says. The same mechanism exists for the AI peer — but only if `observe_me=True` is set for the agent peer. Without it, the agent peer accumulates nothing. - -Additionally, existing persona files (SOUL.md, IDENTITY.md) should seed the AI peer's Honcho representation at first activation. - -### Part A: observe_me=True for agent peer - -```typescript -await session.addPeers([ - [ownerPeer.id, { observeMe: true, observeOthers: false }], - [agentPeer.id, { observeMe: true, observeOthers: true }], // was false -]); -``` - -One-line change. Foundational. Without it, the AI peer representation stays empty regardless of what the agent says. 
- -### Part B: seedAiIdentity() - -```typescript -async function seedAiIdentity( - agentPeer: Peer, - content: string, - source: string -): Promise { - const wrapped = [ - ``, - `${source}`, - ``, - content.trim(), - ``, - ].join("\n"); - - await agentPeer.addMessage("assistant", wrapped); - return true; -} -``` - -### Part C: migrate agent files at setup - -During `honcho setup`, upload agent-self files (SOUL.md, IDENTITY.md, AGENTS.md) to the agent peer via `seedAiIdentity()` instead of `session.uploadFile()`. This routes content through Honcho's observation pipeline. - -### Part D: AI peer name in identity - -When the agent has a configured name, prepend it to the injected system prompt: - -```typescript -const namePrefix = agentName ? `You are ${agentName}.\n\n` : ""; -return { systemPrompt: namePrefix + "## User Memory Context\n\n" + sections }; -``` - -### CLI surface - -``` -honcho identity # seed from file -honcho identity --show # show current AI peer representation -``` - ---- - -## Spec: session naming strategies - -### Problem - -A single global session means every project shares the same Honcho context. Per-directory sessions provide isolation without requiring users to name sessions manually. - -### Strategies - -| Strategy | Session key | When to use | -|---|---|---| -| `per-directory` | basename of CWD | Default. Each project gets its own session. | -| `global` | fixed string `"global"` | Single cross-project session. | -| manual map | user-configured per path | `sessions` config map overrides directory basename. | -| title-based | sanitized session title | When agent supports named sessions set mid-conversation. | - -### Config schema - -```json -{ - "sessionStrategy": "per-directory", - "sessionPeerPrefix": false, - "sessions": { - "/home/user/projects/foo": "foo-project" - } -} -``` - -### CLI surface - -``` -honcho sessions # list all mappings -honcho map # map cwd to session name -honcho map # no-arg = list mappings -``` - -Resolution order: manual map → session title → directory basename → platform key. - ---- - -## Spec: CLI surface injection - -### Problem - -When a user asks "how do I change my memory settings?" the agent either hallucinates or says it doesn't know. The agent should know its own management interface. - -### Pattern - -When Honcho is active, append a compact command reference to the system prompt. Keep it under 300 chars. - -``` -# Honcho memory integration -Active. Session: {sessionKey}. Mode: {mode}. 
-Management commands: - honcho status — show config + connection - honcho mode [hybrid|honcho|local] — show or set memory mode - honcho sessions — list session mappings - honcho map — map directory to session - honcho identity [file] [--show] — seed or show AI identity - honcho setup — full interactive wizard -``` - ---- - -## openclaw-honcho checklist - -Ordered by impact: - -- [ ] **Async prefetch** — move `session.context()` out of `before_prompt_build` into post-`agent_end` background Promise -- [ ] **observe_me=True for agent peer** — one-line change in `session.addPeers()` -- [ ] **Dynamic reasoning level** — add helper; apply in `honcho_recall` and `honcho_analyze`; add `dialecticReasoningLevel` to config -- [ ] **Per-peer memory modes** — add `userMemoryMode` / `agentMemoryMode` to config; gate Honcho sync and local writes -- [ ] **seedAiIdentity()** — add helper; use during setup migration for SOUL.md / IDENTITY.md -- [ ] **Session naming strategies** — add `sessionStrategy`, `sessions` map, `sessionPeerPrefix` -- [ ] **CLI surface injection** — append command reference to `before_prompt_build` return value -- [ ] **honcho identity subcommand** — seed from file or `--show` current representation -- [ ] **AI peer name injection** — if `aiPeer` name configured, prepend to injected system prompt -- [ ] **honcho mode / sessions / map** — CLI parity with Hermes - -Already done in openclaw-honcho (do not re-implement): `lastSavedIndex` dedup, platform metadata stripping, multi-agent parent observer, `peerPerspective` on `context()`, tiered tool surface, workspace `agentPeerMap`, QMD passthrough, self-hosted Honcho. - ---- - -## nanobot-honcho checklist - -Greenfield integration. Start from openclaw-honcho's architecture and apply all Hermes patterns from day one. - -### Phase 1 — core correctness - -- [ ] Dual peer model (owner + agent peer), both with `observe_me=True` -- [ ] Message capture at turn end with `lastSavedIndex` dedup -- [ ] Platform metadata stripping before Honcho storage -- [ ] Async prefetch from day one — do not implement blocking context injection -- [ ] Legacy file migration at first activation (USER.md → owner peer, SOUL.md → `seedAiIdentity()`) - -### Phase 2 — configuration - -- [ ] Config schema: `apiKey`, `workspaceId`, `baseUrl`, `memoryMode`, `userMemoryMode`, `agentMemoryMode`, `dialecticReasoningLevel`, `sessionStrategy`, `sessions` -- [ ] Per-peer memory mode gating -- [ ] Dynamic reasoning level -- [ ] Session naming strategies - -### Phase 3 — tools and CLI - -- [ ] Tool surface: `honcho_profile`, `honcho_recall`, `honcho_analyze`, `honcho_search`, `honcho_context` -- [ ] CLI: `setup`, `status`, `sessions`, `map`, `mode`, `identity` -- [ ] CLI surface injection into system prompt -- [ ] AI peer name wired into agent identity diff --git a/docs/migration/openclaw.md b/docs/migration/openclaw.md deleted file mode 100644 index 30f2f97e4d..0000000000 --- a/docs/migration/openclaw.md +++ /dev/null @@ -1,142 +0,0 @@ -# Migrating from OpenClaw to Hermes Agent - -This guide covers how to import your OpenClaw settings, memories, skills, and API keys into Hermes Agent. - -## Three Ways to Migrate - -### 1. Automatic (during first-time setup) - -When you run `hermes setup` for the first time and Hermes detects `~/.openclaw`, it automatically offers to import your OpenClaw data before configuration begins. Just accept the prompt and everything is handled for you. - -### 2. 
CLI Command (quick, scriptable) - -```bash -hermes claw migrate # Preview then migrate (always shows preview first) -hermes claw migrate --dry-run # Preview only, no changes -hermes claw migrate --preset user-data # Migrate without API keys/secrets -hermes claw migrate --yes # Skip confirmation prompt -``` - -The migration always shows a full preview of what will be imported before making any changes. You review the preview and confirm before anything is written. - -**All options:** - -| Flag | Description | -|------|-------------| -| `--source PATH` | Path to OpenClaw directory (default: `~/.openclaw`) | -| `--dry-run` | Preview only — no files are modified | -| `--preset {user-data,full}` | Migration preset (default: `full`). `user-data` excludes secrets | -| `--overwrite` | Overwrite existing files (default: skip conflicts) | -| `--migrate-secrets` | Include allowlisted secrets (auto-enabled with `full` preset) | -| `--workspace-target PATH` | Copy workspace instructions (AGENTS.md) to this absolute path | -| `--skill-conflict {skip,overwrite,rename}` | How to handle skill name conflicts (default: `skip`) | -| `--yes`, `-y` | Skip confirmation prompts | - -### 3. Agent-Guided (interactive, with previews) - -Ask the agent to run the migration for you: - -``` -> Migrate my OpenClaw setup to Hermes -``` - -The agent will use the `openclaw-migration` skill to: -1. Run a preview first to show what would change -2. Ask about conflict resolution (SOUL.md, skills, etc.) -3. Let you choose between `user-data` and `full` presets -4. Execute the migration with your choices -5. Print a detailed summary of what was migrated - -## What Gets Migrated - -### `user-data` preset -| Item | Source | Destination | -|------|--------|-------------| -| SOUL.md | `~/.openclaw/workspace/SOUL.md` | `~/.hermes/SOUL.md` | -| Memory entries | `~/.openclaw/workspace/MEMORY.md` | `~/.hermes/memories/MEMORY.md` | -| User profile | `~/.openclaw/workspace/USER.md` | `~/.hermes/memories/USER.md` | -| Skills | `~/.openclaw/workspace/skills/` | `~/.hermes/skills/openclaw-imports/` | -| Command allowlist | `~/.openclaw/workspace/exec_approval_patterns.yaml` | Merged into `~/.hermes/config.yaml` | -| Messaging settings | `~/.openclaw/config.yaml` (TELEGRAM_ALLOWED_USERS, MESSAGING_CWD) | `~/.hermes/.env` | -| TTS assets | `~/.openclaw/workspace/tts/` | `~/.hermes/tts/` | - -Workspace files are also checked at `workspace.default/` and `workspace-main/` as fallback paths (OpenClaw renamed `workspace/` to `workspace-main/` in recent versions). - -### `full` preset (adds to `user-data`) -| Item | Source | Destination | -|------|--------|-------------| -| Telegram bot token | `openclaw.json` channels config | `~/.hermes/.env` | -| OpenRouter API key | `.env`, `openclaw.json`, or `openclaw.json["env"]` | `~/.hermes/.env` | -| OpenAI API key | `.env`, `openclaw.json`, or `openclaw.json["env"]` | `~/.hermes/.env` | -| Anthropic API key | `.env`, `openclaw.json`, or `openclaw.json["env"]` | `~/.hermes/.env` | -| ElevenLabs API key | `.env`, `openclaw.json`, or `openclaw.json["env"]` | `~/.hermes/.env` | - -API keys are searched across four sources: inline config values, `~/.openclaw/.env`, the `openclaw.json` `"env"` sub-object, and per-agent auth profiles. - -Only allowlisted secrets are ever imported. Other credentials are skipped and reported. 
- -## OpenClaw Schema Compatibility - -The migration handles both old and current OpenClaw config layouts: - -- **Channel tokens**: Reads from flat paths (`channels.telegram.botToken`) and the newer `accounts.default` layout (`channels.telegram.accounts.default.botToken`) -- **TTS provider**: OpenClaw renamed "edge" to "microsoft" — both are recognized and mapped to Hermes' "edge" -- **Provider API types**: Both short (`openai`, `anthropic`) and hyphenated (`openai-completions`, `anthropic-messages`, `google-generative-ai`) values are mapped correctly -- **thinkingDefault**: All enum values are handled including newer ones (`minimal`, `xhigh`, `adaptive`) -- **Matrix**: Uses `accessToken` field (not `botToken`) -- **SecretRef formats**: Plain strings, env templates (`${VAR}`), and `source: "env"` SecretRefs are resolved. `source: "file"` and `source: "exec"` SecretRefs produce a warning — add those keys manually after migration. - -## Conflict Handling - -By default, the migration **will not overwrite** existing Hermes data: - -- **SOUL.md** — skipped if one already exists in `~/.hermes/` -- **Memory entries** — skipped if memories already exist (to avoid duplicates) -- **Skills** — skipped if a skill with the same name already exists -- **API keys** — skipped if the key is already set in `~/.hermes/.env` - -To overwrite conflicts, use `--overwrite`. The migration creates backups before overwriting. - -For skills, you can also use `--skill-conflict rename` to import conflicting skills under a new name (e.g., `skill-name-imported`). - -## Migration Report - -Every migration produces a report showing: -- **Migrated items** — what was successfully imported -- **Conflicts** — items skipped because they already exist -- **Skipped items** — items not found in the source -- **Errors** — items that failed to import - -For executed migrations, the full report is saved to `~/.hermes/migration/openclaw//`. - -## Post-Migration Notes - -- **Skills require a new session** — imported skills take effect after restarting your agent or starting a new chat. -- **WhatsApp requires re-pairing** — WhatsApp uses QR-code pairing, not token-based auth. Run `hermes whatsapp` to pair. -- **Archive cleanup** — after migration, you'll be offered to rename `~/.openclaw/` to `.openclaw.pre-migration/` to prevent state confusion. You can also run `hermes claw cleanup` later. - -## Troubleshooting - -### "OpenClaw directory not found" -The migration looks for `~/.openclaw` by default, then tries `~/.clawdbot` and `~/.moltbot`. If your OpenClaw is installed elsewhere, use `--source`: -```bash -hermes claw migrate --source /path/to/.openclaw -``` - -### "Migration script not found" -The migration script ships with Hermes Agent. If you installed via pip (not git clone), the `optional-skills/` directory may not be present. Install the skill from the Skills Hub: -```bash -hermes skills install openclaw-migration -``` - -### Memory overflow -If your OpenClaw MEMORY.md or USER.md exceeds Hermes' character limits, excess entries are exported to an overflow file in the migration report directory. You can manually review and add the most important ones. - -### API keys not found -Keys might be stored in different places depending on your OpenClaw setup: -- `~/.openclaw/.env` file -- Inline in `openclaw.json` under `models.providers.*.apiKey` -- In `openclaw.json` under the `"env"` or `"env.vars"` sub-objects -- In `~/.openclaw/agents/main/agent/auth-profiles.json` - -The migration checks all four. 
If keys use `source: "file"` or `source: "exec"` SecretRefs, they can't be resolved automatically — add them via `hermes config set`. diff --git a/docs/plans/2026-03-16-pricing-accuracy-architecture-design.md b/docs/plans/2026-03-16-pricing-accuracy-architecture-design.md deleted file mode 100644 index a75f14ff5a..0000000000 --- a/docs/plans/2026-03-16-pricing-accuracy-architecture-design.md +++ /dev/null @@ -1,608 +0,0 @@ -# Pricing Accuracy Architecture - -Date: 2026-03-16 - -## Goal - -Hermes should only show dollar costs when they are backed by an official source for the user's actual billing path. - -This design replaces the current static, heuristic pricing flow in: - -- `run_agent.py` -- `agent/usage_pricing.py` -- `agent/insights.py` -- `cli.py` - -with a provider-aware pricing system that: - -- handles cache billing correctly -- distinguishes `actual` vs `estimated` vs `included` vs `unknown` -- reconciles post-hoc costs when providers expose authoritative billing data -- supports direct providers, OpenRouter, subscriptions, enterprise pricing, and custom endpoints - -## Problems In The Current Design - -Current Hermes behavior has four structural issues: - -1. It stores only `prompt_tokens` and `completion_tokens`, which is insufficient for providers that bill cache reads and cache writes separately. -2. It uses a static model price table and fuzzy heuristics, which can drift from current official pricing. -3. It assumes public API list pricing matches the user's real billing path. -4. It has no distinction between live estimates and reconciled billed cost. - -## Design Principles - -1. Normalize usage before pricing. -2. Never fold cached tokens into plain input cost. -3. Track certainty explicitly. -4. Treat the billing path as part of the model identity. -5. Prefer official machine-readable sources over scraped docs. -6. Use post-hoc provider cost APIs when available. -7. Show `n/a` rather than inventing precision. - -## High-Level Architecture - -The new system has four layers: - -1. `usage_normalization` - Converts raw provider usage into a canonical usage record. -2. `pricing_source_resolution` - Determines the billing path, source of truth, and applicable pricing source. -3. `cost_estimation_and_reconciliation` - Produces an immediate estimate when possible, then replaces or annotates it with actual billed cost later. -4. `presentation` - `/usage`, `/insights`, and the status bar display cost with certainty metadata. - -## Canonical Usage Record - -Add a canonical usage model that every provider path maps into before any pricing math happens. - -Suggested structure: - -```python -@dataclass -class CanonicalUsage: - provider: str - billing_provider: str - model: str - billing_route: str - - input_tokens: int = 0 - output_tokens: int = 0 - cache_read_tokens: int = 0 - cache_write_tokens: int = 0 - reasoning_tokens: int = 0 - request_count: int = 1 - - raw_usage: dict[str, Any] | None = None - raw_usage_fields: dict[str, str] | None = None - computed_fields: set[str] | None = None - - provider_request_id: str | None = None - provider_generation_id: str | None = None - provider_response_id: str | None = None -``` - -Rules: - -- `input_tokens` means non-cached input only. -- `cache_read_tokens` and `cache_write_tokens` are never merged into `input_tokens`. -- `output_tokens` excludes cache metrics. -- `reasoning_tokens` is telemetry unless a provider officially bills it separately. 
- -This is the same normalization pattern used by `opencode`, extended with provenance and reconciliation ids. - -## Provider Normalization Rules - -### OpenAI Direct - -Source usage fields: - -- `prompt_tokens` -- `completion_tokens` -- `prompt_tokens_details.cached_tokens` - -Normalization: - -- `cache_read_tokens = cached_tokens` -- `input_tokens = prompt_tokens - cached_tokens` -- `cache_write_tokens = 0` unless OpenAI exposes it in the relevant route -- `output_tokens = completion_tokens` - -### Anthropic Direct - -Source usage fields: - -- `input_tokens` -- `output_tokens` -- `cache_read_input_tokens` -- `cache_creation_input_tokens` - -Normalization: - -- `input_tokens = input_tokens` -- `output_tokens = output_tokens` -- `cache_read_tokens = cache_read_input_tokens` -- `cache_write_tokens = cache_creation_input_tokens` - -### OpenRouter - -Estimate-time usage normalization should use the response usage payload with the same rules as the underlying provider when possible. - -Reconciliation-time records should also store: - -- OpenRouter generation id -- native token fields when available -- `total_cost` -- `cache_discount` -- `upstream_inference_cost` -- `is_byok` - -### Gemini / Vertex - -Use official Gemini or Vertex usage fields where available. - -If cached content tokens are exposed: - -- map them to `cache_read_tokens` - -If a route exposes no cache creation metric: - -- store `cache_write_tokens = 0` -- preserve the raw usage payload for later extension - -### DeepSeek And Other Direct Providers - -Normalize only the fields that are officially exposed. - -If a provider does not expose cache buckets: - -- do not infer them unless the provider explicitly documents how to derive them - -### Subscription / Included-Cost Routes - -These still use the canonical usage model. - -Tokens are tracked normally. Cost depends on billing mode, not on whether usage exists. - -## Billing Route Model - -Hermes must stop keying pricing solely by `model`. 
- -Introduce a billing route descriptor: - -```python -@dataclass -class BillingRoute: - provider: str - base_url: str | None - model: str - billing_mode: str - organization_hint: str | None = None -``` - -`billing_mode` values: - -- `official_cost_api` -- `official_generation_api` -- `official_models_api` -- `official_docs_snapshot` -- `subscription_included` -- `user_override` -- `custom_contract` -- `unknown` - -Examples: - -- OpenAI direct API with Costs API access: `official_cost_api` -- Anthropic direct API with Usage & Cost API access: `official_cost_api` -- OpenRouter request before reconciliation: `official_models_api` -- OpenRouter request after generation lookup: `official_generation_api` -- GitHub Copilot style subscription route: `subscription_included` -- local OpenAI-compatible server: `unknown` -- enterprise contract with configured rates: `custom_contract` - -## Cost Status Model - -Every displayed cost should have: - -```python -@dataclass -class CostResult: - amount_usd: Decimal | None - status: Literal["actual", "estimated", "included", "unknown"] - source: Literal[ - "provider_cost_api", - "provider_generation_api", - "provider_models_api", - "official_docs_snapshot", - "user_override", - "custom_contract", - "none", - ] - label: str - fetched_at: datetime | None - pricing_version: str | None - notes: list[str] -``` - -Presentation rules: - -- `actual`: show dollar amount as final -- `estimated`: show dollar amount with estimate labeling -- `included`: show `included` or `$0.00 (included)` depending on UX choice -- `unknown`: show `n/a` - -## Official Source Hierarchy - -Resolve cost using this order: - -1. Request-level or account-level official billed cost -2. Official machine-readable model pricing -3. Official docs snapshot -4. User override or custom contract -5. Unknown - -The system must never skip to a lower level if a higher-confidence source exists for the current billing route. - -## Provider-Specific Truth Rules - -### OpenAI Direct - -Preferred truth: - -1. Costs API for reconciled spend -2. Official pricing page for live estimate - -### Anthropic Direct - -Preferred truth: - -1. Usage & Cost API for reconciled spend -2. Official pricing docs for live estimate - -### OpenRouter - -Preferred truth: - -1. `GET /api/v1/generation` for reconciled `total_cost` -2. `GET /api/v1/models` pricing for live estimate - -Do not use underlying provider public pricing as the source of truth for OpenRouter billing. - -### Gemini / Vertex - -Preferred truth: - -1. official billing export or billing API for reconciled spend when available for the route -2. official pricing docs for estimate - -### DeepSeek - -Preferred truth: - -1. official machine-readable cost source if available in the future -2. official pricing docs snapshot today - -### Subscription-Included Routes - -Preferred truth: - -1. explicit route config marking the model as included in subscription - -These should display `included`, not an API list-price estimate. - -### Custom Endpoint / Local Model - -Preferred truth: - -1. user override -2. custom contract config -3. unknown - -These should default to `unknown`. - -## Pricing Catalog - -Replace the current `MODEL_PRICING` dict with a richer pricing catalog. 
- -Suggested record: - -```python -@dataclass -class PricingEntry: - provider: str - route_pattern: str - model_pattern: str - - input_cost_per_million: Decimal | None = None - output_cost_per_million: Decimal | None = None - cache_read_cost_per_million: Decimal | None = None - cache_write_cost_per_million: Decimal | None = None - request_cost: Decimal | None = None - image_cost: Decimal | None = None - - source: str = "official_docs_snapshot" - source_url: str | None = None - fetched_at: datetime | None = None - pricing_version: str | None = None -``` - -The catalog should be route-aware: - -- `openai:gpt-5` -- `anthropic:claude-opus-4-6` -- `openrouter:anthropic/claude-opus-4.6` -- `copilot:gpt-4o` - -This avoids conflating direct-provider billing with aggregator billing. - -## Pricing Sync Architecture - -Introduce a pricing sync subsystem instead of manually maintaining a single hardcoded table. - -Suggested modules: - -- `agent/pricing/catalog.py` -- `agent/pricing/sources.py` -- `agent/pricing/sync.py` -- `agent/pricing/reconcile.py` -- `agent/pricing/types.py` - -### Sync Sources - -- OpenRouter models API -- official provider docs snapshots where no API exists -- user overrides from config - -### Sync Output - -Cache pricing entries locally with: - -- source URL -- fetch timestamp -- version/hash -- confidence/source type - -### Sync Frequency - -- startup warm cache -- background refresh every 6 to 24 hours depending on source -- manual `hermes pricing sync` - -## Reconciliation Architecture - -Live requests may produce only an estimate initially. Hermes should reconcile them later when a provider exposes actual billed cost. - -Suggested flow: - -1. Agent call completes. -2. Hermes stores canonical usage plus reconciliation ids. -3. Hermes computes an immediate estimate if a pricing source exists. -4. A reconciliation worker fetches actual cost when supported. -5. Session and message records are updated with `actual` cost. - -This can run: - -- inline for cheap lookups -- asynchronously for delayed provider accounting - -## Persistence Changes - -Session storage should stop storing only aggregate prompt/completion totals. 
- -Add fields for both usage and cost certainty: - -- `input_tokens` -- `output_tokens` -- `cache_read_tokens` -- `cache_write_tokens` -- `reasoning_tokens` -- `estimated_cost_usd` -- `actual_cost_usd` -- `cost_status` -- `cost_source` -- `pricing_version` -- `billing_provider` -- `billing_mode` - -If schema expansion is too large for one PR, add a new pricing events table: - -```text -session_cost_events - id - session_id - request_id - provider - model - billing_mode - input_tokens - output_tokens - cache_read_tokens - cache_write_tokens - estimated_cost_usd - actual_cost_usd - cost_status - cost_source - pricing_version - created_at - updated_at -``` - -## Hermes Touchpoints - -### `run_agent.py` - -Current responsibility: - -- parse raw provider usage -- update session token counters - -New responsibility: - -- build `CanonicalUsage` -- update canonical counters -- store reconciliation ids -- emit usage event to pricing subsystem - -### `agent/usage_pricing.py` - -Current responsibility: - -- static lookup table -- direct cost arithmetic - -New responsibility: - -- move or replace with pricing catalog facade -- no fuzzy model-family heuristics -- no direct pricing without billing-route context - -### `cli.py` - -Current responsibility: - -- compute session cost directly from prompt/completion totals - -New responsibility: - -- display `CostResult` -- show status badges: - - `actual` - - `estimated` - - `included` - - `n/a` - -### `agent/insights.py` - -Current responsibility: - -- recompute historical estimates from static pricing - -New responsibility: - -- aggregate stored pricing events -- prefer actual cost over estimate -- surface estimates only when reconciliation is unavailable - -## UX Rules - -### Status Bar - -Show one of: - -- `$1.42` -- `~$1.42` -- `included` -- `cost n/a` - -Where: - -- `$1.42` means `actual` -- `~$1.42` means `estimated` -- `included` means subscription-backed or explicitly zero-cost route -- `cost n/a` means unknown - -### `/usage` - -Show: - -- token buckets -- estimated cost -- actual cost if available -- cost status -- pricing source - -### `/insights` - -Aggregate: - -- actual cost totals -- estimated-only totals -- unknown-cost sessions count -- included-cost sessions count - -## Config And Overrides - -Add user-configurable pricing overrides in config: - -```yaml -pricing: - mode: hybrid - sync_on_startup: true - sync_interval_hours: 12 - overrides: - - provider: openrouter - model: anthropic/claude-opus-4.6 - billing_mode: custom_contract - input_cost_per_million: 4.25 - output_cost_per_million: 22.0 - cache_read_cost_per_million: 0.5 - cache_write_cost_per_million: 6.0 - included_routes: - - provider: copilot - model: "*" - - provider: codex-subscription - model: "*" -``` - -Overrides must win over catalog defaults for the matching billing route. 
- -## Rollout Plan - -### Phase 1 - -- add canonical usage model -- split cache token buckets in `run_agent.py` -- stop pricing cache-inflated prompt totals -- preserve current UI with improved backend math - -### Phase 2 - -- add route-aware pricing catalog -- integrate OpenRouter models API sync -- add `estimated` vs `included` vs `unknown` - -### Phase 3 - -- add reconciliation for OpenRouter generation cost -- add actual cost persistence -- update `/insights` to prefer actual cost - -### Phase 4 - -- add direct OpenAI and Anthropic reconciliation paths -- add user overrides and contract pricing -- add pricing sync CLI command - -## Testing Strategy - -Add tests for: - -- OpenAI cached token subtraction -- Anthropic cache read/write separation -- OpenRouter estimated vs actual reconciliation -- subscription-backed models showing `included` -- custom endpoints showing `n/a` -- override precedence -- stale catalog fallback behavior - -Current tests that assume heuristic pricing should be replaced with route-aware expectations. - -## Non-Goals - -- exact enterprise billing reconstruction without an official source or user override -- backfilling perfect historical cost for old sessions that lack cache bucket data -- scraping arbitrary provider web pages at request time - -## Recommendation - -Do not expand the existing `MODEL_PRICING` dict. - -That path cannot satisfy the product requirement. Hermes should instead migrate to: - -- canonical usage normalization -- route-aware pricing sources -- estimate-then-reconcile cost lifecycle -- explicit certainty states in the UI - -This is the minimum architecture that makes the statement "Hermes pricing is backed by official sources where possible, and otherwise clearly labeled" defensible. diff --git a/docs/plans/2026-04-01-ink-gateway-tui-migration-plan.md b/docs/plans/2026-04-01-ink-gateway-tui-migration-plan.md deleted file mode 100644 index 0210a878cb..0000000000 --- a/docs/plans/2026-04-01-ink-gateway-tui-migration-plan.md +++ /dev/null @@ -1,108 +0,0 @@ -# Ink Gateway TUI Migration — Post-mortem - -Planned: 2026-04-01 · Delivered: 2026-04 · Status: shipped, classic (prompt_toolkit) CLI still present - -## What Shipped - -Three layers, same repo, Python runtime unchanged. - -``` -ui-tui (Node/TS) ──stdio JSON-RPC──▶ tui_gateway (Py) ──▶ AIAgent (run_agent.py) -``` - -### Backend — `tui_gateway/` - -``` -tui_gateway/ -├── entry.py # subprocess entrypoint, stdio read/write loop -├── server.py # everything: sessions dict, @method handlers, _emit -├── render.py # stream renderer, diff rendering, message rendering -├── slash_worker.py # subprocess that runs hermes_cli slash commands -└── __init__.py -``` - -`server.py` owns the full runtime-control surface: session store (`_sessions: dict[str, dict]`), method registry (`@method("…")` decorator), event emitter (`_emit`), agent lifecycle (`_make_agent`, `_init_session`, `_wire_callbacks`), approval/sudo/clarify round-trips, and JSON-RPC dispatch. 
- -Protocol methods (`@method(...)` in `server.py`): - -- session: `session.{create, resume, list, close, interrupt, usage, history, compress, branch, title, save, undo}` -- prompt: `prompt.{submit, background, btw}` -- tools: `tools.{list, show, configure}` -- slash: `slash.exec`, `command.{dispatch, resolve}`, `commands.catalog`, `complete.{path, slash}` -- approvals: `approval.respond`, `sudo.respond`, `clarify.respond`, `secret.respond` -- config/state: `config.{get, set, show}`, `model.options`, `reload.mcp` -- ops: `shell.exec`, `cli.exec`, `terminal.resize`, `input.detect_drop`, `clipboard.paste`, `paste.collapse`, `image.attach`, `process.stop` -- misc: `agents.list`, `skills.manage`, `plugins.list`, `cron.manage`, `insights.get`, `rollback.{list, diff, restore}`, `browser.manage` - -Protocol events (`_emit(…)` → handled in `ui-tui/src/app/createGatewayEventHandler.ts`): - -- lifecycle: `gateway.{ready, stderr}`, `session.info`, `skin.changed` -- stream: `message.{start, delta, complete}`, `thinking.delta`, `reasoning.{delta, available}`, `status.update` -- tools: `tool.{start, progress, complete, generating}`, `subagent.{start, thinking, tool, progress, complete}` -- interactive: `approval.request`, `sudo.request`, `clarify.request`, `secret.request` -- async: `background.complete`, `btw.complete`, `error` - -### Frontend — `ui-tui/src/` - -``` -src/ -├── entry.tsx # node bootstrap: bootBanner → spawn python → dynamic-import Ink → render() -├── app.tsx # wraps -├── bootBanner.ts # raw-ANSI banner to stdout in ~2ms, pre-React -├── gatewayClient.ts # JSON-RPC client over child_process stdio -├── gatewayTypes.ts # typed RPC responses + GatewayEvent union -├── theme.ts # DEFAULT_THEME + fromSkin -│ -├── app/ # hooks + stores — the orchestration layer -│ ├── uiStore.ts # nanostore: sid, info, busy, usage, theme, status… -│ ├── turnStore.ts # nanostore: per-turn activity / reasoning / tools -│ ├── turnController.ts # imperative singleton for stream-time operations -│ ├── overlayStore.ts # nanostore: modal/overlay state -│ ├── useMainApp.ts # top-level composition hook -│ ├── useSessionLifecycle.ts # session.create/resume/close/reset -│ ├── useSubmission.ts # shell/slash/prompt dispatch + interpolation -│ ├── useConfigSync.ts # config.get + mtime poll -│ ├── useComposerState.ts # input buffer, paste snippets, editor mode -│ ├── useInputHandlers.ts # key bindings -│ ├── createGatewayEventHandler.ts # event-stream dispatcher -│ ├── createSlashHandler.ts # slash command router (registry + python fallback) -│ └── slash/commands/ # core.ts, ops.ts, session.ts — TS-owned slash commands -│ -├── components/ # AppLayout, AppChrome, AppOverlays, MessageLine, Thinking, Markdown, pickers, prompts, Banner, SessionPanel -├── config/ # env, limits, timing constants -├── content/ # charms, faces, fortunes, hotkeys, placeholders, verbs -├── domain/ # details, messages, paths, roles, slash, usage, viewport -├── protocol/ # interpolation, paste regex -├── hooks/ # useCompletion, useInputHistory, useQueue, useVirtualHistory -└── lib/ # history, messages, osc52, rpc, text -``` - -### CLI entry points — `hermes_cli/main.py` - -- `hermes --tui` → `node dist/entry.js` (auto-builds when `.ts`/`.tsx` newer than `dist/entry.js`) -- `hermes --tui --dev` → `tsx src/entry.tsx` (skip build) -- `HERMES_TUI_DIR=…` → external prebuilt dist (nix, distro packaging) - -## Diverged From Original Plan - -| Plan | Reality | Why | -|---|---|---| -| `tui_gateway/{controller,session_state,events,protocol}.py` | all collapsed into 
`server.py` | no second consumer ever emerged, keeping one file cheaper than four | -| `ui-tui/src/main.tsx` | split into `entry.tsx` (bootstrap) + `app.tsx` (shell) | boot banner + early python spawn wanted a pre-React moment | -| `ui-tui/src/state/store.ts` | three nanostores (`uiStore`, `turnStore`, `overlayStore`) | separate lifetimes: ui persists, turn resets per reply, overlay is modal | -| `approval.requested` / `sudo.requested` / `clarify.requested` | `*.request` (no `-ed`) | cosmetic | -| `session.cancel` | dropped | `session.interrupt` covers it | -| `HERMES_EXPERIMENTAL_TUI=1`, `display.experimental_tui: true`, `/tui on/off/status` | none shipped | `--tui` went from opt-in to first-class without an experimental phase | - -## Post-migration Additions (not in original plan) - -- **Async `session.create`** — returns sid in ~1ms, agent builds on a background thread, `session.info` broadcasts when ready; `_wait_agent()` gates every agent-touching handler via `_sess` -- **`bootBanner`** — raw-ANSI logo painted to stdout at T≈2ms, before Ink loads; `` wipes it seamlessly when React mounts -- **Selection uniform bg** — `theme.color.selectionBg` wired via `useSelection().setSelectionBgColor`; replaces SGR-inverse per-cell swap that fragmented over amber/gold fg -- **Slash command registry** — TS-owned commands in `app/slash/commands/{core,ops,session}.ts`, everything else falls through to `slash.exec` (python worker) -- **Turn store + controller split** — imperative singleton (`turnController`) holds refs/timers, nanostore (`turnStore`) holds render-visible state - -## What's Still Open - -- **Classic CLI not deleted.** `cli.py` still has ~80 `prompt_toolkit` references; classic REPL is still the default when `--tui` is absent. The original plan's "Cut 4 · prompt_toolkit removal later" hasn't happened. -- **No config-file opt-in.** `HERMES_EXPERIMENTAL_TUI` and `display.experimental_tui` were never built; only the CLI flag exists. Fine for now — if we want "default to TUI", a single line in `main.py` flips it. diff --git a/docs/skins/example-skin.yaml b/docs/skins/example-skin.yaml deleted file mode 100644 index fb0be89da6..0000000000 --- a/docs/skins/example-skin.yaml +++ /dev/null @@ -1,106 +0,0 @@ -# ============================================================================ -# Hermes Agent — Example Skin Template -# ============================================================================ -# -# Copy this file to ~/.hermes/skins/.yaml to create a custom skin. -# All fields are optional — missing values inherit from the default skin. -# Activate with: /skin or display.skin: in config.yaml -# -# Keys are marked: -# (both) — applies to both the classic CLI and the TUI -# (classic) — classic CLI only (see hermes --tui in user-guide/tui.md) -# (tui) — TUI only -# -# See hermes_cli/skin_engine.py for the full schema reference. -# ============================================================================ - -# Required: unique skin name (used in /skin command and config) -name: example -description: An example custom skin — copy and modify this template - -# ── Colors ────────────────────────────────────────────────────────────────── -# Hex color values. These control the visual palette. -colors: - # Banner panel (the startup welcome box) — (both) - banner_border: "#CD7F32" # Panel border - banner_title: "#FFD700" # Panel title text - banner_accent: "#FFBF00" # Section headers (Available Tools, Skills, etc.) 
- banner_dim: "#B8860B" # Dim/muted text (separators, model info) - banner_text: "#FFF8DC" # Body text (tool names, skill names) - - # UI elements — (both) - ui_accent: "#FFBF00" # General accent (falls back to banner_accent) - ui_label: "#4dd0e1" # Labels - ui_ok: "#4caf50" # Success indicators - ui_error: "#ef5350" # Error indicators - ui_warn: "#ffa726" # Warning indicators - - # Input area - prompt: "#FFF8DC" # Prompt text / `❯` glyph color (both) - input_rule: "#CD7F32" # Horizontal rule above input (classic) - - # Response box — (classic) - response_border: "#FFD700" # Response box border - - # Session display — (both) - session_label: "#DAA520" # "Session: " label - session_border: "#8B8682" # Session ID text - - # TUI / CLI surfaces — (classic: status bar, voice badge, completion meta) - status_bar_bg: "#1a1a2e" # Status / usage bar background (classic) - voice_status_bg: "#1a1a2e" # Voice-mode badge background (classic) - completion_menu_bg: "#1a1a2e" # Completion list background (both) - completion_menu_current_bg: "#333355" # Active completion row background (both) - completion_menu_meta_bg: "#1a1a2e" # Completion meta column bg (classic) - completion_menu_meta_current_bg: "#333355" # Active meta bg (classic) - - # Drag-to-select background — (tui) - selection_bg: "#3a3a55" # Uniform selection highlight in the TUI - -# ── Spinner ───────────────────────────────────────────────────────────────── -# (classic) — the TUI uses its own animated indicators; spinner config here -# is only read by the classic prompt_toolkit CLI. -spinner: - # Faces shown while waiting for the API response - waiting_faces: - - "(。◕‿◕。)" - - "(◕‿◕✿)" - - "٩(◕‿◕。)۶" - - # Faces shown during extended thinking/reasoning - thinking_faces: - - "(。•́︿•̀。)" - - "(◔_◔)" - - "(¬‿¬)" - - # Verbs used in spinner messages (e.g., "pondering your request...") - thinking_verbs: - - "pondering" - - "contemplating" - - "musing" - - "ruminating" - - # Optional: left/right decorations around the spinner - # Each entry is a [left, right] pair. Omit entirely for no wings. - # wings: - # - ["⟪⚔", "⚔⟫"] - # - ["⟪▲", "▲⟫"] - -# ── Branding ──────────────────────────────────────────────────────────────── -# Text strings used throughout the interface. -branding: - agent_name: "Hermes Agent" # (both) Banner title, about display - welcome: "Welcome! Type your message or /help for commands." # (both) - goodbye: "Goodbye! ⚕" # (both) Exit message - response_label: " ⚕ Hermes " # (classic) Response box header label - prompt_symbol: "❯ " # (both) Input prompt glyph - help_header: "(^_^)? Available Commands" # (both) /help overlay title - -# ── Tool Output ───────────────────────────────────────────────────────────── -# Character used as the prefix for tool output lines. (both) -# Default is "┊" (thin dotted vertical line). Some alternatives: -# "╎" (light triple dash vertical) -# "▏" (left one-eighth block) -# "│" (box drawing light vertical) -# "┃" (box drawing heavy vertical) -tool_prefix: "┊" diff --git a/docs/specs/container-cli-review-fixes.md b/docs/specs/container-cli-review-fixes.md deleted file mode 100644 index 0eb9070dbf..0000000000 --- a/docs/specs/container-cli-review-fixes.md +++ /dev/null @@ -1,329 +0,0 @@ -# Container-Aware CLI Review Fixes Spec - -**PR:** NousResearch/hermes-agent#7543 -**Review:** cursor[bot] bugbot review (4094049442) + two prior rounds -**Date:** 2026-04-12 -**Branch:** `feat/container-aware-cli-clean` - -## Review Issues Summary - -Six issues were raised across three bugbot review rounds. 
All six were fixed across three intermediate commits (79e8cd12, 38277a6a, 726cf90f). This spec addresses remaining design concerns surfaced by those reviews and simplifies the implementation based on interview decisions. - -| # | Issue | Severity | Status | -|---|-------|----------|--------| -| 1 | `os.execvp` retry loop unreachable | Medium | Fixed in 79e8cd12 (switched to subprocess.run) | -| 2 | Redundant `shutil.which("sudo")` | Medium | Fixed in 38277a6a (reuses `sudo` var) | -| 3 | Missing `chown -h` on symlink update | Low | Fixed in 38277a6a | -| 4 | Container routing after `parse_args()` | High | Fixed in 726cf90f | -| 5 | Hardcoded `/home/${user}` | Medium | Fixed in 726cf90f | -| 6 | Group membership not gated on `container.enable` | Low | Fixed in 726cf90f | - -The mechanical fixes are in place but the overall design needs revision. The retry loop, error swallowing, and process model have deeper issues than what the bugbot flagged. - ---- - -## Spec: Revised `_exec_in_container` - -### Design Principles - -1. **Let it crash.** No silent fallbacks. If `.container-mode` exists but something goes wrong, the error propagates naturally (Python traceback). The only case where container routing is skipped is when `.container-mode` doesn't exist or `HERMES_DEV=1`. -2. **No retries.** Probe once for sudo, exec once. If it fails, docker/podman's stderr reaches the user verbatim. -3. **Completely transparent.** No error wrapping, no prefixes, no spinners. Docker's output goes straight through. -4. **`os.execvp` on the happy path.** Replace the Python process entirely so there's no idle parent during interactive sessions. Note: `execvp` never returns on success (process is replaced) and raises `OSError` on failure (it does not return a value). The container process's exit code becomes the process exit code by definition — no explicit propagation needed. -5. **One human-readable exception to "let it crash".** `subprocess.TimeoutExpired` from the sudo probe gets a specific catch with a readable message, since a raw traceback for "your Docker daemon is slow" is confusing. All other exceptions propagate naturally. - -### Execution Flow - -``` -1. get_container_exec_info() - - HERMES_DEV=1 → return None (skip routing) - - Inside container → return None (skip routing) - - .container-mode doesn't exist → return None (skip routing) - - .container-mode exists → parse and return dict - - .container-mode exists but malformed/unreadable → LET IT CRASH (no try/except) - -2. _exec_in_container(container_info, sys.argv[1:]) - a. shutil.which(backend) → if None, print "{backend} not found on PATH" and sys.exit(1) - b. Sudo probe: subprocess.run([runtime, "inspect", "--format", "ok", container_name], timeout=15) - - If succeeds → needs_sudo = False - - If fails → try subprocess.run([sudo, "-n", runtime, "inspect", ...], timeout=15) - - If succeeds → needs_sudo = True - - If fails → print error with sudoers hint (including why -n is required) and sys.exit(1) - - If TimeoutExpired → catch specifically, print human-readable message about slow daemon and sys.exit(1) - c. Build exec_cmd: [sudo? + runtime, "exec", tty_flags, "-u", exec_user, env_flags, container, hermes_bin, *cli_args] - d.
os.execvp(exec_cmd[0], exec_cmd) - - On success: process is replaced — Python is gone, container exit code IS the process exit code - - On OSError: let it crash (natural traceback) -``` - -### Changes to `hermes_cli/main.py` - -#### `_exec_in_container` — rewrite - -Remove: -- The entire retry loop (`max_retries`, `for attempt in range(...)`) -- Spinner logic (`"Waiting for container..."`, dots) -- Exit code classification (125/126/127 handling) -- `subprocess.run` for the exec call (keep it only for the sudo probe) -- Special TTY vs non-TTY retry counts -- The `time` import (no longer needed) - -Change: -- Use `os.execvp(exec_cmd[0], exec_cmd)` as the final call -- Keep the `subprocess` import only for the sudo probe -- Keep TTY detection for the `-it` vs `-i` flag -- Keep env var forwarding (TERM, COLORTERM, LANG, LC_ALL) -- Keep the sudo probe as-is (it's the one "smart" part) -- Bump probe `timeout` from 5s to 15s — cold podman on a loaded machine needs headroom -- Catch `subprocess.TimeoutExpired` specifically on both probe calls — print a readable message about the daemon being unresponsive instead of a raw traceback -- Expand the sudoers hint error message to explain *why* `-n` (non-interactive) is required: a password prompt would hang the CLI or break piped commands - -The function becomes roughly: - -```python -def _exec_in_container(container_info: dict, cli_args: list): - """Replace the current process with a command inside the managed container. - - Probes whether sudo is needed (rootful containers), then os.execvp - into the container. If exec fails, the OS error propagates naturally. - """ - import shutil - import subprocess - - backend = container_info["backend"] - container_name = container_info["container_name"] - exec_user = container_info["exec_user"] - hermes_bin = container_info["hermes_bin"] - - runtime = shutil.which(backend) - if not runtime: - print(f"Error: {backend} not found on PATH. Cannot route to container.", - file=sys.stderr) - sys.exit(1) - - # Probe whether we need sudo to see the rootful container. - # Timeout is 15s — cold podman on a loaded machine can take a while. - # TimeoutExpired is caught specifically for a human-readable message; - # all other exceptions propagate naturally. - needs_sudo = False - sudo = None - try: - probe = subprocess.run( - [runtime, "inspect", "--format", "ok", container_name], - capture_output=True, text=True, timeout=15, - ) - except subprocess.TimeoutExpired: - print( - f"Error: timed out waiting for {backend} to respond.\n" - f"The {backend} daemon may be unresponsive or starting up.", - file=sys.stderr, - ) - sys.exit(1) - - if probe.returncode != 0: - sudo = shutil.which("sudo") - if sudo: - try: - probe2 = subprocess.run( - [sudo, "-n", runtime, "inspect", "--format", "ok", container_name], - capture_output=True, text=True, timeout=15, - ) - except subprocess.TimeoutExpired: - print( - f"Error: timed out waiting for sudo {backend} to respond.", - file=sys.stderr, - ) - sys.exit(1) - - if probe2.returncode == 0: - needs_sudo = True - else: - print( - f"Error: container '{container_name}' not found via {backend}.\n" - f"\n" - f"The NixOS service runs the container as root. Your user cannot\n" - f"see it because {backend} uses per-user namespaces.\n" - f"\n" - f"Fix: grant passwordless sudo for {backend}. 
The -n (non-interactive)\n" - f"flag is required because the CLI calls sudo non-interactively —\n" - f"a password prompt would hang or break piped commands:\n" - f"\n" - f' security.sudo.extraRules = [{{\n' - f' users = [ "{os.getenv("USER", "your-user")}" ];\n' - f' commands = [{{ command = "{runtime}"; options = [ "NOPASSWD" ]; }}];\n' - f' }}];\n' - f"\n" - f"Or run: sudo hermes {' '.join(cli_args)}", - file=sys.stderr, - ) - sys.exit(1) - else: - print( - f"Error: container '{container_name}' not found via {backend}.\n" - f"The container may be running under root. Try: sudo hermes {' '.join(cli_args)}", - file=sys.stderr, - ) - sys.exit(1) - - is_tty = sys.stdin.isatty() - tty_flags = ["-it"] if is_tty else ["-i"] - - env_flags = [] - for var in ("TERM", "COLORTERM", "LANG", "LC_ALL"): - val = os.environ.get(var) - if val: - env_flags.extend(["-e", f"{var}={val}"]) - - cmd_prefix = [sudo, "-n", runtime] if needs_sudo else [runtime] - exec_cmd = ( - cmd_prefix + ["exec"] - + tty_flags - + ["-u", exec_user] - + env_flags - + [container_name, hermes_bin] - + cli_args - ) - - # execvp replaces this process entirely — it never returns on success. - # On failure it raises OSError, which propagates naturally. - os.execvp(exec_cmd[0], exec_cmd) -``` - -#### Container routing call site in `main()` — remove try/except - -Current: -```python -try: - from hermes_cli.config import get_container_exec_info - container_info = get_container_exec_info() - if container_info: - _exec_in_container(container_info, sys.argv[1:]) - sys.exit(1) # exec failed if we reach here -except SystemExit: - raise -except Exception: - pass # Container routing unavailable, proceed locally -``` - -Revised: -```python -from hermes_cli.config import get_container_exec_info -container_info = get_container_exec_info() -if container_info: - _exec_in_container(container_info, sys.argv[1:]) - # Unreachable: os.execvp never returns on success (process is replaced) - # and raises OSError on failure (which propagates as a traceback). - # This line exists only as a defensive assertion. - sys.exit(1) -``` - -No try/except. If `.container-mode` doesn't exist, `get_container_exec_info()` returns `None` and we skip routing. If it exists but is broken, the exception propagates with a natural traceback. - -Note: `sys.exit(1)` after `_exec_in_container` is dead code in all paths — `os.execvp` either replaces the process or raises. It's kept as a belt-and-suspenders assertion with a comment marking it unreachable, not as actual error handling. - -### Changes to `hermes_cli/config.py` - -#### `get_container_exec_info` — remove inner try/except - -Current code catches `(OSError, IOError)` and returns `None`. This silently hides permission errors, corrupt files, etc. - -Change: Remove the try/except around file reading. Keep the early returns for `HERMES_DEV=1` and `_is_inside_container()`. The `FileNotFoundError` from `open()` when `.container-mode` doesn't exist should still return `None` (this is the "container mode not enabled" case). All other exceptions propagate. - -```python -def get_container_exec_info() -> Optional[dict]: - if os.environ.get("HERMES_DEV") == "1": - return None - if _is_inside_container(): - return None - - container_mode_file = get_hermes_home() / ".container-mode" - - try: - with open(container_mode_file, "r") as f: - # ... parse key=value lines ... - except FileNotFoundError: - return None - # All other exceptions (PermissionError, malformed data, etc.) propagate - - return { ... 
} -``` - ---- - -## Spec: NixOS Module Changes - -### Symlink creation — simplify to two branches - -Current: 4 branches (symlink exists, directory exists, other file, doesn't exist). - -Revised: 2 branches. - -```bash -if [ -d "${symlinkPath}" ] && [ ! -L "${symlinkPath}" ]; then - # Real directory — back it up, then create symlink - _backup="${symlinkPath}.bak.$(date +%s)" - echo "hermes-agent: backing up existing ${symlinkPath} to $_backup" - mv "${symlinkPath}" "$_backup" -fi -# For everything else (symlink, doesn't exist, etc.) — just force-create -ln -sfn "${target}" "${symlinkPath}" -chown -h ${user}:${cfg.group} "${symlinkPath}" -``` - -`ln -sfn` handles: existing symlink (replaces), doesn't exist (creates), and after the `mv` above (creates). The only case that needs special handling is a real directory, because `ln -sfn` cannot atomically replace a directory. - -Note: there is a theoretical race between the `[ -d ... ]` check and the `mv` (something could create/remove the directory in between). In practice this is a NixOS activation script running as root during `nixos-rebuild switch` — no other process should be touching `~/.hermes` at that moment. Not worth adding locking for. - -### Sudoers — document, don't auto-configure - -Do NOT add `security.sudo.extraRules` to the module. Document the sudoers requirement in the module's description/comments and in the error message the CLI prints when sudo probe fails. - -### Group membership gating — keep as-is - -The fix in 726cf90f (`cfg.container.enable && cfg.container.hostUsers != []`) is correct. Leftover group membership when container mode is disabled is harmless. No cleanup needed. - ---- - -## Spec: Test Rewrite - -The existing test file (`tests/hermes_cli/test_container_aware_cli.py`) has 16 tests. With the simplified exec model, several are obsolete. 
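Before the test inventory, a minimal end-to-end sketch of the narrowed `get_container_exec_info` as it would sit in `hermes_cli/config.py` (the tests below exercise exactly these paths). The early returns, the `.container-mode` location, the key=value format, and the `FileNotFoundError` narrowing come from this spec; the blank-line/comment tolerance, the `read_text()` call, and the default field values are illustrative assumptions, not the final implementation:

```python
import os
from typing import Optional


def get_container_exec_info() -> Optional[dict]:
    if os.environ.get("HERMES_DEV") == "1":
        return None
    if _is_inside_container():
        return None

    container_mode_file = get_hermes_home() / ".container-mode"
    try:
        raw = container_mode_file.read_text()
    except FileNotFoundError:
        return None  # container mode not enabled: the only silent skip
    # PermissionError, UnicodeDecodeError, etc. propagate -- let it crash.

    fields: dict = {}
    for line in raw.splitlines():
        line = line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue  # assumed: blank lines and comments are tolerated
        key, _, value = line.partition("=")
        fields[key.strip()] = value.strip()

    # Keys match what _exec_in_container consumes; the defaults here are
    # placeholders (test_get_container_exec_info_defaults pins the real ones).
    return {
        "backend": fields.get("backend", "podman"),
        "container_name": fields.get("container_name", "hermes-agent"),
        "exec_user": fields.get("exec_user", "hermes"),
        "hermes_bin": fields.get("hermes_bin", "hermes"),
    }
```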
- -### Tests to keep (update as needed) - -- `test_is_inside_container_dockerenv` — unchanged -- `test_is_inside_container_containerenv` — unchanged -- `test_is_inside_container_cgroup_docker` — unchanged -- `test_is_inside_container_false_on_host` — unchanged -- `test_get_container_exec_info_returns_metadata` — unchanged -- `test_get_container_exec_info_none_inside_container` — unchanged -- `test_get_container_exec_info_none_without_file` — unchanged -- `test_get_container_exec_info_skipped_when_hermes_dev` — unchanged -- `test_get_container_exec_info_not_skipped_when_hermes_dev_zero` — unchanged -- `test_get_container_exec_info_defaults` — unchanged -- `test_get_container_exec_info_docker_backend` — unchanged - -### Tests to add - -- `test_get_container_exec_info_crashes_on_permission_error` — verify that `PermissionError` propagates (no silent `None` return) -- `test_exec_in_container_calls_execvp` — verify `os.execvp` is called with correct args (runtime, tty flags, user, env, container, binary, cli args) -- `test_exec_in_container_sudo_probe_sets_prefix` — verify that when first probe fails and sudo probe succeeds, `os.execvp` is called with `sudo -n` prefix -- `test_exec_in_container_no_runtime_hard_fails` — keep existing, verify `sys.exit(1)` when `shutil.which` returns None -- `test_exec_in_container_non_tty_uses_i_only` — update to check `os.execvp` args instead of `subprocess.run` args -- `test_exec_in_container_probe_timeout_prints_message` — verify that `subprocess.TimeoutExpired` from the probe produces a human-readable error and `sys.exit(1)`, not a raw traceback -- `test_exec_in_container_container_not_running_no_sudo` — verify the path where runtime exists (`shutil.which` returns a path) but probe returns non-zero and no sudo is available. Should print the "container may be running under root" error. This is distinct from `no_runtime_hard_fails` which covers `shutil.which` returning None. - -### Tests to delete - -- `test_exec_in_container_tty_retries_on_container_failure` — retry loop removed -- `test_exec_in_container_non_tty_retries_silently_exits_126` — retry loop removed -- `test_exec_in_container_propagates_hermes_exit_code` — no subprocess.run to check exit codes; execvp replaces the process. Note: exit code propagation still works correctly — when `os.execvp` succeeds, the container's process *becomes* this process, so its exit code is the process exit code by OS semantics. No application code needed, no test needed. A comment in the function docstring documents this intent for future readers. 
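A sketch of the two core `os.execvp` tests listed above, to pin down the expected argv. The module path (`hermes_cli/main.py`), the probe semantics, and the `container_info` keys come from this spec; the stub plumbing (`SimpleNamespace`, the `/usr/bin/...` paths, the sample `hermes_bin` value) is illustrative rather than final test code:

```python
from types import SimpleNamespace

from hermes_cli import main as cli_main

CONTAINER_INFO = {
    "backend": "podman",
    "container_name": "hermes",           # illustrative values
    "exec_user": "hermes",
    "hermes_bin": "/opt/hermes/bin/hermes",
}


def _patch_common(monkeypatch, run_stub, tty):
    calls = {}
    monkeypatch.setattr("shutil.which", lambda name: f"/usr/bin/{name}")
    monkeypatch.setattr("subprocess.run", run_stub)
    monkeypatch.setattr(cli_main.sys, "stdin", SimpleNamespace(isatty=lambda: tty))
    monkeypatch.setattr(
        cli_main.os, "execvp",
        lambda file, args: calls.update(file=file, args=args),
    )
    return calls


def test_exec_in_container_calls_execvp(monkeypatch):
    # Probe succeeds immediately -> no sudo prefix; non-TTY -> "-i".
    calls = _patch_common(
        monkeypatch, lambda *a, **kw: SimpleNamespace(returncode=0), tty=False
    )
    cli_main._exec_in_container(CONTAINER_INFO, ["--tui"])
    assert calls["args"][:2] == ["/usr/bin/podman", "exec"]
    assert "-i" in calls["args"] and "-it" not in calls["args"]
    assert calls["args"][-2:] == ["/opt/hermes/bin/hermes", "--tui"]


def test_exec_in_container_sudo_probe_sets_prefix(monkeypatch):
    def run_stub(cmd, **kw):
        # Plain probe fails; the `sudo -n` probe succeeds.
        return SimpleNamespace(returncode=0 if cmd[0] == "/usr/bin/sudo" else 1)

    calls = _patch_common(monkeypatch, run_stub, tty=True)
    cli_main._exec_in_container(CONTAINER_INFO, ["--tui"])
    assert calls["args"][:3] == ["/usr/bin/sudo", "-n", "/usr/bin/podman"]
    assert "-it" in calls["args"]
```

Because `_exec_in_container` imports `shutil` and `subprocess` inside the function body, patching `shutil.which` and `subprocess.run` at module level is sufficient: the in-function re-import resolves to the same patched module objects.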
- ---- - -## Out of Scope - -- Auto-configuring sudoers rules in the NixOS module -- Any changes to `get_container_exec_info` parsing logic beyond the try/except narrowing -- Changes to `.container-mode` file format -- Changes to the `HERMES_DEV=1` bypass -- Changes to container detection logic (`_is_inside_container`) diff --git a/environments/tool_context.py b/environments/tool_context.py index 10f537d724..550c5e851c 100644 --- a/environments/tool_context.py +++ b/environments/tool_context.py @@ -53,7 +53,6 @@ def _run_tool_in_thread(tool_name: str, arguments: Dict[str, Any], task_id: str) try: loop = asyncio.get_running_loop() # We're in an async context -- need to run in thread - import concurrent.futures with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool: future = pool.submit( handle_function_call, tool_name, arguments, task_id diff --git a/gateway/config.py b/gateway/config.py index 2d74073234..67ebf73461 100644 --- a/gateway/config.py +++ b/gateway/config.py @@ -576,6 +576,14 @@ def load_gateway_config() -> GatewayConfig: bridged["free_response_channels"] = platform_cfg["free_response_channels"] if "mention_patterns" in platform_cfg: bridged["mention_patterns"] = platform_cfg["mention_patterns"] + if "dm_policy" in platform_cfg: + bridged["dm_policy"] = platform_cfg["dm_policy"] + if "allow_from" in platform_cfg: + bridged["allow_from"] = platform_cfg["allow_from"] + if "group_policy" in platform_cfg: + bridged["group_policy"] = platform_cfg["group_policy"] + if "group_allow_from" in platform_cfg: + bridged["group_allow_from"] = platform_cfg["group_allow_from"] if plat == Platform.DISCORD and "channel_skill_bindings" in platform_cfg: bridged["channel_skill_bindings"] = platform_cfg["channel_skill_bindings"] if "channel_prompts" in platform_cfg: @@ -608,6 +616,8 @@ def load_gateway_config() -> GatewayConfig: if isinstance(frc, list): frc = ",".join(str(v) for v in frc) os.environ["SLACK_FREE_RESPONSE_CHANNELS"] = str(frc) + if "reactions" in slack_cfg and not os.getenv("SLACK_REACTIONS"): + os.environ["SLACK_REACTIONS"] = str(slack_cfg["reactions"]).lower() # Discord settings → env vars (env vars take precedence) discord_cfg = yaml_cfg.get("discord", {}) @@ -662,8 +672,7 @@ def load_gateway_config() -> GatewayConfig: if "require_mention" in telegram_cfg and not os.getenv("TELEGRAM_REQUIRE_MENTION"): os.environ["TELEGRAM_REQUIRE_MENTION"] = str(telegram_cfg["require_mention"]).lower() if "mention_patterns" in telegram_cfg and not os.getenv("TELEGRAM_MENTION_PATTERNS"): - import json as _json - os.environ["TELEGRAM_MENTION_PATTERNS"] = _json.dumps(telegram_cfg["mention_patterns"]) + os.environ["TELEGRAM_MENTION_PATTERNS"] = json.dumps(telegram_cfg["mention_patterns"]) frc = telegram_cfg.get("free_response_chats") if frc is not None and not os.getenv("TELEGRAM_FREE_RESPONSE_CHATS"): if isinstance(frc, list): @@ -700,6 +709,20 @@ def load_gateway_config() -> GatewayConfig: if isinstance(frc, list): frc = ",".join(str(v) for v in frc) os.environ["WHATSAPP_FREE_RESPONSE_CHATS"] = str(frc) + if "dm_policy" in whatsapp_cfg and not os.getenv("WHATSAPP_DM_POLICY"): + os.environ["WHATSAPP_DM_POLICY"] = str(whatsapp_cfg["dm_policy"]).lower() + af = whatsapp_cfg.get("allow_from") + if af is not None and not os.getenv("WHATSAPP_ALLOWED_USERS"): + if isinstance(af, list): + af = ",".join(str(v) for v in af) + os.environ["WHATSAPP_ALLOWED_USERS"] = str(af) + if "group_policy" in whatsapp_cfg and not os.getenv("WHATSAPP_GROUP_POLICY"): + os.environ["WHATSAPP_GROUP_POLICY"] = 
str(whatsapp_cfg["group_policy"]).lower() + gaf = whatsapp_cfg.get("group_allow_from") + if gaf is not None and not os.getenv("WHATSAPP_GROUP_ALLOWED_USERS"): + if isinstance(gaf, list): + gaf = ",".join(str(v) for v in gaf) + os.environ["WHATSAPP_GROUP_ALLOWED_USERS"] = str(gaf) # DingTalk settings → env vars (env vars take precedence) dingtalk_cfg = yaml_cfg.get("dingtalk", {}) @@ -1237,7 +1260,6 @@ def _apply_env_overrides(config: GatewayConfig) -> None: if legacy_home: qq_home = legacy_home qq_home_name_env = "QQ_HOME_CHANNEL_NAME" - import logging logging.getLogger(__name__).warning( "QQ_HOME_CHANNEL is deprecated; rename to QQBOT_HOME_CHANNEL " "in your .env for consistency with the platform key." diff --git a/gateway/platforms/api_server.py b/gateway/platforms/api_server.py index 9687472f57..a6b52ff323 100644 --- a/gateway/platforms/api_server.py +++ b/gateway/platforms/api_server.py @@ -117,6 +117,160 @@ def _normalize_chat_content( return "" +# Content part type aliases used by the OpenAI Chat Completions and Responses +# APIs. We accept both spellings on input and emit a single canonical internal +# shape (``{"type": "text", ...}`` / ``{"type": "image_url", ...}``) that the +# rest of the agent pipeline already understands. +_TEXT_PART_TYPES = frozenset({"text", "input_text", "output_text"}) +_IMAGE_PART_TYPES = frozenset({"image_url", "input_image"}) +_FILE_PART_TYPES = frozenset({"file", "input_file"}) + + +def _normalize_multimodal_content(content: Any) -> Any: + """Validate and normalize multimodal content for the API server. + + Returns a plain string when the content is text-only, or a list of + ``{"type": "text"|"image_url", ...}`` parts when images are present. + The output shape is the native OpenAI Chat Completions vision format, + which the agent pipeline accepts verbatim (OpenAI-wire providers) or + converts (``_preprocess_anthropic_content`` for Anthropic). + + Raises ``ValueError`` with an OpenAI-style code on invalid input: + * ``unsupported_content_type`` — file/input_file/file_id parts, or + non-image ``data:`` URLs. + * ``invalid_image_url`` — missing URL or unsupported scheme. + * ``invalid_content_part`` — malformed text/image objects. + + Callers translate the ValueError into a 400 response. + """ + # Scalar passthrough mirrors ``_normalize_chat_content``. + if content is None: + return "" + if isinstance(content, str): + return content[:MAX_NORMALIZED_TEXT_LENGTH] if len(content) > MAX_NORMALIZED_TEXT_LENGTH else content + if not isinstance(content, list): + # Mirror the legacy text-normalizer's fallback so callers that + # pre-existed image support still get a string back. + return _normalize_chat_content(content) + + items = content[:MAX_CONTENT_LIST_SIZE] if len(content) > MAX_CONTENT_LIST_SIZE else content + normalized_parts: List[Dict[str, Any]] = [] + text_accum_len = 0 + + for part in items: + if isinstance(part, str): + if part: + trimmed = part[:MAX_NORMALIZED_TEXT_LENGTH] + normalized_parts.append({"type": "text", "text": trimmed}) + text_accum_len += len(trimmed) + continue + + if not isinstance(part, dict): + # Ignore unknown scalars for forward compatibility with future + # Responses API additions (e.g. ``refusal``). The same policy + # the text normalizer applies. 
+ continue + + raw_type = part.get("type") + part_type = str(raw_type or "").strip().lower() + + if part_type in _TEXT_PART_TYPES: + text = part.get("text") + if text is None: + continue + if not isinstance(text, str): + text = str(text) + if text: + trimmed = text[:MAX_NORMALIZED_TEXT_LENGTH] + normalized_parts.append({"type": "text", "text": trimmed}) + text_accum_len += len(trimmed) + continue + + if part_type in _IMAGE_PART_TYPES: + detail = part.get("detail") + image_ref = part.get("image_url") + # OpenAI Responses sends ``input_image`` with a top-level + # ``image_url`` string; Chat Completions sends ``image_url`` as + # ``{"url": "...", "detail": "..."}``. Support both. + if isinstance(image_ref, dict): + url_value = image_ref.get("url") + detail = image_ref.get("detail", detail) + else: + url_value = image_ref + if not isinstance(url_value, str) or not url_value.strip(): + raise ValueError("invalid_image_url:Image parts must include a non-empty image URL.") + url_value = url_value.strip() + lowered = url_value.lower() + if lowered.startswith("data:"): + if not lowered.startswith("data:image/") or "," not in url_value: + raise ValueError( + "unsupported_content_type:Only image data URLs are supported. " + "Non-image data payloads are not supported." + ) + elif not (lowered.startswith("http://") or lowered.startswith("https://")): + raise ValueError( + "invalid_image_url:Image inputs must use http(s) URLs or data:image/... URLs." + ) + image_part: Dict[str, Any] = {"type": "image_url", "image_url": {"url": url_value}} + if detail is not None: + if not isinstance(detail, str) or not detail.strip(): + raise ValueError("invalid_content_part:Image detail must be a non-empty string when provided.") + image_part["image_url"]["detail"] = detail.strip() + normalized_parts.append(image_part) + continue + + if part_type in _FILE_PART_TYPES: + raise ValueError( + "unsupported_content_type:Inline image inputs are supported, " + "but uploaded files and document inputs are not supported on this endpoint." + ) + + # Unknown part type — reject explicitly so clients get a clear error + # instead of a silently dropped turn. + raise ValueError( + f"unsupported_content_type:Unsupported content part type {raw_type!r}. " + "Only text and image_url/input_image parts are supported." + ) + + if not normalized_parts: + return "" + + # Text-only: collapse to a plain string so downstream logging/trajectory + # code sees the native shape and prompt caching on text-only turns is + # unaffected. + if all(p.get("type") == "text" for p in normalized_parts): + return "\n".join(p["text"] for p in normalized_parts if p.get("text")) + + return normalized_parts + + +def _content_has_visible_payload(content: Any) -> bool: + """True when content has any text or image attachment. 
Used to reject empty turns.""" + if isinstance(content, str): + return bool(content.strip()) + if isinstance(content, list): + for part in content: + if isinstance(part, dict): + ptype = str(part.get("type") or "").strip().lower() + if ptype in _TEXT_PART_TYPES and str(part.get("text") or "").strip(): + return True + if ptype in _IMAGE_PART_TYPES: + return True + return False + + +def _multimodal_validation_error(exc: ValueError, *, param: str) -> "web.Response": + """Translate a ``_normalize_multimodal_content`` ValueError into a 400 response.""" + raw = str(exc) + code, _, message = raw.partition(":") + if not message: + code, message = "invalid_content_part", raw + return web.json_response( + _openai_error(message, code=code, param=param), + status=400, + ) + + def check_api_server_requirements() -> bool: """Check if API server dependencies are available.""" return AIOHTTP_AVAILABLE @@ -169,7 +323,6 @@ class ResponseStore: ).fetchone() if row is None: return None - import time self._conn.execute( "UPDATE responses SET accessed_at = ? WHERE response_id = ?", (time.time(), response_id), @@ -179,7 +332,6 @@ class ResponseStore: def put(self, response_id: str, data: Dict[str, Any]) -> None: """Store a response, evicting the oldest if at capacity.""" - import time self._conn.execute( "INSERT OR REPLACE INTO responses (response_id, data, accessed_at) VALUES (?, ?, ?)", (response_id, json.dumps(data, default=str), time.time()), @@ -315,12 +467,12 @@ class _IdempotencyCache: def __init__(self, max_items: int = 1000, ttl_seconds: int = 300): from collections import OrderedDict self._store = OrderedDict() + self._inflight: Dict[tuple[str, str], "asyncio.Task[Any]"] = {} self._ttl = ttl_seconds self._max = max_items def _purge(self): - import time as _t - now = _t.time() + now = time.time() expired = [k for k, v in self._store.items() if now - v["ts"] > self._ttl] for k in expired: self._store.pop(k, None) @@ -332,11 +484,27 @@ class _IdempotencyCache: item = self._store.get(key) if item and item["fp"] == fingerprint: return item["resp"] - resp = await compute_coro() - import time as _t - self._store[key] = {"resp": resp, "fp": fingerprint, "ts": _t.time()} - self._purge() - return resp + + inflight_key = (key, fingerprint) + task = self._inflight.get(inflight_key) + if task is None: + async def _compute_and_store(): + resp = await compute_coro() + import time as _t + self._store[key] = {"resp": resp, "fp": fingerprint, "ts": _t.time()} + self._purge() + return resp + + task = asyncio.create_task(_compute_and_store()) + self._inflight[inflight_key] = task + + def _clear_inflight(done_task: "asyncio.Task[Any]") -> None: + if self._inflight.get(inflight_key) is done_task: + self._inflight.pop(inflight_key, None) + + task.add_done_callback(_clear_inflight) + + return await asyncio.shield(task) _idem_cache = _IdempotencyCache() @@ -366,6 +534,30 @@ def _derive_chat_session_id( return f"api-{digest}" +_CRON_AVAILABLE = False +try: + from cron.jobs import ( + list_jobs as _cron_list, + get_job as _cron_get, + create_job as _cron_create, + update_job as _cron_update, + remove_job as _cron_remove, + pause_job as _cron_pause, + resume_job as _cron_resume, + trigger_job as _cron_trigger, + ) + _CRON_AVAILABLE = True +except ImportError: + _cron_list = None + _cron_get = None + _cron_create = None + _cron_update = None + _cron_remove = None + _cron_pause = None + _cron_resume = None + _cron_trigger = None + + class APIServerAdapter(BasePlatformAdapter): """ OpenAI-compatible HTTP API server adapter. 
@@ -637,26 +829,32 @@ class APIServerAdapter(BasePlatformAdapter): system_prompt = None conversation_messages: List[Dict[str, str]] = [] - for msg in messages: + for idx, msg in enumerate(messages): role = msg.get("role", "") - content = _normalize_chat_content(msg.get("content", "")) + raw_content = msg.get("content", "") if role == "system": - # Accumulate system messages + # System messages don't support images (Anthropic rejects, OpenAI + # text-model systems don't render them). Flatten to text. + content = _normalize_chat_content(raw_content) if system_prompt is None: system_prompt = content else: system_prompt = system_prompt + "\n" + content elif role in ("user", "assistant"): + try: + content = _normalize_multimodal_content(raw_content) + except ValueError as exc: + return _multimodal_validation_error(exc, param=f"messages[{idx}].content") conversation_messages.append({"role": role, "content": content}) # Extract the last user message as the primary input - user_message = "" + user_message: Any = "" history = [] if conversation_messages: user_message = conversation_messages[-1].get("content", "") history = conversation_messages[:-1] - if not user_message: + if not _content_has_visible_payload(user_message): return web.json_response( {"error": {"message": "No user message found in messages", "type": "invalid_request_error"}}, status=400, @@ -1424,16 +1622,19 @@ class APIServerAdapter(BasePlatformAdapter): # No error if conversation doesn't exist yet — it's a new conversation # Normalize input to message list - input_messages: List[Dict[str, str]] = [] + input_messages: List[Dict[str, Any]] = [] if isinstance(raw_input, str): input_messages = [{"role": "user", "content": raw_input}] elif isinstance(raw_input, list): - for item in raw_input: + for idx, item in enumerate(raw_input): if isinstance(item, str): input_messages.append({"role": "user", "content": item}) elif isinstance(item, dict): role = item.get("role", "user") - content = _normalize_chat_content(item.get("content", "")) + try: + content = _normalize_multimodal_content(item.get("content", "")) + except ValueError as exc: + return _multimodal_validation_error(exc, param=f"input[{idx}].content") input_messages.append({"role": role, "content": content}) else: return web.json_response(_openai_error("'input' must be a string or array"), status=400) @@ -1442,7 +1643,7 @@ class APIServerAdapter(BasePlatformAdapter): # This lets stateless clients supply their own history instead of # relying on server-side response chaining via previous_response_id. # Precedence: explicit conversation_history > previous_response_id. 
- conversation_history: List[Dict[str, str]] = [] + conversation_history: List[Dict[str, Any]] = [] raw_history = body.get("conversation_history") if raw_history: if not isinstance(raw_history, list): @@ -1456,7 +1657,11 @@ class APIServerAdapter(BasePlatformAdapter): _openai_error(f"conversation_history[{i}] must have 'role' and 'content' fields"), status=400, ) - conversation_history.append({"role": str(entry["role"]), "content": str(entry["content"])}) + try: + entry_content = _normalize_multimodal_content(entry["content"]) + except ValueError as exc: + return _multimodal_validation_error(exc, param=f"conversation_history[{i}].content") + conversation_history.append({"role": str(entry["role"]), "content": entry_content}) if previous_response_id: logger.debug("Both conversation_history and previous_response_id provided; using conversation_history") @@ -1476,8 +1681,8 @@ class APIServerAdapter(BasePlatformAdapter): conversation_history.append(msg) # Last input message is the user_message - user_message = input_messages[-1].get("content", "") if input_messages else "" - if not user_message: + user_message: Any = input_messages[-1].get("content", "") if input_messages else "" + if not _content_has_visible_payload(user_message): return web.json_response(_openai_error("No user message found in input"), status=400) # Truncation support @@ -1682,44 +1887,16 @@ class APIServerAdapter(BasePlatformAdapter): # Cron jobs API # ------------------------------------------------------------------ - # Check cron module availability once (not per-request) - _CRON_AVAILABLE = False - try: - from cron.jobs import ( - list_jobs as _cron_list, - get_job as _cron_get, - create_job as _cron_create, - update_job as _cron_update, - remove_job as _cron_remove, - pause_job as _cron_pause, - resume_job as _cron_resume, - trigger_job as _cron_trigger, - ) - # Wrap as staticmethod to prevent descriptor binding — these are plain - # module functions, not instance methods. Without this, self._cron_*() - # injects ``self`` as the first positional argument and every call - # raises TypeError. 
- _cron_list = staticmethod(_cron_list) - _cron_get = staticmethod(_cron_get) - _cron_create = staticmethod(_cron_create) - _cron_update = staticmethod(_cron_update) - _cron_remove = staticmethod(_cron_remove) - _cron_pause = staticmethod(_cron_pause) - _cron_resume = staticmethod(_cron_resume) - _cron_trigger = staticmethod(_cron_trigger) - _CRON_AVAILABLE = True - except ImportError: - pass - _JOB_ID_RE = __import__("re").compile(r"[a-f0-9]{12}") # Allowed fields for update — prevents clients injecting arbitrary keys _UPDATE_ALLOWED_FIELDS = {"name", "schedule", "prompt", "deliver", "skills", "skill", "repeat", "enabled"} _MAX_NAME_LENGTH = 200 _MAX_PROMPT_LENGTH = 5000 - def _check_jobs_available(self) -> Optional["web.Response"]: + @staticmethod + def _check_jobs_available() -> Optional["web.Response"]: """Return error response if cron module isn't available.""" - if not self._CRON_AVAILABLE: + if not _CRON_AVAILABLE: return web.json_response( {"error": "Cron module not available"}, status=501, ) @@ -1744,7 +1921,7 @@ class APIServerAdapter(BasePlatformAdapter): return cron_err try: include_disabled = request.query.get("include_disabled", "").lower() in ("true", "1") - jobs = self._cron_list(include_disabled=include_disabled) + jobs = _cron_list(include_disabled=include_disabled) return web.json_response({"jobs": jobs}) except Exception as e: return web.json_response({"error": str(e)}, status=500) @@ -1792,7 +1969,7 @@ class APIServerAdapter(BasePlatformAdapter): if repeat is not None: kwargs["repeat"] = repeat - job = self._cron_create(**kwargs) + job = _cron_create(**kwargs) return web.json_response({"job": job}) except Exception as e: return web.json_response({"error": str(e)}, status=500) @@ -1809,7 +1986,7 @@ class APIServerAdapter(BasePlatformAdapter): if id_err: return id_err try: - job = self._cron_get(job_id) + job = _cron_get(job_id) if not job: return web.json_response({"error": "Job not found"}, status=404) return web.json_response({"job": job}) @@ -1842,7 +2019,7 @@ class APIServerAdapter(BasePlatformAdapter): return web.json_response( {"error": f"Prompt must be ≤ {self._MAX_PROMPT_LENGTH} characters"}, status=400, ) - job = self._cron_update(job_id, sanitized) + job = _cron_update(job_id, sanitized) if not job: return web.json_response({"error": "Job not found"}, status=404) return web.json_response({"job": job}) @@ -1861,7 +2038,7 @@ class APIServerAdapter(BasePlatformAdapter): if id_err: return id_err try: - success = self._cron_remove(job_id) + success = _cron_remove(job_id) if not success: return web.json_response({"error": "Job not found"}, status=404) return web.json_response({"ok": True}) @@ -1880,7 +2057,7 @@ class APIServerAdapter(BasePlatformAdapter): if id_err: return id_err try: - job = self._cron_pause(job_id) + job = _cron_pause(job_id) if not job: return web.json_response({"error": "Job not found"}, status=404) return web.json_response({"job": job}) @@ -1899,7 +2076,7 @@ class APIServerAdapter(BasePlatformAdapter): if id_err: return id_err try: - job = self._cron_resume(job_id) + job = _cron_resume(job_id) if not job: return web.json_response({"error": "Job not found"}, status=404) return web.json_response({"job": job}) @@ -1918,7 +2095,7 @@ class APIServerAdapter(BasePlatformAdapter): if id_err: return id_err try: - job = self._cron_trigger(job_id) + job = _cron_trigger(job_id) if not job: return web.json_response({"error": "Job not found"}, status=404) return web.json_response({"job": job}) diff --git a/gateway/platforms/base.py 
b/gateway/platforms/base.py index 65f7226e10..56bb3c5cb4 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -6,6 +6,7 @@ and implement the required methods. """ import asyncio +import inspect import ipaddress import logging import os @@ -18,6 +19,8 @@ import uuid from abc import ABC, abstractmethod from urllib.parse import urlsplit +from utils import normalize_proxy_url + logger = logging.getLogger(__name__) @@ -158,13 +161,13 @@ def resolve_proxy_url(platform_env_var: str | None = None) -> str | None: if platform_env_var: value = (os.environ.get(platform_env_var) or "").strip() if value: - return value + return normalize_proxy_url(value) for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", "https_proxy", "http_proxy", "all_proxy"): value = (os.environ.get(key) or "").strip() if value: - return value - return _detect_macos_system_proxy() + return normalize_proxy_url(value) + return normalize_proxy_url(_detect_macos_system_proxy()) def proxy_kwargs_for_bot(proxy_url: str | None) -> dict: @@ -390,12 +393,9 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) -> if not is_safe_url(url): raise ValueError(f"Blocked unsafe URL (SSRF protection): {safe_url_for_log(url)}") - import asyncio import httpx - import logging as _logging - _log = _logging.getLogger(__name__) + _log = logging.getLogger(__name__) - last_exc = None async with httpx.AsyncClient( timeout=30.0, follow_redirects=True, @@ -413,7 +413,6 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) -> response.raise_for_status() return cache_image_from_bytes(response.content, ext) except (httpx.TimeoutException, httpx.HTTPStatusError) as exc: - last_exc = exc if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429: raise if attempt < retries: @@ -429,7 +428,6 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) -> await asyncio.sleep(wait) continue raise - raise last_exc def cleanup_image_cache(max_age_hours: int = 24) -> int: @@ -509,12 +507,9 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) -> if not is_safe_url(url): raise ValueError(f"Blocked unsafe URL (SSRF protection): {safe_url_for_log(url)}") - import asyncio import httpx - import logging as _logging - _log = _logging.getLogger(__name__) + _log = logging.getLogger(__name__) - last_exc = None async with httpx.AsyncClient( timeout=30.0, follow_redirects=True, @@ -532,7 +527,6 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) -> response.raise_for_status() return cache_audio_from_bytes(response.content, ext) except (httpx.TimeoutException, httpx.HTTPStatusError) as exc: - last_exc = exc if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429: raise if attempt < retries: @@ -548,7 +542,39 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) -> await asyncio.sleep(wait) continue raise - raise last_exc + + +# --------------------------------------------------------------------------- +# Video cache utilities +# +# Same pattern as image/audio cache -- videos from platforms are downloaded +# here so the agent can reference them by local file path. 
+# --------------------------------------------------------------------------- + +VIDEO_CACHE_DIR = get_hermes_dir("cache/videos", "video_cache") + +SUPPORTED_VIDEO_TYPES = { + ".mp4": "video/mp4", + ".mov": "video/quicktime", + ".webm": "video/webm", + ".mkv": "video/x-matroska", + ".avi": "video/x-msvideo", +} + + +def get_video_cache_dir() -> Path: + """Return the video cache directory, creating it if it doesn't exist.""" + VIDEO_CACHE_DIR.mkdir(parents=True, exist_ok=True) + return VIDEO_CACHE_DIR + + +def cache_video_from_bytes(data: bytes, ext: str = ".mp4") -> str: + """Save raw video bytes to the cache and return the absolute file path.""" + cache_dir = get_video_cache_dir() + filename = f"video_{uuid.uuid4().hex[:12]}{ext}" + filepath = cache_dir / filename + filepath.write_bytes(data) + return str(filepath) # --------------------------------------------------------------------------- @@ -880,10 +906,11 @@ class BasePlatformAdapter(ABC): # working on a task after --replace or manual restarts. self._background_tasks: set[asyncio.Task] = set() # One-shot callbacks to fire after the main response is delivered. - # Keyed by session_key. GatewayRunner uses this to defer - # background-review notifications ("💾 Skill created") until the - # primary reply has been sent. - self._post_delivery_callbacks: Dict[str, Callable] = {} + # Keyed by session_key. Values are either a bare callback (legacy) or + # a ``(generation, callback)`` tuple so GatewayRunner can make deferred + # deliveries generation-aware and avoid stale runs clearing callbacks + # registered by a fresher run for the same session. + self._post_delivery_callbacks: Dict[str, Any] = {} self._expected_cancelled_tasks: set[asyncio.Task] = set() self._busy_session_handler: Optional[Callable[[MessageEvent, str], Awaitable[bool]]] = None # Chats where auto-TTS on voice input is disabled (set by /voice off) @@ -1316,7 +1343,7 @@ class BasePlatformAdapter(ABC): # Extract MEDIA: tags, allowing optional whitespace after the colon # and quoted/backticked paths for LLM-formatted outputs. media_pattern = re.compile( - r'''[`"']?MEDIA:\s*(?P`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?''' + r'''[`"']?MEDIA:\s*(?P`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|pdf)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?''' ) for match in media_pattern.finditer(content): path = match.group("path").strip() @@ -1401,7 +1428,13 @@ class BasePlatformAdapter(ABC): return paths, cleaned - async def _keep_typing(self, chat_id: str, interval: float = 2.0, metadata=None) -> None: + async def _keep_typing( + self, + chat_id: str, + interval: float = 2.0, + metadata=None, + stop_event: asyncio.Event | None = None, + ) -> None: """ Continuously send typing indicator until cancelled. 
@@ -1415,9 +1448,18 @@ class BasePlatformAdapter(ABC): """ try: while True: + if stop_event is not None and stop_event.is_set(): + return if chat_id not in self._typing_paused: await self.send_typing(chat_id, metadata=metadata) - await asyncio.sleep(interval) + if stop_event is None: + await asyncio.sleep(interval) + continue + try: + await asyncio.wait_for(stop_event.wait(), timeout=interval) + except asyncio.TimeoutError: + continue + return except asyncio.CancelledError: pass # Normal cancellation when handler completes finally: @@ -1444,6 +1486,59 @@ class BasePlatformAdapter(ABC): """Resume typing indicator for a chat after approval resolves.""" self._typing_paused.discard(chat_id) + async def interrupt_session_activity(self, session_key: str, chat_id: str) -> None: + """Signal the active session loop to stop and clear typing immediately.""" + if session_key: + interrupt_event = self._active_sessions.get(session_key) + if interrupt_event is not None: + interrupt_event.set() + try: + await self.stop_typing(chat_id) + except Exception: + pass + + def register_post_delivery_callback( + self, + session_key: str, + callback: Callable, + *, + generation: int | None = None, + ) -> None: + """Register a deferred callback to fire after the main response. + + ``generation`` lets callers tie the callback to a specific gateway run + generation so stale runs cannot clear callbacks owned by a fresher run. + """ + if not session_key or not callable(callback): + return + if generation is None: + self._post_delivery_callbacks[session_key] = callback + else: + self._post_delivery_callbacks[session_key] = (int(generation), callback) + + def pop_post_delivery_callback( + self, + session_key: str, + *, + generation: int | None = None, + ) -> Callable | None: + """Pop a deferred callback, optionally requiring generation ownership.""" + if not session_key: + return None + entry = self._post_delivery_callbacks.get(session_key) + if entry is None: + return None + if isinstance(entry, tuple) and len(entry) == 2: + entry_generation, callback = entry + if generation is not None and int(entry_generation) != int(generation): + return None + self._post_delivery_callbacks.pop(session_key, None) + return callback if callable(callback) else None + if generation is not None: + return None + self._post_delivery_callbacks.pop(session_key, None) + return entry if callable(entry) else None + # ── Processing lifecycle hooks ────────────────────────────────────────── # Subclasses override these to react to message processing events # (e.g. Discord adds 👀/✅/❌ reactions). @@ -1684,8 +1779,6 @@ class BasePlatformAdapter(ABC): HERMES_HUMAN_DELAY_MIN_MS: minimum delay in ms (default 800, custom mode) HERMES_HUMAN_DELAY_MAX_MS: maximum delay in ms (default 2500, custom mode) """ - import random - mode = os.getenv("HERMES_HUMAN_DELAY_MODE", "off").lower() if mode == "off": return 0.0 @@ -1714,10 +1807,23 @@ class BasePlatformAdapter(ABC): # Fall back to a new Event only if the entry was removed externally. 
interrupt_event = self._active_sessions.get(session_key) or asyncio.Event() self._active_sessions[session_key] = interrupt_event + callback_generation = getattr(interrupt_event, "_hermes_run_generation", None) # Start continuous typing indicator (refreshes every 2 seconds) _thread_metadata = {"thread_id": event.source.thread_id} if event.source.thread_id else None - typing_task = asyncio.create_task(self._keep_typing(event.source.chat_id, metadata=_thread_metadata)) + _keep_typing_kwargs = {"metadata": _thread_metadata} + try: + _keep_typing_sig = inspect.signature(self._keep_typing) + except (TypeError, ValueError): + _keep_typing_sig = None + if _keep_typing_sig is None or "stop_event" in _keep_typing_sig.parameters: + _keep_typing_kwargs["stop_event"] = interrupt_event + typing_task = asyncio.create_task( + self._keep_typing( + event.source.chat_id, + **_keep_typing_kwargs, + ) + ) try: await self._run_processing_hook("on_processing_start", event) @@ -1976,7 +2082,14 @@ class BasePlatformAdapter(ABC): finally: # Fire any one-shot post-delivery callback registered for this # session (e.g. deferred background-review notifications). - _post_cb = getattr(self, "_post_delivery_callbacks", {}).pop(session_key, None) + _callback_generation = callback_generation + if hasattr(self, "pop_post_delivery_callback"): + _post_cb = self.pop_post_delivery_callback( + session_key, + generation=_callback_generation, + ) + else: + _post_cb = getattr(self, "_post_delivery_callbacks", {}).pop(session_key, None) if callable(_post_cb): try: _post_cb() @@ -2022,10 +2135,10 @@ class BasePlatformAdapter(ABC): pass # Leave _active_sessions[session_key] populated — the drain # task's own lifecycle will clean it up. - return - # Clean up session tracking - if session_key in self._active_sessions: - del self._active_sessions[session_key] + else: + # Clean up session tracking + if session_key in self._active_sessions: + del self._active_sessions[session_key] async def cancel_background_tasks(self) -> None: """Cancel any in-flight background message-processing tasks. @@ -2033,12 +2146,26 @@ class BasePlatformAdapter(ABC): Used during gateway shutdown/replacement so active sessions from the old process do not keep running after adapters are being torn down. """ - tasks = [task for task in self._background_tasks if not task.done()] - for task in tasks: - self._expected_cancelled_tasks.add(task) - task.cancel() - if tasks: + # Loop until no new tasks appear. Without this, a message + # arriving during the `await asyncio.gather` below would spawn + # a fresh _process_message_background task (added to + # self._background_tasks at line ~1668 via handle_message), + # and the _background_tasks.clear() at the end of this method + # would drop the reference — the task runs untracked against a + # disconnecting adapter, logs send-failures, and may linger + # until it completes on its own. Retrying the drain until the + # task set stabilizes closes the window. + MAX_DRAIN_ROUNDS = 5 + for _ in range(MAX_DRAIN_ROUNDS): + tasks = [task for task in self._background_tasks if not task.done()] + if not tasks: + break + for task in tasks: + self._expected_cancelled_tasks.add(task) + task.cancel() await asyncio.gather(*tasks, return_exceptions=True) + # Loop: late-arrival tasks spawned during the gather above + # will be in self._background_tasks now. Re-check. 
self._background_tasks.clear() self._expected_cancelled_tasks.clear() self._pending_messages.clear() diff --git a/gateway/platforms/bluebubbles.py b/gateway/platforms/bluebubbles.py index a8a2929698..39d4e537eb 100644 --- a/gateway/platforms/bluebubbles.py +++ b/gateway/platforms/bluebubbles.py @@ -75,7 +75,7 @@ def _redact(text: str) -> str: def check_bluebubbles_requirements() -> bool: try: import aiohttp # noqa: F401 - import httpx as _httpx # noqa: F401 + import httpx # noqa: F401 except ImportError: return False return True diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py index b1585637ff..d43e18d73d 100644 --- a/gateway/platforms/discord.py +++ b/gateway/platforms/discord.py @@ -498,6 +498,7 @@ class DiscordAdapter(BasePlatformAdapter): self._allowed_role_ids: set = set() # For DISCORD_ALLOWED_ROLES filtering # Voice channel state (per-guild) self._voice_clients: Dict[int, Any] = {} # guild_id -> VoiceClient + self._voice_locks: Dict[int, asyncio.Lock] = {} # guild_id -> serialize join/leave # Text batching: merge rapid successive messages (Telegram-style) self._text_batch_delay_seconds = float(os.getenv("HERMES_DISCORD_TEXT_BATCH_DELAY_SECONDS", "0.6")) self._text_batch_split_delay_seconds = float(os.getenv("HERMES_DISCORD_TEXT_BATCH_SPLIT_DELAY_SECONDS", "2.0")) @@ -540,7 +541,6 @@ class DiscordAdapter(BasePlatformAdapter): # ctypes.util.find_library fails on macOS with Homebrew-installed libs, # so fall back to known Homebrew paths if needed. if not opus_path: - import sys _homebrew_paths = ( "/opt/homebrew/lib/libopus.dylib", # Apple Silicon "/usr/local/lib/libopus.dylib", # Intel Mac @@ -636,6 +636,15 @@ class DiscordAdapter(BasePlatformAdapter): @self._client.event async def on_message(message: DiscordMessage): + # Block until _resolve_allowed_usernames has swapped + # any raw usernames in DISCORD_ALLOWED_USERS for numeric + # IDs (otherwise on_message's author.id lookup can miss). + if not adapter_self._ready_event.is_set(): + try: + await asyncio.wait_for(adapter_self._ready_event.wait(), timeout=30.0) + except asyncio.TimeoutError: + pass + # Dedup: Discord RESUME replays events after reconnects (#4777) if adapter_self._dedup.is_duplicate(str(message.id)): return @@ -1071,6 +1080,8 @@ class DiscordAdapter(BasePlatformAdapter): chat_id: str, message_id: str, content: str, + *, + finalize: bool = False, ) -> SendResult: """Edit a previously sent Discord message.""" if not self._client: @@ -1237,51 +1248,53 @@ class DiscordAdapter(BasePlatformAdapter): return False guild_id = channel.guild.id - # Already connected in this guild? - existing = self._voice_clients.get(guild_id) - if existing and existing.is_connected(): - if existing.channel.id == channel.id: + async with self._voice_locks.setdefault(guild_id, asyncio.Lock()): + # Already connected in this guild? 
+ existing = self._voice_clients.get(guild_id) + if existing and existing.is_connected(): + if existing.channel.id == channel.id: + self._reset_voice_timeout(guild_id) + return True + await existing.move_to(channel) self._reset_voice_timeout(guild_id) return True - await existing.move_to(channel) + + vc = await channel.connect() + self._voice_clients[guild_id] = vc self._reset_voice_timeout(guild_id) + + # Start voice receiver (Phase 2: listen to users) + try: + receiver = VoiceReceiver(vc, allowed_user_ids=self._allowed_user_ids) + receiver.start() + self._voice_receivers[guild_id] = receiver + self._voice_listen_tasks[guild_id] = asyncio.ensure_future( + self._voice_listen_loop(guild_id) + ) + except Exception as e: + logger.warning("Voice receiver failed to start: %s", e) + return True - vc = await channel.connect() - self._voice_clients[guild_id] = vc - self._reset_voice_timeout(guild_id) - - # Start voice receiver (Phase 2: listen to users) - try: - receiver = VoiceReceiver(vc, allowed_user_ids=self._allowed_user_ids) - receiver.start() - self._voice_receivers[guild_id] = receiver - self._voice_listen_tasks[guild_id] = asyncio.ensure_future( - self._voice_listen_loop(guild_id) - ) - except Exception as e: - logger.warning("Voice receiver failed to start: %s", e) - - return True - async def leave_voice_channel(self, guild_id: int) -> None: """Disconnect from the voice channel in a guild.""" - # Stop voice receiver first - receiver = self._voice_receivers.pop(guild_id, None) - if receiver: - receiver.stop() - listen_task = self._voice_listen_tasks.pop(guild_id, None) - if listen_task: - listen_task.cancel() + async with self._voice_locks.setdefault(guild_id, asyncio.Lock()): + # Stop voice receiver first + receiver = self._voice_receivers.pop(guild_id, None) + if receiver: + receiver.stop() + listen_task = self._voice_listen_tasks.pop(guild_id, None) + if listen_task: + listen_task.cancel() - vc = self._voice_clients.pop(guild_id, None) - if vc and vc.is_connected(): - await vc.disconnect() - task = self._voice_timeout_tasks.pop(guild_id, None) - if task: - task.cancel() - self._voice_text_channels.pop(guild_id, None) - self._voice_sources.pop(guild_id, None) + vc = self._voice_clients.pop(guild_id, None) + if vc and vc.is_connected(): + await vc.disconnect() + task = self._voice_timeout_tasks.pop(guild_id, None) + if task: + task.cancel() + self._voice_text_channels.pop(guild_id, None) + self._voice_sources.pop(guild_id, None) # Maximum seconds to wait for voice playback before giving up PLAYBACK_TIMEOUT = 120 @@ -1408,8 +1421,7 @@ class DiscordAdapter(BasePlatformAdapter): speaking_user_ids: set = set() receiver = self._voice_receivers.get(guild_id) if receiver: - import time as _time - now = _time.monotonic() + now = time.monotonic() with receiver._lock: for ssrc, last_t in receiver._last_packet_time.items(): # Consider "speaking" if audio received within last 2 seconds @@ -2948,6 +2960,17 @@ class DiscordAdapter(BasePlatformAdapter): parent_channel_id = self._get_parent_channel_id(message.channel) is_voice_linked_channel = False + + # Save mention-stripped text before auto-threading since create_thread() + # can clobber message.content, breaking /command detection in channels. 
+ raw_content = message.content.strip() + normalized_content = raw_content + mention_prefix = False + if self._client.user and self._client.user in message.mentions: + mention_prefix = True + normalized_content = normalized_content.replace(f"<@{self._client.user.id}>", "").strip() + normalized_content = normalized_content.replace(f"<@!{self._client.user.id}>", "").strip() + message.content = normalized_content if not isinstance(message.channel, discord.DMChannel): channel_ids = {str(message.channel.id)} if parent_channel_id: @@ -2985,13 +3008,8 @@ class DiscordAdapter(BasePlatformAdapter): in_bot_thread = is_thread and thread_id in self._threads if require_mention and not is_free_channel and not in_bot_thread: - if self._client.user not in message.mentions: + if self._client.user not in message.mentions and not mention_prefix: return - - if self._client.user and self._client.user in message.mentions: - message.content = message.content.replace(f"<@{self._client.user.id}>", "").strip() - message.content = message.content.replace(f"<@!{self._client.user.id}>", "").strip() - # Auto-thread: when enabled, automatically create a thread for every # @mention in a text channel so each conversation is isolated (like Slack). # Messages already inside threads or DMs are unaffected. @@ -3013,7 +3031,7 @@ class DiscordAdapter(BasePlatformAdapter): # Determine message type msg_type = MessageType.TEXT - if message.content.startswith("/"): + if normalized_content.startswith("/"): msg_type = MessageType.COMMAND elif message.attachments: # Check attachment types @@ -3153,7 +3171,9 @@ class DiscordAdapter(BasePlatformAdapter): att.filename, e, exc_info=True, ) - event_text = message.content + # Use normalized_content (saved before auto-threading) instead of message.content, + # to detect /slash commands in channel messages. + event_text = normalized_content if pending_text_injection: event_text = f"{pending_text_injection}\n\n{event_text}" if event_text else pending_text_injection @@ -3265,7 +3285,20 @@ class DiscordAdapter(BasePlatformAdapter): "[Discord] Flushing text batch %s (%d chars)", key, len(event.text or ""), ) - await self.handle_message(event) + # Shield the downstream dispatch so that a subsequent chunk + # arriving while handle_message is mid-flight cannot cancel + # the running agent turn. _enqueue_text_event always cancels + # the prior flush task when a new chunk lands; without this + # shield, CancelledError would propagate from our task down + # into handle_message → the agent's streaming request, + # aborting the response the user was waiting on. The new + # chunk is handled by the fresh flush task regardless. + await asyncio.shield(self.handle_message(event)) + except asyncio.CancelledError: + # Only reached if cancel landed before the pop — the shielded + # handle_message is unaffected either way. Let the task exit + # cleanly so the finally block cleans up. 
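Aside: the shield in the flush path above is the standard way to let a debounce task be cancelled without aborting work already handed downstream. A stripped-down sketch of the same shape, assuming a hypothetical `_handle(text)` coroutine in place of `handle_message`:

```python
import asyncio
from typing import List, Optional

class Batcher:
    """Debounce incoming chunks; a late chunk cancels the pending flush
    task but must never cancel a handler that is already running."""

    def __init__(self, delay: float = 0.2) -> None:
        self.delay = delay
        self._buf: List[str] = []
        self._task: Optional[asyncio.Task] = None

    async def _handle(self, text: str) -> None:
        await asyncio.sleep(0.5)  # stand-in for the agent call
        print("handled:", text)

    async def _flush(self) -> None:
        try:
            await asyncio.sleep(self.delay)
            text, self._buf = " ".join(self._buf), []
            # Shield so cancellation of *this task* (by a newer chunk)
            # cannot propagate into the in-flight handler.
            await asyncio.shield(self._handle(text))
        except asyncio.CancelledError:
            pass  # cancelled before the pop fired; the new task owns the buffer

    def feed(self, chunk: str) -> None:
        self._buf.append(chunk)
        if self._task and not self._task.done():
            self._task.cancel()
        self._task = asyncio.create_task(self._flush())

async def main() -> None:
    b = Batcher()
    b.feed("hello")
    await asyncio.sleep(0.05)
    b.feed("world")          # cancels the first flush task, not the handler
    await asyncio.sleep(2)   # prints: handled: hello world

asyncio.run(main())
```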
+ pass finally: if self._pending_text_batch_tasks.get(key) is current_task: self._pending_text_batch_tasks.pop(key, None) diff --git a/gateway/platforms/feishu.py b/gateway/platforms/feishu.py index 351337e827..85cebe5381 100644 --- a/gateway/platforms/feishu.py +++ b/gateway/platforms/feishu.py @@ -8,7 +8,8 @@ Supports: - Gateway allowlist integration via FEISHU_ALLOWED_USERS - Persistent dedup state across restarts - Per-chat serial message processing (matches openclaw createChatQueue) -- Persistent ACK emoji reaction on inbound messages +- Processing status reactions: Typing while working, removed on success, + swapped for CrossMark on failure - Reaction events routed as synthetic text events (matches openclaw) - Interactive card button-click events routed as synthetic COMMAND events - Webhook anomaly tracking (matches openclaw createWebhookAnomalyTracker) @@ -29,6 +30,7 @@ import re import threading import time import uuid +from collections import OrderedDict from dataclasses import dataclass, field from datetime import datetime from pathlib import Path @@ -98,6 +100,7 @@ from gateway.platforms.base import ( BasePlatformAdapter, MessageEvent, MessageType, + ProcessingOutcome, SendResult, SUPPORTED_DOCUMENT_TYPES, cache_document_from_bytes, @@ -119,6 +122,8 @@ _MARKDOWN_HINT_RE = re.compile( re.MULTILINE, ) _MARKDOWN_LINK_RE = re.compile(r"\[([^\]]+)\]\(([^)]+)\)") +_MARKDOWN_FENCE_OPEN_RE = re.compile(r"^```([^\n`]*)\s*$") +_MARKDOWN_FENCE_CLOSE_RE = re.compile(r"^```\s*$") _MENTION_RE = re.compile(r"@_user_\d+") _MULTISPACE_RE = re.compile(r"[ \t]{2,}") _POST_CONTENT_INVALID_RE = re.compile(r"content format of the post type is incorrect", re.IGNORECASE) @@ -188,7 +193,17 @@ _APPROVAL_LABEL_MAP: Dict[str, str] = { } _FEISHU_BOT_MSG_TRACK_SIZE = 512 # LRU size for tracking sent message IDs _FEISHU_REPLY_FALLBACK_CODES = frozenset({230011, 231003}) # reply target withdrawn/missing → create fallback -_FEISHU_ACK_EMOJI = "OK" + +# Feishu reactions render as prominent badges, unlike Discord/Telegram's +# small footer emoji — a success badge on every message would add noise, so +# we only mark start (Typing) and failure (CrossMark); the reply itself is +# the success signal. +_FEISHU_REACTION_IN_PROGRESS = "Typing" +_FEISHU_REACTION_FAILURE = "CrossMark" +# Bound on the (message_id → reaction_id) handle cache. Happy-path entries +# drain on completion; the cap is a safeguard against unbounded growth from +# delete-failures, not a capacity plan. +_FEISHU_PROCESSING_REACTION_CACHE_SIZE = 1024 # QR onboarding constants _ONBOARD_ACCOUNTS_URLS = { @@ -430,23 +445,66 @@ def _coerce_required_int(value: Any, default: int, min_value: int = 0) -> int: def _build_markdown_post_payload(content: str) -> str: + rows = _build_markdown_post_rows(content) return json.dumps( { "zh_cn": { - "content": [ - [ - { - "tag": "md", - "text": content, - } - ] - ], + "content": rows, } }, ensure_ascii=False, ) +def _build_markdown_post_rows(content: str) -> List[List[Dict[str, str]]]: + """Build Feishu post rows while isolating fenced code blocks. + + Feishu's `md` renderer can swallow trailing content when a fenced code block + appears inside one large markdown element. Split the reply at real fence + lines so prose before/after the code block remains visible while code stays + in a dedicated row. 
+ """ + if not content: + return [[{"tag": "md", "text": ""}]] + if "```" not in content: + return [[{"tag": "md", "text": content}]] + + rows: List[List[Dict[str, str]]] = [] + current: List[str] = [] + in_code_block = False + + def _flush_current() -> None: + nonlocal current + if not current: + return + segment = "\n".join(current) + if segment.strip(): + rows.append([{"tag": "md", "text": segment}]) + current = [] + + for raw_line in content.splitlines(): + stripped_line = raw_line.strip() + is_fence = bool( + _MARKDOWN_FENCE_CLOSE_RE.match(stripped_line) + if in_code_block + else _MARKDOWN_FENCE_OPEN_RE.match(stripped_line) + ) + + if is_fence: + if not in_code_block: + _flush_current() + current.append(raw_line) + in_code_block = not in_code_block + if not in_code_block: + _flush_current() + continue + + current.append(raw_line) + + _flush_current() + return rows or [[{"tag": "md", "text": content}]] + + def parse_feishu_post_payload(payload: Any) -> FeishuPostParseResult: resolved = _resolve_post_payload(payload) if not resolved: @@ -1096,6 +1154,9 @@ class FeishuAdapter(BasePlatformAdapter): # Exec approval button state (approval_id → {session_key, message_id, chat_id}) self._approval_state: Dict[int, Dict[str, str]] = {} self._approval_counter = itertools.count(1) + # Feishu reaction deletion requires the opaque reaction_id returned + # by create, so we cache it per message_id. + self._pending_processing_reactions: "OrderedDict[str, str]" = OrderedDict() self._load_seen_message_ids() @staticmethod @@ -1423,6 +1484,8 @@ class FeishuAdapter(BasePlatformAdapter): chat_id: str, message_id: str, content: str, + *, + finalize: bool = False, ) -> SendResult: """Edit a previously sent Feishu text/post message.""" if not self._client: @@ -1925,8 +1988,8 @@ class FeishuAdapter(BasePlatformAdapter): if not message_id or self._is_duplicate(message_id): logger.debug("[Feishu] Dropping duplicate/missing message_id: %s", message_id) return - if getattr(sender, "sender_type", "") == "bot": - logger.debug("[Feishu] Dropping bot-originated event: %s", message_id) + if self._is_self_sent_bot_message(event): + logger.debug("[Feishu] Dropping self-sent bot event: %s", message_id) return chat_type = getattr(message, "chat_type", "p2p") @@ -2003,12 +2066,12 @@ class FeishuAdapter(BasePlatformAdapter): operator_type, emoji_type, ) - # Only process reactions from real users. Ignore app/bot-generated reactions - # and Hermes' own ACK emoji to avoid feedback loops. + # Drop bot/app-origin reactions to break the feedback loop from our + # own lifecycle reactions. A human reacting with the same emoji (e.g. + # clicking Typing on a bot message) is still routed through. loop = self._loop if ( operator_type in {"bot", "app"} - or emoji_type == _FEISHU_ACK_EMOJI or not message_id or loop is None or bool(getattr(loop, "is_closed", lambda: False)()) @@ -2232,33 +2295,35 @@ class FeishuAdapter(BasePlatformAdapter): async def _handle_message_with_guards(self, event: MessageEvent) -> None: """Dispatch a single event through the agent pipeline with per-chat serialization - and a persistent ACK emoji reaction before processing starts. + before handing the event off to the agent. - - Per-chat lock: ensures messages in the same chat are processed one at a time - (matches openclaw's createChatQueue serial queue behaviour). - - ACK indicator: adds a CHECK reaction to the triggering message before handing - off to the agent and leaves it in place as a receipt marker. 
+ Per-chat lock ensures messages in the same chat are processed one at a + time (matches openclaw's createChatQueue serial queue behaviour). """ chat_id = getattr(event.source, "chat_id", "") or "" if event.source else "" chat_lock = self._get_chat_lock(chat_id) async with chat_lock: - message_id = event.message_id - if message_id: - await self._add_ack_reaction(message_id) await self.handle_message(event) - async def _add_ack_reaction(self, message_id: str) -> Optional[str]: - """Add a persistent ACK emoji reaction to signal the message was received.""" - if not self._client or not message_id: + # ========================================================================= + # Processing status reactions + # ========================================================================= + + def _reactions_enabled(self) -> bool: + return os.getenv("FEISHU_REACTIONS", "true").strip().lower() not in ("false", "0", "no") + + async def _add_reaction(self, message_id: str, emoji_type: str) -> Optional[str]: + """Return the reaction_id on success, else None. The id is needed later for deletion.""" + if not self._client or not message_id or not emoji_type: return None try: - from lark_oapi.api.im.v1 import ( # lazy import — keeps optional dep optional + from lark_oapi.api.im.v1 import ( CreateMessageReactionRequest, CreateMessageReactionRequestBody, ) body = ( CreateMessageReactionRequestBody.builder() - .reaction_type({"emoji_type": _FEISHU_ACK_EMOJI}) + .reaction_type({"emoji_type": emoji_type}) .build() ) request = ( @@ -2271,16 +2336,93 @@ class FeishuAdapter(BasePlatformAdapter): if response and getattr(response, "success", lambda: False)(): data = getattr(response, "data", None) return getattr(data, "reaction_id", None) - logger.warning( - "[Feishu] Failed to add ack reaction to %s: code=%s msg=%s", + logger.debug( + "[Feishu] Add reaction %s on %s rejected: code=%s msg=%s", + emoji_type, message_id, getattr(response, "code", None), getattr(response, "msg", None), ) except Exception: - logger.warning("[Feishu] Failed to add ack reaction to %s", message_id, exc_info=True) + logger.warning( + "[Feishu] Add reaction %s on %s raised", + emoji_type, + message_id, + exc_info=True, + ) return None + async def _remove_reaction(self, message_id: str, reaction_id: str) -> bool: + if not self._client or not message_id or not reaction_id: + return False + try: + from lark_oapi.api.im.v1 import DeleteMessageReactionRequest + request = ( + DeleteMessageReactionRequest.builder() + .message_id(message_id) + .reaction_id(reaction_id) + .build() + ) + response = await asyncio.to_thread(self._client.im.v1.message_reaction.delete, request) + if response and getattr(response, "success", lambda: False)(): + return True + logger.debug( + "[Feishu] Remove reaction %s on %s rejected: code=%s msg=%s", + reaction_id, + message_id, + getattr(response, "code", None), + getattr(response, "msg", None), + ) + except Exception: + logger.warning( + "[Feishu] Remove reaction %s on %s raised", + reaction_id, + message_id, + exc_info=True, + ) + return False + + def _remember_processing_reaction(self, message_id: str, reaction_id: str) -> None: + cache = self._pending_processing_reactions + cache[message_id] = reaction_id + cache.move_to_end(message_id) + while len(cache) > _FEISHU_PROCESSING_REACTION_CACHE_SIZE: + cache.popitem(last=False) + + def _pop_processing_reaction(self, message_id: str) -> Optional[str]: + return self._pending_processing_reactions.pop(message_id, None) + + async def on_processing_start(self, event: 
MessageEvent) -> None: + if not self._reactions_enabled(): + return + message_id = event.message_id + if not message_id or message_id in self._pending_processing_reactions: + return + reaction_id = await self._add_reaction(message_id, _FEISHU_REACTION_IN_PROGRESS) + if reaction_id: + self._remember_processing_reaction(message_id, reaction_id) + + async def on_processing_complete( + self, event: MessageEvent, outcome: ProcessingOutcome + ) -> None: + if not self._reactions_enabled(): + return + message_id = event.message_id + if not message_id: + return + + start_reaction_id = self._pending_processing_reactions.get(message_id) + if start_reaction_id: + if not await self._remove_reaction(message_id, start_reaction_id): + # Don't stack a second badge on top of a Typing we couldn't + # remove — UI would read as both "working" and "done/failed" + # simultaneously. Keep the handle so LRU eventually evicts it. + return + self._pop_processing_reaction(message_id) + + if outcome is ProcessingOutcome.FAILURE: + await self._add_reaction(message_id, _FEISHU_REACTION_FAILURE) + # ========================================================================= # Webhook server and security # ========================================================================= @@ -3249,6 +3391,23 @@ class FeishuAdapter(BasePlatformAdapter): return self._post_mentions_bot(normalized.mentioned_ids) return False + def _is_self_sent_bot_message(self, event: Any) -> bool: + """Return True only for Feishu events emitted by this Hermes bot.""" + sender = getattr(event, "sender", None) + sender_type = str(getattr(sender, "sender_type", "") or "").strip().lower() + if sender_type not in {"bot", "app"}: + return False + + sender_id = getattr(sender, "sender_id", None) + sender_open_id = str(getattr(sender_id, "open_id", "") or "").strip() + sender_user_id = str(getattr(sender_id, "user_id", "") or "").strip() + + if self._bot_open_id and sender_open_id == self._bot_open_id: + return True + if self._bot_user_id and sender_user_id == self._bot_user_id: + return True + return False + def _message_mentions_bot(self, mentions: List[Any]) -> bool: """Check whether any mention targets the configured or inferred bot identity.""" for mention in mentions: @@ -3276,10 +3435,55 @@ class FeishuAdapter(BasePlatformAdapter): return False async def _hydrate_bot_identity(self) -> None: - """Best-effort discovery of bot identity for precise group mention gating.""" + """Best-effort discovery of bot identity for precise group mention gating + and self-sent bot event filtering. + + Populates ``_bot_open_id`` and ``_bot_name`` from /open-apis/bot/v3/info + (no extra scopes required beyond the tenant access token). Falls back to + the application info endpoint for ``_bot_name`` only when the first probe + doesn't return it. Each field is hydrated independently — a value already + supplied via env vars (FEISHU_BOT_OPEN_ID / FEISHU_BOT_USER_ID / + FEISHU_BOT_NAME) is preserved and skips its probe. + """ if not self._client: return - if any((self._bot_open_id, self._bot_user_id, self._bot_name)): + if self._bot_open_id and self._bot_name: + # Everything the self-send filter and precise mention gate need is + # already in place; nothing to probe. + return + + # Primary probe: /open-apis/bot/v3/info — returns bot_name + open_id, no + # extra scopes required. This is the same endpoint the onboarding wizard + # uses via probe_bot(). 
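Aside: the reaction-handle cache above is a plain `OrderedDict` capped by evicting from the oldest end — entries normally drain when processing completes, so the cap only guards against delete failures leaking handles forever. The core of that pattern in isolation:

```python
from collections import OrderedDict
from typing import Optional

class HandleCache:
    """Bounded message_id -> reaction_id map; oldest entries evict first."""

    def __init__(self, max_size: int = 1024) -> None:
        self._max = max_size
        self._data: "OrderedDict[str, str]" = OrderedDict()

    def remember(self, message_id: str, reaction_id: str) -> None:
        self._data[message_id] = reaction_id
        self._data.move_to_end(message_id)     # treat rewrites as fresh
        while len(self._data) > self._max:
            self._data.popitem(last=False)     # evict least-recent insert

    def pop(self, message_id: str) -> Optional[str]:
        # Happy path: completion consumes the handle, keeping the cache small.
        return self._data.pop(message_id, None)

cache = HandleCache(max_size=2)
cache.remember("m1", "r1")
cache.remember("m2", "r2")
cache.remember("m3", "r3")   # cap of 2 exceeded: m1 evicted
assert cache.pop("m1") is None and cache.pop("m3") == "r3"
```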
+ if not self._bot_open_id or not self._bot_name: + try: + resp = await asyncio.to_thread( + self._client.request, + method="GET", + url="/open-apis/bot/v3/info", + body=None, + raw_response=True, + ) + content = getattr(resp, "content", None) + if content: + payload = json.loads(content) + parsed = _parse_bot_response(payload) or {} + open_id = (parsed.get("bot_open_id") or "").strip() + bot_name = (parsed.get("bot_name") or "").strip() + if open_id and not self._bot_open_id: + self._bot_open_id = open_id + if bot_name and not self._bot_name: + self._bot_name = bot_name + except Exception: + logger.debug( + "[Feishu] /bot/v3/info probe failed during hydration", + exc_info=True, + ) + + # Fallback probe for _bot_name only: application info endpoint. Needs + # admin:app.info:readonly or application:application:self_manage scope, + # so it's best-effort. + if self._bot_name: return try: request = self._build_get_application_request(app_id=self._app_id, lang="en_us") @@ -3288,17 +3492,17 @@ class FeishuAdapter(BasePlatformAdapter): code = getattr(response, "code", None) if code == 99991672: logger.warning( - "[Feishu] Unable to hydrate bot identity from application info. " + "[Feishu] Unable to hydrate bot name from application info. " "Grant admin:app.info:readonly or application:application:self_manage " "so group @mention gating can resolve the bot name precisely." ) return app = getattr(getattr(response, "data", None), "app", None) app_name = (getattr(app, "app_name", None) or "").strip() - if app_name: + if app_name and not self._bot_name: self._bot_name = app_name except Exception: - logger.debug("[Feishu] Failed to hydrate bot identity", exc_info=True) + logger.debug("[Feishu] Failed to hydrate bot name from application info", exc_info=True) # ========================================================================= # Deduplication — seen message ID cache (persistent) diff --git a/gateway/platforms/matrix.py b/gateway/platforms/matrix.py index cdd67b337d..a5f9352b55 100644 --- a/gateway/platforms/matrix.py +++ b/gateway/platforms/matrix.py @@ -825,7 +825,7 @@ class MatrixAdapter(BasePlatformAdapter): async def edit_message( - self, chat_id: str, message_id: str, content: str + self, chat_id: str, message_id: str, content: str, *, finalize: bool = False ) -> SendResult: """Edit an existing message (via m.replace).""" diff --git a/gateway/platforms/mattermost.py b/gateway/platforms/mattermost.py index 18367a8e44..0e6c9631d7 100644 --- a/gateway/platforms/mattermost.py +++ b/gateway/platforms/mattermost.py @@ -304,7 +304,7 @@ class MattermostAdapter(BasePlatformAdapter): ) async def edit_message( - self, chat_id: str, message_id: str, content: str + self, chat_id: str, message_id: str, content: str, *, finalize: bool = False ) -> SendResult: """Edit an existing post.""" formatted = self.format_message(content) @@ -410,7 +410,6 @@ class MattermostAdapter(BasePlatformAdapter): logger.warning("Mattermost: blocked unsafe URL (SSRF protection)") return await self.send(chat_id, f"{caption or ''}\n{url}".strip(), reply_to) - import asyncio import aiohttp last_exc = None diff --git a/gateway/platforms/qqbot/__init__.py b/gateway/platforms/qqbot/__init__.py index 7119dd979e..130269b5f2 100644 --- a/gateway/platforms/qqbot/__init__.py +++ b/gateway/platforms/qqbot/__init__.py @@ -26,9 +26,8 @@ from .adapter import ( # noqa: F401 # -- Onboard (QR-code scan-to-configure) ----------------------------------- from .onboard import ( # noqa: F401 BindStatus, - create_bind_task, - poll_bind_result, 
build_connect_url, + qr_register, ) from .crypto import decrypt_secret, generate_bind_key # noqa: F401 @@ -44,9 +43,8 @@ __all__ = [ "_ssrf_redirect_guard", # onboard "BindStatus", - "create_bind_task", - "poll_bind_result", "build_connect_url", + "qr_register", # crypto "decrypt_secret", "generate_bind_key", diff --git a/gateway/platforms/qqbot/adapter.py b/gateway/platforms/qqbot/adapter.py index ced7442711..df3987f2eb 100644 --- a/gateway/platforms/qqbot/adapter.py +++ b/gateway/platforms/qqbot/adapter.py @@ -1086,11 +1086,8 @@ class QQAdapter(BasePlatformAdapter): return MessageType.VIDEO if "image" in first_type or "photo" in first_type: return MessageType.PHOTO - # Unknown content type with an attachment — don't assume PHOTO - # to prevent non-image files from being sent to vision analysis. logger.debug( - "[%s] Unknown media content_type '%s', defaulting to TEXT", - self._log_tag, + "Unknown media content_type '%s', defaulting to TEXT", first_type, ) return MessageType.TEXT @@ -1826,14 +1823,12 @@ class QQAdapter(BasePlatformAdapter): body["file_name"] = file_name # Retry transient upload failures - last_exc = None for attempt in range(3): try: return await self._api_request( "POST", path, body, timeout=FILE_UPLOAD_TIMEOUT ) except RuntimeError as exc: - last_exc = exc err_msg = str(exc) if any( kw in err_msg @@ -1842,8 +1837,8 @@ class QQAdapter(BasePlatformAdapter): raise if attempt < 2: await asyncio.sleep(1.5 * (attempt + 1)) - - raise last_exc # type: ignore[misc] + else: + raise # Maximum time (seconds) to wait for reconnection before giving up on send. _RECONNECT_WAIT_SECONDS = 15.0 diff --git a/gateway/platforms/qqbot/onboard.py b/gateway/platforms/qqbot/onboard.py index 65750b3f10..b48c39a4f8 100644 --- a/gateway/platforms/qqbot/onboard.py +++ b/gateway/platforms/qqbot/onboard.py @@ -1,6 +1,10 @@ """ QQBot scan-to-configure (QR code onboard) module. +Mirrors the Feishu onboarding pattern: synchronous HTTP + a single public +entry-point ``qr_register()`` that handles the full flow (create task → +display QR code → poll → decrypt credentials). + Calls the ``q.qq.com`` ``create_bind_task`` / ``poll_bind_result`` APIs to generate a QR-code URL and poll for scan completion. On success the caller receives the bot's *app_id*, *client_secret* (decrypted locally), and the @@ -12,18 +16,20 @@ Reference: https://bot.q.qq.com/wiki/develop/api-v2/ from __future__ import annotations import logging +import time from enum import IntEnum -from typing import Tuple +from typing import Optional, Tuple from urllib.parse import quote from .constants import ( ONBOARD_API_TIMEOUT, ONBOARD_CREATE_PATH, + ONBOARD_POLL_INTERVAL, ONBOARD_POLL_PATH, PORTAL_HOST, QR_URL_TEMPLATE, ) -from .crypto import generate_bind_key +from .crypto import decrypt_secret, generate_bind_key from .utils import get_api_headers logger = logging.getLogger(__name__) @@ -35,7 +41,7 @@ logger = logging.getLogger(__name__) class BindStatus(IntEnum): - """Status codes returned by ``poll_bind_result``.""" + """Status codes returned by ``_poll_bind_result``.""" NONE = 0 PENDING = 1 @@ -44,18 +50,40 @@ class BindStatus(IntEnum): # --------------------------------------------------------------------------- -# Public API +# QR rendering +# --------------------------------------------------------------------------- + +try: + import qrcode as _qrcode_mod +except (ImportError, TypeError): + _qrcode_mod = None # type: ignore[assignment] + + +def _render_qr(url: str) -> bool: + """Try to render a QR code in the terminal. 
Returns True if successful.""" + if _qrcode_mod is None: + return False + try: + qr = _qrcode_mod.QRCode( + error_correction=_qrcode_mod.constants.ERROR_CORRECT_M, + border=2, + ) + qr.add_data(url) + qr.make(fit=True) + qr.print_ascii(invert=True) + return True + except Exception: + return False + + +# --------------------------------------------------------------------------- +# Synchronous HTTP helpers (mirrors Feishu _post_registration pattern) # --------------------------------------------------------------------------- -async def create_bind_task( - timeout: float = ONBOARD_API_TIMEOUT, -) -> Tuple[str, str]: +def _create_bind_task(timeout: float = ONBOARD_API_TIMEOUT) -> Tuple[str, str]: """Create a bind task and return *(task_id, aes_key_base64)*. - The AES key is generated locally and sent to the server so it can - encrypt the bot credentials before returning them. - Raises: RuntimeError: If the API returns a non-zero ``retcode``. """ @@ -64,8 +92,8 @@ async def create_bind_task( url = f"https://{PORTAL_HOST}{ONBOARD_CREATE_PATH}" key = generate_bind_key() - async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client: - resp = await client.post(url, json={"key": key}, headers=get_api_headers()) + with httpx.Client(timeout=timeout, follow_redirects=True) as client: + resp = client.post(url, json={"key": key}, headers=get_api_headers()) resp.raise_for_status() data = resp.json() @@ -80,7 +108,7 @@ async def create_bind_task( return task_id, key -async def poll_bind_result( +def _poll_bind_result( task_id: str, timeout: float = ONBOARD_API_TIMEOUT, ) -> Tuple[BindStatus, str, str, str]: @@ -89,12 +117,6 @@ async def poll_bind_result( Returns: A 4-tuple of ``(status, bot_appid, bot_encrypt_secret, user_openid)``. - * ``bot_encrypt_secret`` is AES-256-GCM encrypted — decrypt it with - :func:`~gateway.platforms.qqbot.crypto.decrypt_secret` using the - key from :func:`create_bind_task`. - * ``user_openid`` is the OpenID of the person who scanned the code - (available when ``status == COMPLETED``). - Raises: RuntimeError: If the API returns a non-zero ``retcode``. """ @@ -102,8 +124,8 @@ async def poll_bind_result( url = f"https://{PORTAL_HOST}{ONBOARD_POLL_PATH}" - async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client: - resp = await client.post(url, json={"task_id": task_id}, headers=get_api_headers()) + with httpx.Client(timeout=timeout, follow_redirects=True) as client: + resp = client.post(url, json={"task_id": task_id}, headers=get_api_headers()) resp.raise_for_status() data = resp.json() @@ -122,3 +144,77 @@ async def poll_bind_result( def build_connect_url(task_id: str) -> str: """Build the QR-code target URL for a given *task_id*.""" return QR_URL_TEMPLATE.format(task_id=quote(task_id)) + + +# --------------------------------------------------------------------------- +# Public entry-point +# --------------------------------------------------------------------------- + +_MAX_REFRESHES = 3 + + +def qr_register(timeout_seconds: int = 600) -> Optional[dict]: + """Run the QQBot scan-to-configure QR registration flow. + + Mirrors ``feishu.qr_register()``: handles create → display → poll → + decrypt in one call. Unexpected errors propagate to the caller. + + :returns: + ``{"app_id": ..., "client_secret": ..., "user_openid": ...}`` on + success, or ``None`` on failure / expiry / cancellation. 
+ """ + deadline = time.monotonic() + timeout_seconds + + for refresh_count in range(_MAX_REFRESHES + 1): + # ── Create bind task ── + try: + task_id, aes_key = _create_bind_task() + except Exception as exc: + logger.warning("[QQBot onboard] Failed to create bind task: %s", exc) + return None + + url = build_connect_url(task_id) + + # ── Display QR code + URL ── + print() + if _render_qr(url): + print(f" Scan the QR code above, or open this URL directly:\n {url}") + else: + print(f" Open this URL in QQ on your phone:\n {url}") + print(" Tip: pip install qrcode to display a scannable QR code here") + print() + + # ── Poll loop ── + while time.monotonic() < deadline: + try: + status, app_id, encrypted_secret, user_openid = _poll_bind_result(task_id) + except Exception: + time.sleep(ONBOARD_POLL_INTERVAL) + continue + + if status == BindStatus.COMPLETED: + client_secret = decrypt_secret(encrypted_secret, aes_key) + print() + print(f" QR scan complete! (App ID: {app_id})") + if user_openid: + print(f" Scanner's OpenID: {user_openid}") + return { + "app_id": app_id, + "client_secret": client_secret, + "user_openid": user_openid, + } + + if status == BindStatus.EXPIRED: + if refresh_count >= _MAX_REFRESHES: + logger.warning("[QQBot onboard] QR code expired %d times — giving up", _MAX_REFRESHES) + return None + print(f"\n QR code expired, refreshing... ({refresh_count + 1}/{_MAX_REFRESHES})") + break # next for-loop iteration creates a new task + + time.sleep(ONBOARD_POLL_INTERVAL) + else: + # deadline reached without completing + logger.warning("[QQBot onboard] Poll timed out after %ds", timeout_seconds) + return None + + return None diff --git a/gateway/platforms/signal.py b/gateway/platforms/signal.py index 4df4193bc0..9a0a6256a4 100644 --- a/gateway/platforms/signal.py +++ b/gateway/platforms/signal.py @@ -18,6 +18,7 @@ import logging import os import random import time +import uuid from datetime import datetime, timezone from pathlib import Path from typing import Dict, List, Optional, Any @@ -127,6 +128,27 @@ def _render_mentions(text: str, mentions: list) -> str: return text +def _is_signal_service_id(value: str) -> bool: + """Return True if *value* already looks like a Signal service identifier.""" + if not value: + return False + if value.startswith("PNI:") or value.startswith("u:"): + return True + try: + uuid.UUID(value) + return True + except (ValueError, AttributeError, TypeError): + return False + + +def _looks_like_e164_number(value: str) -> bool: + """Return True for a plausible E.164 phone number.""" + if not value or not value.startswith("+"): + return False + digits = value[1:] + return digits.isdigit() and 7 <= len(digits) <= 15 + + def check_signal_requirements() -> bool: """Check if Signal is configured (has URL and account).""" return bool(os.getenv("SIGNAL_HTTP_URL") and os.getenv("SIGNAL_ACCOUNT")) @@ -179,6 +201,12 @@ class SignalAdapter(BasePlatformAdapter): # in Note to Self / self-chat mode (mirrors WhatsApp recentlySentIds) self._recent_sent_timestamps: set = set() self._max_recent_timestamps = 50 + # Signal increasingly exposes ACI/PNI UUIDs as stable recipient IDs. + # Keep a best-effort mapping so outbound sends can upgrade from a + # phone number to the corresponding UUID when signal-cli prefers it. 
+ self._recipient_uuid_by_number: Dict[str, str] = {} + self._recipient_number_by_uuid: Dict[str, str] = {} + self._recipient_cache_lock = asyncio.Lock() logger.info("Signal adapter initialized: url=%s account=%s groups=%s", self.http_url, redact_phone(self.account), @@ -195,31 +223,40 @@ class SignalAdapter(BasePlatformAdapter): return False # Acquire scoped lock to prevent duplicate Signal listeners for the same phone + lock_acquired = False try: if not self._acquire_platform_lock('signal-phone', self.account, 'Signal account'): return False + lock_acquired = True except Exception as e: logger.warning("Signal: Could not acquire phone lock (non-fatal): %s", e) self.client = httpx.AsyncClient(timeout=30.0) - - # Health check — verify signal-cli daemon is reachable try: - resp = await self.client.get(f"{self.http_url}/api/v1/check", timeout=10.0) - if resp.status_code != 200: - logger.error("Signal: health check failed (status %d)", resp.status_code) + # Health check — verify signal-cli daemon is reachable + try: + resp = await self.client.get(f"{self.http_url}/api/v1/check", timeout=10.0) + if resp.status_code != 200: + logger.error("Signal: health check failed (status %d)", resp.status_code) + return False + except Exception as e: + logger.error("Signal: cannot reach signal-cli at %s: %s", self.http_url, e) return False - except Exception as e: - logger.error("Signal: cannot reach signal-cli at %s: %s", self.http_url, e) - return False - self._running = True - self._last_sse_activity = time.time() - self._sse_task = asyncio.create_task(self._sse_listener()) - self._health_monitor_task = asyncio.create_task(self._health_monitor()) + self._running = True + self._last_sse_activity = time.time() + self._sse_task = asyncio.create_task(self._sse_listener()) + self._health_monitor_task = asyncio.create_task(self._health_monitor()) - logger.info("Signal: connected to %s", self.http_url) - return True + logger.info("Signal: connected to %s", self.http_url) + return True + finally: + if not self._running: + if self.client: + await self.client.aclose() + self.client = None + if lock_acquired: + self._release_platform_lock() async def disconnect(self) -> None: """Stop SSE listener and clean up.""" @@ -400,6 +437,7 @@ class SignalAdapter(BasePlatformAdapter): ) sender_name = envelope_data.get("sourceName", "") sender_uuid = envelope_data.get("sourceUuid", "") + self._remember_recipient_identifiers(sender, sender_uuid) if not sender: logger.debug("Signal: ignoring envelope with no sender") @@ -518,6 +556,64 @@ class SignalAdapter(BasePlatformAdapter): await self.handle_message(event) + def _remember_recipient_identifiers(self, number: Optional[str], service_id: Optional[str]) -> None: + """Cache any number↔UUID mapping observed from Signal envelopes.""" + if not number or not service_id or not _is_signal_service_id(service_id): + return + self._recipient_uuid_by_number[number] = service_id + self._recipient_number_by_uuid[service_id] = number + + def _extract_contact_uuid(self, contact: Any, phone_number: str) -> Optional[str]: + """Best-effort extraction of a Signal service ID from listContacts output.""" + if not isinstance(contact, dict): + return None + + number = contact.get("number") + recipient = contact.get("recipient") + service_id = contact.get("uuid") or contact.get("serviceId") + if not service_id: + profile = contact.get("profile") + if isinstance(profile, dict): + service_id = profile.get("serviceId") or profile.get("uuid") + + if service_id and _is_signal_service_id(service_id): + 
matches_number = number == phone_number or recipient == phone_number + if matches_number: + return service_id + return None + + async def _resolve_recipient(self, chat_id: str) -> str: + """Return the preferred Signal recipient identifier for a direct chat.""" + if ( + not chat_id + or chat_id.startswith("group:") + or _is_signal_service_id(chat_id) + or not _looks_like_e164_number(chat_id) + ): + return chat_id + + cached = self._recipient_uuid_by_number.get(chat_id) + if cached: + return cached + + async with self._recipient_cache_lock: + cached = self._recipient_uuid_by_number.get(chat_id) + if cached: + return cached + + contacts = await self._rpc("listContacts", { + "account": self.account, + "allRecipients": True, + }) + if isinstance(contacts, list): + for contact in contacts: + number = contact.get("number") if isinstance(contact, dict) else None + service_id = self._extract_contact_uuid(contact, chat_id) + if number and service_id: + self._remember_recipient_identifiers(number, service_id) + + return self._recipient_uuid_by_number.get(chat_id, chat_id) + # ------------------------------------------------------------------ # Attachment Handling # ------------------------------------------------------------------ @@ -633,7 +729,7 @@ class SignalAdapter(BasePlatformAdapter): if chat_id.startswith("group:"): params["groupId"] = chat_id[6:] else: - params["recipient"] = [chat_id] + params["recipient"] = [await self._resolve_recipient(chat_id)] result = await self._rpc("send", params) @@ -684,7 +780,7 @@ class SignalAdapter(BasePlatformAdapter): if chat_id.startswith("group:"): params["groupId"] = chat_id[6:] else: - params["recipient"] = [chat_id] + params["recipient"] = [await self._resolve_recipient(chat_id)] fails = self._typing_failures.get(chat_id, 0) result = await self._rpc( @@ -745,7 +841,7 @@ class SignalAdapter(BasePlatformAdapter): if chat_id.startswith("group:"): params["groupId"] = chat_id[6:] else: - params["recipient"] = [chat_id] + params["recipient"] = [await self._resolve_recipient(chat_id)] result = await self._rpc("send", params) if result is not None: @@ -784,7 +880,7 @@ class SignalAdapter(BasePlatformAdapter): if chat_id.startswith("group:"): params["groupId"] = chat_id[6:] else: - params["recipient"] = [chat_id] + params["recipient"] = [await self._resolve_recipient(chat_id)] result = await self._rpc("send", params) if result is not None: diff --git a/gateway/platforms/slack.py b/gateway/platforms/slack.py index ba444c53e8..191689a5ae 100644 --- a/gateway/platforms/slack.py +++ b/gateway/platforms/slack.py @@ -38,6 +38,7 @@ from gateway.platforms.base import ( BasePlatformAdapter, MessageEvent, MessageType, + ProcessingOutcome, SendResult, SUPPORTED_DOCUMENT_TYPES, safe_url_for_log, @@ -113,6 +114,11 @@ class SlackAdapter(BasePlatformAdapter): # Cache for _fetch_thread_context results: cache_key → _ThreadContextCache self._thread_context_cache: Dict[str, _ThreadContextCache] = {} self._THREAD_CACHE_TTL = 60.0 + # Track message IDs that should get reaction lifecycle (DMs / @mentions). + self._reacting_message_ids: set = set() + # Track active assistant thread status indicators so stop_typing can + # clear them (chat_id → thread_ts). 
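Aside: the recipient upgrade above uses the classic double-checked pattern for async caches — a lock-free fast path, then a re-check under the lock before paying for the RPC — so concurrent sends to the same number trigger at most one `listContacts` call. The shape in isolation, with a hypothetical `_fetch()` standing in for the RPC:

```python
import asyncio
from typing import Dict

class AsyncCache:
    def __init__(self) -> None:
        self._cache: Dict[str, str] = {}
        self._lock = asyncio.Lock()
        self.fetch_calls = 0

    async def _fetch(self, key: str) -> str:
        self.fetch_calls += 1                 # stand-in for the listContacts RPC
        await asyncio.sleep(0.05)
        return f"uuid-for-{key}"

    async def resolve(self, key: str) -> str:
        if (hit := self._cache.get(key)) is not None:
            return hit                        # fast path, no lock
        async with self._lock:
            if (hit := self._cache.get(key)) is not None:
                return hit                    # another task filled it while we waited
            value = await self._fetch(key)
            self._cache[key] = value
            return value

async def main() -> None:
    cache = AsyncCache()
    results = await asyncio.gather(*(cache.resolve("+15551234567") for _ in range(5)))
    assert len(set(results)) == 1 and cache.fetch_calls == 1

asyncio.run(main())
```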
+ self._active_status_threads: Dict[str, str] = {} async def connect(self) -> bool: """Connect to Slack via Socket Mode.""" @@ -150,9 +156,11 @@ class SlackAdapter(BasePlatformAdapter): except Exception as e: logger.warning("[Slack] Failed to read %s: %s", tokens_file, e) + lock_acquired = False try: if not self._acquire_platform_lock('slack-app-token', app_token, 'Slack app token'): return False + lock_acquired = True # First token is the primary — used for AsyncApp / Socket Mode primary_token = bot_tokens[0] @@ -228,6 +236,9 @@ class SlackAdapter(BasePlatformAdapter): except Exception as e: # pragma: no cover - defensive logging logger.error("[Slack] Connection failed: %s", e, exc_info=True) return False + finally: + if lock_acquired and not self._running: + self._release_platform_lock() async def disconnect(self) -> None: """Disconnect from Slack.""" @@ -316,6 +327,8 @@ class SlackAdapter(BasePlatformAdapter): chat_id: str, message_id: str, content: str, + *, + finalize: bool = False, ) -> SendResult: """Edit a previously sent Slack message.""" if not self._app: @@ -355,6 +368,7 @@ class SlackAdapter(BasePlatformAdapter): if not thread_ts: return # Can only set status in a thread context + self._active_status_threads[chat_id] = thread_ts try: await self._get_client(chat_id).assistant_threads_setStatus( channel_id=chat_id, @@ -366,6 +380,22 @@ class SlackAdapter(BasePlatformAdapter): # in an assistant-enabled context. Falls back to reactions. logger.debug("[Slack] assistant.threads.setStatus failed: %s", e) + async def stop_typing(self, chat_id: str) -> None: + """Clear the assistant thread status indicator.""" + if not self._app: + return + thread_ts = self._active_status_threads.pop(chat_id, None) + if not thread_ts: + return + try: + await self._get_client(chat_id).assistant_threads_setStatus( + channel_id=chat_id, + thread_ts=thread_ts, + status="", + ) + except Exception as e: + logger.debug("[Slack] assistant.threads.setStatus clear failed: %s", e) + def _dm_top_level_threads_as_sessions(self) -> bool: """Whether top-level Slack DMs get per-message session threads. 
@@ -577,6 +607,38 @@ class SlackAdapter(BasePlatformAdapter): logger.debug("[Slack] reactions.remove failed (%s): %s", emoji, e) return False + def _reactions_enabled(self) -> bool: + """Check if message reactions are enabled via config/env.""" + return os.getenv("SLACK_REACTIONS", "true").lower() not in ("false", "0", "no") + + async def on_processing_start(self, event: MessageEvent) -> None: + """Add an in-progress reaction when message processing begins.""" + if not self._reactions_enabled(): + return + ts = getattr(event, "message_id", None) + if not ts or ts not in self._reacting_message_ids: + return + channel_id = getattr(event.source, "chat_id", None) + if channel_id: + await self._add_reaction(channel_id, ts, "eyes") + + async def on_processing_complete(self, event: MessageEvent, outcome: ProcessingOutcome) -> None: + """Swap the in-progress reaction for a final success/failure reaction.""" + if not self._reactions_enabled(): + return + ts = getattr(event, "message_id", None) + if not ts or ts not in self._reacting_message_ids: + return + self._reacting_message_ids.discard(ts) + channel_id = getattr(event.source, "chat_id", None) + if not channel_id: + return + await self._remove_reaction(channel_id, ts, "eyes") + if outcome == ProcessingOutcome.SUCCESS: + await self._add_reaction(channel_id, ts, "white_check_mark") + elif outcome == ProcessingOutcome.FAILURE: + await self._add_reaction(channel_id, ts, "x") + # ----- User identity resolution ----- async def _resolve_user_name(self, user_id: str, chat_id: str = "") -> str: @@ -1206,17 +1268,12 @@ class SlackAdapter(BasePlatformAdapter): # Only react when bot is directly addressed (DM or @mention). # In listen-all channels (require_mention=false), reacting to every # casual message would be noisy. 
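Aside: the Slack flow below moves reactions out of the event handler and into processing-lifecycle hooks — the handler only records that a message *should* get reactions, and `on_processing_start`/`on_processing_complete` do the actual add/swap based on the outcome. That state machine, reduced to its moving parts with a stubbed reaction call:

```python
import asyncio
from enum import Enum

class Outcome(Enum):
    SUCCESS = "success"
    FAILURE = "failure"

class ReactionLifecycle:
    def __init__(self) -> None:
        self._reacting: set = set()   # message timestamps opted in by the handler

    async def _react(self, ts: str, emoji: str, add: bool = True) -> None:
        print(("add" if add else "remove"), emoji, "on", ts)  # stub API call

    def mark(self, ts: str) -> None:
        # Event-handler path: DMs / @mentions opt in; listen-all traffic doesn't.
        self._reacting.add(ts)

    async def on_start(self, ts: str) -> None:
        if ts in self._reacting:
            await self._react(ts, "eyes")

    async def on_complete(self, ts: str, outcome: Outcome) -> None:
        if ts not in self._reacting:
            return
        self._reacting.discard(ts)
        await self._react(ts, "eyes", add=False)
        await self._react(ts, "white_check_mark" if outcome is Outcome.SUCCESS else "x")

async def main() -> None:
    lc = ReactionLifecycle()
    lc.mark("1700000000.000100")
    await lc.on_start("1700000000.000100")
    await lc.on_complete("1700000000.000100", Outcome.SUCCESS)

asyncio.run(main())
```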
- _should_react = is_dm or is_mentioned - + _should_react = (is_dm or is_mentioned) and self._reactions_enabled() if _should_react: - await self._add_reaction(channel_id, ts, "eyes") + self._reacting_message_ids.add(ts) await self.handle_message(msg_event) - if _should_react: - await self._remove_reaction(channel_id, ts, "eyes") - await self._add_reaction(channel_id, ts, "white_check_mark") - # ----- Approval button support (Block Kit) ----- async def send_exec_approval( @@ -1593,11 +1650,9 @@ class SlackAdapter(BasePlatformAdapter): async def _download_slack_file(self, url: str, ext: str, audio: bool = False, team_id: str = "") -> str: """Download a Slack file using the bot token for auth, with retry.""" - import asyncio import httpx bot_token = self._team_clients[team_id].token if team_id and team_id in self._team_clients else self.config.token - last_exc = None async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client: for attempt in range(3): @@ -1627,7 +1682,6 @@ class SlackAdapter(BasePlatformAdapter): from gateway.platforms.base import cache_image_from_bytes return cache_image_from_bytes(response.content, ext) except (httpx.TimeoutException, httpx.HTTPStatusError) as exc: - last_exc = exc if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429: raise if attempt < 2: @@ -1636,15 +1690,12 @@ class SlackAdapter(BasePlatformAdapter): await asyncio.sleep(1.5 * (attempt + 1)) continue raise - raise last_exc async def _download_slack_file_bytes(self, url: str, team_id: str = "") -> bytes: """Download a Slack file and return raw bytes, with retry.""" - import asyncio import httpx bot_token = self._team_clients[team_id].token if team_id and team_id in self._team_clients else self.config.token - last_exc = None async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client: for attempt in range(3): @@ -1656,7 +1707,6 @@ class SlackAdapter(BasePlatformAdapter): response.raise_for_status() return response.content except (httpx.TimeoutException, httpx.HTTPStatusError) as exc: - last_exc = exc if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429: raise if attempt < 2: @@ -1665,7 +1715,6 @@ class SlackAdapter(BasePlatformAdapter): await asyncio.sleep(1.5 * (attempt + 1)) continue raise - raise last_exc # ── Channel mention gating ───────────────────────────────────────────── diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index f71614054c..bec0d690a3 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -11,6 +11,7 @@ import asyncio import json import logging import os +import tempfile import html as _html import re from typing import Dict, List, Optional, Any @@ -70,8 +71,10 @@ from gateway.platforms.base import ( SendResult, cache_image_from_bytes, cache_audio_from_bytes, + cache_video_from_bytes, cache_document_from_bytes, resolve_proxy_url, + SUPPORTED_VIDEO_TYPES, SUPPORTED_DOCUMENT_TYPES, utf16_len, _prefix_within_utf16_limit, @@ -493,6 +496,13 @@ class TelegramAdapter(BasePlatformAdapter): "[%s] DM topic '%s' already exists in chat %s (will be mapped from incoming messages)", self.name, name, chat_id, ) + elif "not a forum" in error_text or "forums_disabled" in error_text: + logger.warning( + "[%s] Cannot create DM topic '%s' in chat %s: Topics mode is not enabled. 
" + "The user must open the DM with this bot in Telegram, tap the bot name " + "at the top, and enable 'Topics' in chat settings before topics can be created.", + self.name, name, chat_id, + ) else: logger.warning( "[%s] Failed to create DM topic '%s' in chat %s: %s", @@ -534,8 +544,23 @@ class TelegramAdapter(BasePlatformAdapter): break if changed: - with open(config_path, "w") as f: - _yaml.dump(config, f, default_flow_style=False, sort_keys=False) + fd, tmp_path = tempfile.mkstemp( + dir=str(config_path.parent), + suffix=".tmp", + prefix=".config_", + ) + try: + with os.fdopen(fd, "w", encoding="utf-8") as f: + _yaml.dump(config, f, default_flow_style=False, sort_keys=False) + f.flush() + os.fsync(f.fileno()) + os.replace(tmp_path, config_path) + except BaseException: + try: + os.unlink(tmp_path) + except OSError: + pass + raise logger.info( "[%s] Persisted thread_id=%s for topic '%s' in config.yaml", self.name, thread_id, topic_name, @@ -769,8 +794,28 @@ class TelegramAdapter(BasePlatformAdapter): # Telegram pushes updates to our HTTP endpoint. This # enables cloud platforms (Fly.io, Railway) to auto-wake # suspended machines on inbound HTTP traffic. + # + # SECURITY: TELEGRAM_WEBHOOK_SECRET is REQUIRED. Without it, + # python-telegram-bot passes secret_token=None and the + # webhook endpoint accepts any HTTP POST — attackers can + # inject forged updates as if from Telegram. Refuse to + # start rather than silently run in fail-open mode. + # See GHSA-3vpc-7q5r-276h. webhook_port = int(os.getenv("TELEGRAM_WEBHOOK_PORT", "8443")) - webhook_secret = os.getenv("TELEGRAM_WEBHOOK_SECRET", "").strip() or None + webhook_secret = os.getenv("TELEGRAM_WEBHOOK_SECRET", "").strip() + if not webhook_secret: + raise RuntimeError( + "TELEGRAM_WEBHOOK_SECRET is required when " + "TELEGRAM_WEBHOOK_URL is set. Without it, the " + "webhook endpoint accepts forged updates from " + "anyone who can reach it — see " + "https://github.com/NousResearch/hermes-agent/" + "security/advisories/GHSA-3vpc-7q5r-276h.\n\n" + "Generate a secret and set it in your .env:\n" + " export TELEGRAM_WEBHOOK_SECRET=\"$(openssl rand -hex 32)\"\n\n" + "Then register it with Telegram when setting the " + "webhook via setWebhook's secret_token parameter." + ) from urllib.parse import urlparse webhook_path = urlparse(webhook_url).path or "/telegram" @@ -1081,6 +1126,8 @@ class TelegramAdapter(BasePlatformAdapter): chat_id: str, message_id: str, content: str, + *, + finalize: bool = False, ) -> SendResult: """Edit a previously sent Telegram message.""" if not self._bot: @@ -1657,6 +1704,21 @@ class TelegramAdapter(BasePlatformAdapter): except Exception as exc: logger.error("Failed to write update response from callback: %s", exc) + def _missing_media_path_error(self, label: str, path: str) -> str: + """Build an actionable file-not-found error for gateway MEDIA delivery. + + Paths like /workspace/... or /output/... often only exist inside the + Docker sandbox, while the gateway process runs on the host. + """ + error = f"{label} file not found: {path}" + if path.startswith(("/workspace/", "/output/", "/outputs/")): + error += ( + " (path may only exist inside the Docker sandbox. 
" + "Bind-mount a host directory and emit the host-visible " + "path in MEDIA: for gateway file delivery.)" + ) + return error + async def send_voice( self, chat_id: str, @@ -1671,9 +1733,8 @@ class TelegramAdapter(BasePlatformAdapter): return SendResult(success=False, error="Not connected") try: - import os if not os.path.exists(audio_path): - return SendResult(success=False, error=f"Audio file not found: {audio_path}") + return SendResult(success=False, error=self._missing_media_path_error("Audio", audio_path)) with open(audio_path, "rb") as audio_file: # .ogg files -> send as voice (round playable bubble) @@ -1720,9 +1781,8 @@ class TelegramAdapter(BasePlatformAdapter): return SendResult(success=False, error="Not connected") try: - import os if not os.path.exists(image_path): - return SendResult(success=False, error=f"Image file not found: {image_path}") + return SendResult(success=False, error=self._missing_media_path_error("Image", image_path)) _thread = self._metadata_thread_id(metadata) with open(image_path, "rb") as image_file: @@ -1759,7 +1819,7 @@ class TelegramAdapter(BasePlatformAdapter): try: if not os.path.exists(file_path): - return SendResult(success=False, error=f"File not found: {file_path}") + return SendResult(success=False, error=self._missing_media_path_error("File", file_path)) display_name = file_name or os.path.basename(file_path) _thread = self._metadata_thread_id(metadata) @@ -1793,7 +1853,7 @@ class TelegramAdapter(BasePlatformAdapter): try: if not os.path.exists(video_path): - return SendResult(success=False, error=f"Video file not found: {video_path}") + return SendResult(success=False, error=self._missing_media_path_error("Video", video_path)) _thread = self._metadata_thread_id(metadata) with open(video_path, "rb") as f: @@ -2033,7 +2093,7 @@ class TelegramAdapter(BasePlatformAdapter): url = m.group(2).replace('\\', '\\\\').replace(')', '\\)') return _ph(f'[{display}]({url})') - text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', _convert_link, text) + text = re.sub(r'\[([^\]]+)\]\(([^()]*(?:\([^()]*\)[^()]*)*)\)', _convert_link, text) # 4) Convert markdown headers (## Title) → bold *Title* def _convert_header(m): @@ -2241,22 +2301,27 @@ class TelegramAdapter(BasePlatformAdapter): bot_username = (getattr(self._bot, "username", None) or "").lstrip("@").lower() bot_id = getattr(self._bot, "id", None) + expected = f"@{bot_username}" if bot_username else None def _iter_sources(): yield getattr(message, "text", None) or "", getattr(message, "entities", None) or [] yield getattr(message, "caption", None) or "", getattr(message, "caption_entities", None) or [] + # Telegram parses mentions server-side and emits MessageEntity objects + # (type=mention for @username, type=text_mention for @FirstName targeting + # a user without a public username). Only those entities are authoritative — + # raw substring matches like "foo@hermes_bot.example" are not mentions + # (bug #12545). Entities also correctly handle @handles inside URLs, code + # blocks, and quoted text, where a regex scan would over-match. 
for source_text, entities in _iter_sources(): - if bot_username and f"@{bot_username}" in source_text.lower(): - return True for entity in entities: entity_type = str(getattr(entity, "type", "")).split(".")[-1].lower() - if entity_type == "mention" and bot_username: + if entity_type == "mention" and expected: offset = int(getattr(entity, "offset", -1)) length = int(getattr(entity, "length", 0)) if offset < 0 or length <= 0: continue - if source_text[offset:offset + length].strip().lower() == f"@{bot_username}": + if source_text[offset:offset + length].strip().lower() == expected: return True elif entity_type == "text_mention": user = getattr(entity, "user", None) @@ -2288,10 +2353,16 @@ class TelegramAdapter(BasePlatformAdapter): DMs remain unrestricted. Group/supergroup messages are accepted when: - the chat is explicitly allowlisted in ``free_response_chats`` - ``require_mention`` is disabled - - the message is a command - the message replies to the bot - the bot is @mentioned - the text/caption matches a configured regex wake-word pattern + + When ``require_mention`` is enabled, slash commands are not given + special treatment — they must pass the same mention/reply checks + as any other group message. Users can still trigger commands via + the Telegram bot menu (``/command@botname``) or by explicitly + mentioning the bot (``@botname /command``), both of which are + recognised as mentions by :meth:`_message_mentions_bot`. """ if not self._is_group_chat(message): return True @@ -2306,8 +2377,6 @@ class TelegramAdapter(BasePlatformAdapter): return True if not self._telegram_require_mention(): return True - if is_command: - return True if self._is_reply_to_bot(message): return True if self._message_mentions_bot(message): @@ -2590,6 +2659,23 @@ class TelegramAdapter(BasePlatformAdapter): except Exception as e: logger.warning("[Telegram] Failed to cache audio: %s", e, exc_info=True) + elif msg.video: + try: + file_obj = await msg.video.get_file() + video_bytes = await file_obj.download_as_bytearray() + ext = ".mp4" + if getattr(file_obj, "file_path", None): + for candidate in SUPPORTED_VIDEO_TYPES: + if file_obj.file_path.lower().endswith(candidate): + ext = candidate + break + cached_path = cache_video_from_bytes(bytes(video_bytes), ext=ext) + event.media_urls = [cached_path] + event.media_types = [SUPPORTED_VIDEO_TYPES.get(ext, "video/mp4")] + logger.info("[Telegram] Cached user video at %s", cached_path) + except Exception as e: + logger.warning("[Telegram] Failed to cache video: %s", e, exc_info=True) + # Download document files to cache for agent processing elif msg.document: doc = msg.document @@ -2606,6 +2692,21 @@ class TelegramAdapter(BasePlatformAdapter): mime_to_ext = {v: k for k, v in SUPPORTED_DOCUMENT_TYPES.items()} ext = mime_to_ext.get(doc.mime_type, "") + if not ext and doc.mime_type: + video_mime_to_ext = {v: k for k, v in SUPPORTED_VIDEO_TYPES.items()} + ext = video_mime_to_ext.get(doc.mime_type, "") + + if ext in SUPPORTED_VIDEO_TYPES: + file_obj = await doc.get_file() + video_bytes = await file_obj.download_as_bytearray() + cached_path = cache_video_from_bytes(bytes(video_bytes), ext=ext) + event.media_urls = [cached_path] + event.media_types = [SUPPORTED_VIDEO_TYPES[ext]] + event.message_type = MessageType.VIDEO + logger.info("[Telegram] Cached user video document at %s", cached_path) + await self.handle_message(event) + return + # Check if supported if ext not in SUPPORTED_DOCUMENT_TYPES: supported_list = ", ".join(sorted(SUPPORTED_DOCUMENT_TYPES.keys())) @@ -2744,13 
+2845,11 @@ class TelegramAdapter(BasePlatformAdapter): logger.info("[Telegram] Analyzing sticker at %s", cached_path) from tools.vision_tools import vision_analyze_tool - import json as _json - result_json = await vision_analyze_tool( image_url=cached_path, user_prompt=STICKER_VISION_PROMPT, ) - result = _json.loads(result_json) + result = json.loads(result_json) if result.get("success"): description = result.get("analysis", "a sticker") diff --git a/gateway/platforms/webhook.py b/gateway/platforms/webhook.py index c37445b17e..e3a736a451 100644 --- a/gateway/platforms/webhook.py +++ b/gateway/platforms/webhook.py @@ -13,6 +13,10 @@ Each route defines: - skills: optional list of skills to load for the agent - deliver: where to send the response (github_comment, telegram, etc.) - deliver_extra: additional delivery config (repo, pr_number, chat_id) + - deliver_only: if true, skip the agent — the rendered prompt IS the + message that gets delivered. Use for external push notifications + (Supabase, monitoring alerts, inter-agent pings) where zero LLM cost + and sub-second delivery matter more than agent reasoning. Security: - HMAC secret is required per route (validated at startup) @@ -122,6 +126,19 @@ class WebhookAdapter(BasePlatformAdapter): f"For testing without auth, set secret to '{_INSECURE_NO_AUTH}'." ) + # deliver_only routes bypass the agent — the POST body becomes a + # direct push notification via the configured delivery target. + # Validate up-front so misconfiguration surfaces at startup rather + # than on the first webhook POST. + if route.get("deliver_only"): + deliver = route.get("deliver", "log") + if not deliver or deliver == "log": + raise ValueError( + f"[webhook] Route '{name}' has deliver_only=true but " + f"deliver is '{deliver}'. Direct delivery requires a " + f"real target (telegram, discord, slack, github_comment, etc.)." 
+ ) + app = web.Application() app.router.add_get("/health", self._handle_health) app.router.add_post("/webhooks/{route_name}", self._handle_webhook) @@ -296,24 +313,14 @@ class WebhookAdapter(BasePlatformAdapter): {"error": "Payload too large"}, status=413 ) - # ── Rate limiting ──────────────────────────────────────── - now = time.time() - window = self._rate_counts.setdefault(route_name, []) - window[:] = [t for t in window if now - t < 60] - if len(window) >= self._rate_limit: - return web.json_response( - {"error": "Rate limit exceeded"}, status=429 - ) - window.append(now) - - # Read body + # Read body (must be done before any validation) try: raw_body = await request.read() except Exception as e: logger.error("[webhook] Failed to read body: %s", e) return web.json_response({"error": "Bad request"}, status=400) - # Validate HMAC signature (skip for INSECURE_NO_AUTH testing mode) + # Validate HMAC signature FIRST (skip for INSECURE_NO_AUTH testing mode) secret = route_config.get("secret", self._global_secret) if secret and secret != _INSECURE_NO_AUTH: if not self._validate_signature(request, raw_body, secret): @@ -324,6 +331,16 @@ class WebhookAdapter(BasePlatformAdapter): {"error": "Invalid signature"}, status=401 ) + # ── Rate limiting (after auth) ─────────────────────────── + now = time.time() + window = self._rate_counts.setdefault(route_name, []) + window[:] = [t for t in window if now - t < 60] + if len(window) >= self._rate_limit: + return web.json_response( + {"error": "Rate limit exceeded"}, status=429 + ) + window.append(now) + # Parse payload try: payload = json.loads(raw_body) @@ -419,6 +436,64 @@ class WebhookAdapter(BasePlatformAdapter): ) self._seen_deliveries[delivery_id] = now + # ── Direct delivery mode (deliver_only) ───────────────── + # Skip the agent entirely — the rendered prompt IS the message we + # deliver. Use case: external services (Supabase, monitoring, + # cron jobs, other agents) that need to push a plain notification + # to a user's chat with zero LLM cost. Reuses the same HMAC auth, + # rate limiting, idempotency, and template rendering as agent mode. + if route_config.get("deliver_only"): + delivery = { + "deliver": route_config.get("deliver", "log"), + "deliver_extra": self._render_delivery_extra( + route_config.get("deliver_extra", {}), payload + ), + "payload": payload, + } + logger.info( + "[webhook] direct-deliver event=%s route=%s target=%s msg_len=%d delivery=%s", + event_type, + route_name, + delivery["deliver"], + len(prompt), + delivery_id, + ) + try: + result = await self._direct_deliver(prompt, delivery) + except Exception: + logger.exception( + "[webhook] direct-deliver failed route=%s delivery=%s", + route_name, + delivery_id, + ) + return web.json_response( + {"status": "error", "error": "Delivery failed", "delivery_id": delivery_id}, + status=502, + ) + + if result.success: + return web.json_response( + { + "status": "delivered", + "route": route_name, + "target": delivery["deliver"], + "delivery_id": delivery_id, + }, + status=200, + ) + # Delivery attempted but target rejected it — surface as 502 + # with a generic error (don't leak adapter-level detail). 
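Aside: moving the rate limiter after signature validation, as the reordering above does, means unauthenticated traffic can no longer exhaust an authenticated caller's 429 budget. A sketch of the resulting check order, using stdlib HMAC with a constant-time compare:

```python
import hashlib
import hmac
import time
from typing import Dict, List

SECRET = b"route-secret"
RATE_LIMIT = 60          # requests per rolling minute, per route
_windows: Dict[str, List[float]] = {}

def handle(route: str, body: bytes, signature: str) -> int:
    """Return an HTTP status code. Order: auth first, then rate limit."""
    expected = hmac.new(SECRET, body, hashlib.sha256).hexdigest()
    if not hmac.compare_digest(expected, signature):
        return 401       # forged requests never touch the rate window

    now = time.time()
    window = _windows.setdefault(route, [])
    window[:] = [t for t in window if now - t < 60]
    if len(window) >= RATE_LIMIT:
        return 429
    window.append(now)
    return 200

body = b'{"event": "ping"}'
good = hmac.new(SECRET, body, hashlib.sha256).hexdigest()
print(handle("alerts", body, good))      # 200
print(handle("alerts", body, "bogus"))   # 401, window untouched
```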
+ logger.warning( + "[webhook] direct-deliver target rejected route=%s target=%s error=%s", + route_name, + delivery["deliver"], + result.error, + ) + return web.json_response( + {"status": "error", "error": "Delivery failed", "delivery_id": delivery_id}, + status=502, + ) + # Use delivery_id in session key so concurrent webhooks on the # same route get independent agent runs (not queued/interrupted). session_chat_id = f"webhook:{route_name}:{delivery_id}" @@ -572,6 +647,34 @@ class WebhookAdapter(BasePlatformAdapter): # Response delivery # ------------------------------------------------------------------ + async def _direct_deliver( + self, content: str, delivery: dict + ) -> SendResult: + """Deliver *content* directly without invoking the agent. + + Used by ``deliver_only`` routes: the rendered template becomes the + literal message body, and we dispatch to the same delivery helpers + that the agent-mode ``send()`` flow uses. All target types that + work in agent mode work here — Telegram, Discord, Slack, GitHub + PR comments, etc. + """ + deliver_type = delivery.get("deliver", "log") + + if deliver_type == "log": + # Shouldn't reach here — startup validation rejects deliver_only + # with deliver=log — but guard defensively. + logger.info("[webhook] direct-deliver log-only: %s", content[:200]) + return SendResult(success=True) + + if deliver_type == "github_comment": + return await self._deliver_github_comment(content, delivery) + + # Fall through to the cross-platform dispatcher, which validates the + # target name and routes via the gateway runner. + return await self._deliver_cross_platform( + deliver_type, content, delivery + ) + async def _deliver_github_comment( self, content: str, delivery: dict ) -> SendResult: diff --git a/gateway/platforms/wecom.py b/gateway/platforms/wecom.py index 9e5dd04e0d..a6506d18a9 100644 --- a/gateway/platforms/wecom.py +++ b/gateway/platforms/wecom.py @@ -624,13 +624,16 @@ class WeComAdapter(BasePlatformAdapter): msgtype = str(body.get("msgtype") or "").lower() if msgtype == "mixed": - mixed = body.get("mixed") if isinstance(body.get("mixed"), dict) else {} - items = mixed.get("msg_item") if isinstance(mixed.get("msg_item"), list) else [] + _raw_mixed = body.get("mixed") + mixed = _raw_mixed if isinstance(_raw_mixed, dict) else {} + _raw_items = mixed.get("msg_item") + items = _raw_items if isinstance(_raw_items, list) else [] for item in items: if not isinstance(item, dict): continue if str(item.get("msgtype") or "").lower() == "text": - text_block = item.get("text") if isinstance(item.get("text"), dict) else {} + _raw_text = item.get("text") + text_block = _raw_text if isinstance(_raw_text, dict) else {} content = str(text_block.get("content") or "").strip() if content: text_parts.append(content) @@ -672,8 +675,10 @@ class WeComAdapter(BasePlatformAdapter): msgtype = str(body.get("msgtype") or "").lower() if msgtype == "mixed": - mixed = body.get("mixed") if isinstance(body.get("mixed"), dict) else {} - items = mixed.get("msg_item") if isinstance(mixed.get("msg_item"), list) else [] + _raw_mixed = body.get("mixed") + mixed = _raw_mixed if isinstance(_raw_mixed, dict) else {} + _raw_items = mixed.get("msg_item") + items = _raw_items if isinstance(_raw_items, list) else [] for item in items: if not isinstance(item, dict): continue @@ -1459,3 +1464,134 @@ class WeComAdapter(BasePlatformAdapter): "name": chat_id, "type": "group" if chat_id and chat_id.lower().startswith("group") else "dm", } + + +# 
------------------------------------------------------------------ +# QR code scan flow for obtaining bot credentials +# ------------------------------------------------------------------ + +_QR_GENERATE_URL = "https://work.weixin.qq.com/ai/qc/generate" +_QR_QUERY_URL = "https://work.weixin.qq.com/ai/qc/query_result" +_QR_CODE_PAGE = "https://work.weixin.qq.com/ai/qc/gen?source=hermes&scode=" +_QR_POLL_INTERVAL = 3 # seconds +_QR_POLL_TIMEOUT = 300 # 5 minutes + + +def qr_scan_for_bot_info( + *, + timeout_seconds: int = _QR_POLL_TIMEOUT, +) -> Optional[Dict[str, str]]: + """Run the WeCom QR scan flow to obtain bot_id and secret. + + Fetches a QR code from WeCom, renders it in the terminal, and polls + until the user scans it or the timeout expires. + + Returns ``{"bot_id": ..., "secret": ...}`` on success, ``None`` on + failure or timeout. + + Note: the ``work.weixin.qq.com/ai/qc/{generate,query_result}`` endpoints + used here are not part of WeCom's public developer API — they back the + admin-console web UI's bot-creation flow and may change without notice. + The same pattern is used by the feishu/dingtalk QR setup wizards. + """ + try: + import urllib.request + import urllib.parse + except ImportError: # pragma: no cover + logger.error("urllib is required for WeCom QR scan") + return None + + generate_url = f"{_QR_GENERATE_URL}?source=hermes" + + # ── Step 1: Fetch QR code ── + print(" Connecting to WeCom...", end="", flush=True) + try: + req = urllib.request.Request(generate_url, headers={"User-Agent": "HermesAgent/1.0"}) + with urllib.request.urlopen(req, timeout=15) as resp: + raw = json.loads(resp.read().decode("utf-8")) + except Exception as exc: + logger.error("WeCom QR: failed to fetch QR code: %s", exc) + print(f" failed: {exc}") + return None + + data = raw.get("data") or {} + scode = str(data.get("scode") or "").strip() + auth_url = str(data.get("auth_url") or "").strip() + + if not scode or not auth_url: + logger.error("WeCom QR: unexpected response format: %s", raw) + print(" failed: unexpected response format") + return None + + print(" done.") + + # ── Step 2: Render QR code in terminal ── + print() + qr_rendered = False + try: + import qrcode as _qrcode + qr = _qrcode.QRCode() + qr.add_data(auth_url) + qr.make(fit=True) + qr.print_ascii(invert=True) + qr_rendered = True + except ImportError: + pass + except Exception: + pass + + page_url = f"{_QR_CODE_PAGE}{urllib.parse.quote(scode)}" + if qr_rendered: + print(f"\n Scan the QR code above, or open this URL directly:\n {page_url}") + else: + print(f" Open this URL in WeCom on your phone:\n\n {page_url}\n") + print(" Tip: pip install qrcode to display a scannable QR code here next time") + print() + print(" Fetching configuration results...", end="", flush=True) + + # ── Step 3: Poll for result ── + import time + deadline = time.time() + timeout_seconds + query_url = f"{_QR_QUERY_URL}?scode={urllib.parse.quote(scode)}" + poll_count = 0 + + while time.time() < deadline: + try: + req = urllib.request.Request(query_url, headers={"User-Agent": "HermesAgent/1.0"}) + with urllib.request.urlopen(req, timeout=10) as resp: + result = json.loads(resp.read().decode("utf-8")) + except Exception as exc: + logger.debug("WeCom QR poll error: %s", exc) + time.sleep(_QR_POLL_INTERVAL) + continue + + poll_count += 1 + # Print a dot on every poll so progress is visible within 3s. 
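+        # (At the 3 s poll interval that is at most ~100 dots over the
+        #  default 5-minute timeout.)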
+ print(".", end="", flush=True) + + result_data = result.get("data") or {} + status = str(result_data.get("status") or "").lower() + + if status == "success": + print() # newline after "Fetching configuration results..." dots + bot_info = result_data.get("bot_info") or {} + bot_id = str(bot_info.get("botid") or bot_info.get("bot_id") or "").strip() + secret = str(bot_info.get("secret") or "").strip() + if bot_id and secret: + return {"bot_id": bot_id, "secret": secret} + logger.warning( + "WeCom QR: scan reported success but bot_info missing or incomplete: %s", + result_data, + ) + print( + " QR scan reported success but no bot credentials were returned.\n" + " This usually means the bot was not actually created on the WeCom side.\n" + " Falling back to manual credential entry." + ) + return None + + time.sleep(_QR_POLL_INTERVAL) + + print() # newline after dots + print(f" QR scan timed out ({timeout_seconds // 60} minutes). Please try again.") + return None diff --git a/gateway/platforms/whatsapp.py b/gateway/platforms/whatsapp.py index d1de5b8568..a82417a601 100644 --- a/gateway/platforms/whatsapp.py +++ b/gateway/platforms/whatsapp.py @@ -66,6 +66,37 @@ def _kill_port_process(port: int) -> None: except Exception: pass + +def _terminate_bridge_process(proc, *, force: bool = False) -> None: + """Terminate the bridge process using process-tree semantics where possible.""" + if _IS_WINDOWS: + cmd = ["taskkill", "/PID", str(proc.pid), "/T"] + if force: + cmd.append("/F") + try: + result = subprocess.run( + cmd, + capture_output=True, + text=True, + timeout=10, + ) + except FileNotFoundError: + if force: + proc.kill() + else: + proc.terminate() + return + + if result.returncode != 0: + details = (result.stderr or result.stdout or "").strip() + raise OSError(details or f"taskkill failed for PID {proc.pid}") + return + + import signal + + sig = signal.SIGTERM if not force else signal.SIGKILL + os.killpg(os.getpgid(proc.pid), sig) + import sys sys.path.insert(0, str(Path(__file__).resolve().parents[2])) @@ -118,6 +149,10 @@ class WhatsAppAdapter(BasePlatformAdapter): - bridge_script: Path to the Node.js bridge script - bridge_port: Port for HTTP communication (default: 3000) - session_path: Path to store WhatsApp session data + - dm_policy: "open" | "allowlist" | "disabled" — how DMs are handled (default: "open") + - allow_from: List of sender IDs allowed in DMs (when dm_policy="allowlist") + - group_policy: "open" | "allowlist" | "disabled" — which groups are processed (default: "open") + - group_allow_from: List of group JIDs allowed (when group_policy="allowlist") """ # WhatsApp message limits — practical UX limit, not protocol max. 
@@ -140,6 +175,10 @@ class WhatsAppAdapter(BasePlatformAdapter): get_hermes_dir("platforms/whatsapp/session", "whatsapp/session") )) self._reply_prefix: Optional[str] = config.extra.get("reply_prefix") + self._dm_policy = str(config.extra.get("dm_policy") or os.getenv("WHATSAPP_DM_POLICY", "open")).strip().lower() + self._allow_from = self._coerce_allow_list(config.extra.get("allow_from") or config.extra.get("allowFrom")) + self._group_policy = str(config.extra.get("group_policy") or os.getenv("WHATSAPP_GROUP_POLICY", "open")).strip().lower() + self._group_allow_from = self._coerce_allow_list(config.extra.get("group_allow_from") or config.extra.get("groupAllowFrom")) self._mention_patterns = self._compile_mention_patterns() self._message_queue: asyncio.Queue = asyncio.Queue() self._bridge_log_fh = None @@ -163,6 +202,33 @@ class WhatsAppAdapter(BasePlatformAdapter): return {str(part).strip() for part in raw if str(part).strip()} return {part.strip() for part in str(raw).split(",") if part.strip()} + @staticmethod + def _coerce_allow_list(raw) -> set[str]: + """Parse allow_from / group_allow_from from config or env var.""" + if raw is None: + return set() + if isinstance(raw, list): + return {str(part).strip() for part in raw if str(part).strip()} + return {part.strip() for part in str(raw).split(",") if part.strip()} + + def _is_dm_allowed(self, sender_id: str) -> bool: + """Check whether a DM from the given sender should be processed.""" + if self._dm_policy == "disabled": + return False + if self._dm_policy == "allowlist": + return sender_id in self._allow_from + # "open" — all DMs allowed + return True + + def _is_group_allowed(self, chat_id: str) -> bool: + """Check whether a group chat should be processed.""" + if self._group_policy == "disabled": + return False + if self._group_policy == "allowlist": + return chat_id in self._group_allow_from + # "open" — all groups allowed + return True + def _compile_mention_patterns(self): patterns = self.config.extra.get("mention_patterns") if patterns is None: @@ -255,8 +321,18 @@ class WhatsAppAdapter(BasePlatformAdapter): return cleaned.strip() or text def _should_process_message(self, data: Dict[str, Any]) -> bool: - if not data.get("isGroup"): + is_group = data.get("isGroup", False) + if is_group: + chat_id = str(data.get("chatId") or "") + if not self._is_group_allowed(chat_id): + return False + else: + sender_id = str(data.get("senderId") or data.get("from") or "") + if not self._is_dm_allowed(sender_id): + return False + # DMs that pass the policy gate are always processed return True + # Group messages: check mention / free-response settings chat_id = str(data.get("chatId") or "") if chat_id in self._whatsapp_free_response_chats(): return True @@ -289,39 +365,40 @@ class WhatsAppAdapter(BasePlatformAdapter): logger.info("[%s] Bridge found at %s", self.name, bridge_path) # Acquire scoped lock to prevent duplicate sessions + lock_acquired = False try: if not self._acquire_platform_lock('whatsapp-session', str(self._session_path), 'WhatsApp session'): return False + lock_acquired = True except Exception as e: logger.warning("[%s] Could not acquire session lock (non-fatal): %s", self.name, e) - # Auto-install npm dependencies if node_modules doesn't exist - bridge_dir = bridge_path.parent - if not (bridge_dir / "node_modules").exists(): - print(f"[{self.name}] Installing WhatsApp bridge dependencies...") - try: - install_result = subprocess.run( - ["npm", "install", "--silent"], - cwd=str(bridge_dir), - capture_output=True, - text=True, - 
timeout=60, - ) - if install_result.returncode != 0: - print(f"[{self.name}] npm install failed: {install_result.stderr}") - return False - print(f"[{self.name}] Dependencies installed") - except Exception as e: - print(f"[{self.name}] Failed to install dependencies: {e}") - return False - try: + # Auto-install npm dependencies if node_modules doesn't exist + bridge_dir = bridge_path.parent + if not (bridge_dir / "node_modules").exists(): + print(f"[{self.name}] Installing WhatsApp bridge dependencies...") + try: + install_result = subprocess.run( + ["npm", "install", "--silent"], + cwd=str(bridge_dir), + capture_output=True, + text=True, + timeout=60, + ) + if install_result.returncode != 0: + print(f"[{self.name}] npm install failed: {install_result.stderr}") + return False + print(f"[{self.name}] Dependencies installed") + except Exception as e: + print(f"[{self.name}] Failed to install dependencies: {e}") + return False + # Ensure session directory exists self._session_path.mkdir(parents=True, exist_ok=True) # Check if bridge is already running and connected import aiohttp - import asyncio try: async with aiohttp.ClientSession() as session: async with session.get( @@ -452,10 +529,13 @@ class WhatsAppAdapter(BasePlatformAdapter): return True except Exception as e: - self._release_platform_lock() logger.error("[%s] Failed to start bridge: %s", self.name, e, exc_info=True) - self._close_bridge_log() return False + finally: + if not self._running: + if lock_acquired: + self._release_platform_lock() + self._close_bridge_log() def _close_bridge_log(self) -> None: """Close the bridge log file handle if open.""" @@ -487,22 +567,14 @@ class WhatsAppAdapter(BasePlatformAdapter): """Stop the WhatsApp bridge and clean up any orphaned processes.""" if self._bridge_process: try: - # Kill the entire process group so child node processes die too - import signal try: - if _IS_WINDOWS: - self._bridge_process.terminate() - else: - os.killpg(os.getpgid(self._bridge_process.pid), signal.SIGTERM) + _terminate_bridge_process(self._bridge_process, force=False) except (ProcessLookupError, PermissionError): self._bridge_process.terminate() await asyncio.sleep(1) if self._bridge_process.poll() is None: try: - if _IS_WINDOWS: - self._bridge_process.kill() - else: - os.killpg(os.getpgid(self._bridge_process.pid), signal.SIGKILL) + _terminate_bridge_process(self._bridge_process, force=True) except (ProcessLookupError, PermissionError): self._bridge_process.kill() except Exception as e: @@ -655,6 +727,8 @@ class WhatsAppAdapter(BasePlatformAdapter): chat_id: str, message_id: str, content: str, + *, + finalize: bool = False, ) -> SendResult: """Edit a previously sent message via the WhatsApp bridge.""" if not self._running or not self._http_session: @@ -766,6 +840,17 @@ class WhatsAppAdapter(BasePlatformAdapter): """Send a video natively via bridge — plays inline in WhatsApp.""" return await self._send_media_to_bridge(chat_id, video_path, "video", caption) + async def send_voice( + self, + chat_id: str, + audio_path: str, + caption: Optional[str] = None, + reply_to: Optional[str] = None, + **kwargs, + ) -> SendResult: + """Send an audio file as a WhatsApp voice message via bridge.""" + return await self._send_media_to_bridge(chat_id, audio_path, "audio", caption) + async def send_document( self, chat_id: str, diff --git a/gateway/run.py b/gateway/run.py index b72e95eb83..617a38418e 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -30,6 +30,8 @@ from pathlib import Path from datetime import datetime from typing 
import Dict, Optional, Any, List
 
+from agent.account_usage import fetch_account_usage, render_account_usage_lines
+
 # --- Agent cache tuning ---------------------------------------------------
 # Bounds the per-session AIAgent cache to prevent unbounded growth in
 # long-lived gateways (each AIAgent holds LLM clients, tool schemas,
@@ -86,7 +88,7 @@ sys.path.insert(0, str(Path(__file__).parent.parent))
 # Resolve Hermes home directory (respects HERMES_HOME override)
 from hermes_constants import get_hermes_home
-from utils import atomic_yaml_write, is_truthy_value
+from utils import atomic_yaml_write, base_url_host_matches, is_truthy_value
 _hermes_home = get_hermes_home()
 # Load environment variables from ~/.hermes/.env first.
@@ -96,6 +98,10 @@ from hermes_cli.env_loader import load_hermes_dotenv
 _env_path = _hermes_home / '.env'
 load_hermes_dotenv(hermes_home=_hermes_home, project_env=Path(__file__).resolve().parents[1] / '.env')
+
+_DOCKER_VOLUME_SPEC_RE = re.compile(r"^(?P<host>.+):(?P<container>/[^:]+?)(?::(?P<mode>[^:]+))?$")
+_DOCKER_MEDIA_OUTPUT_CONTAINER_PATHS = {"/output", "/outputs"}
+
 # Bridge config.yaml values into the environment so os.getenv() picks them up.
 # config.yaml is authoritative for terminal settings — overrides .env.
 _config_path = _hermes_home / 'config.yaml'
@@ -275,6 +281,7 @@ from gateway.session import (
     build_session_context,
     build_session_context_prompt,
     build_session_key,
+    is_shared_multi_user_session,
 )
 from gateway.delivery import DeliveryRouter
 from gateway.platforms.base import (
@@ -398,6 +405,33 @@ def _dequeue_pending_event(adapter, session_key: str) -> MessageEvent | None:
     return adapter.get_pending_message(session_key)
 
+_INTERRUPT_REASON_STOP = "Stop requested"
+_INTERRUPT_REASON_RESET = "Session reset requested"
+_INTERRUPT_REASON_TIMEOUT = "Execution timed out (inactivity)"
+_INTERRUPT_REASON_SSE_DISCONNECT = "SSE client disconnected"
+_INTERRUPT_REASON_GATEWAY_SHUTDOWN = "Gateway shutting down"
+_INTERRUPT_REASON_GATEWAY_RESTART = "Gateway restarting"
+
+_CONTROL_INTERRUPT_MESSAGES = frozenset(
+    {
+        _INTERRUPT_REASON_STOP.lower(),
+        _INTERRUPT_REASON_RESET.lower(),
+        _INTERRUPT_REASON_TIMEOUT.lower(),
+        _INTERRUPT_REASON_SSE_DISCONNECT.lower(),
+        _INTERRUPT_REASON_GATEWAY_SHUTDOWN.lower(),
+        _INTERRUPT_REASON_GATEWAY_RESTART.lower(),
+    }
+)
+
+
+def _is_control_interrupt_message(message: Optional[str]) -> bool:
+    """Return True when an interrupt message is internal control flow."""
+    if not message:
+        return False
+    normalized = " ".join(str(message).strip().split()).lower()
+    return normalized in _CONTROL_INTERRUPT_MESSAGES
+
+
 def _check_unavailable_skill(command_name: str) -> str | None:
     """Check if a command matches a known-but-inactive skill.
 
@@ -585,6 +619,7 @@ class GatewayRunner:
     def __init__(self, config: Optional[GatewayConfig] = None):
         self.config = config or load_gateway_config()
         self.adapters: Dict[Platform, BasePlatformAdapter] = {}
+        self._warn_if_docker_media_delivery_is_risky()
 
         # Load ephemeral config from config.yaml / env vars.
         # Both are injected at API-call time only and never persisted.
@@ -597,7 +632,6 @@ class GatewayRunner: self._restart_drain_timeout = self._load_restart_drain_timeout() self._provider_routing = self._load_provider_routing() self._fallback_model = self._load_fallback_model() - self._smart_model_routing = self._load_smart_model_routing() # Wire process registry into session store for reset protection from tools.process_registry import process_registry @@ -625,6 +659,7 @@ class GatewayRunner: self._running_agents_ts: Dict[str, float] = {} # start timestamp per session self._pending_messages: Dict[str, str] = {} # Queued messages during interrupt self._busy_ack_ts: Dict[str, float] = {} # last busy-ack timestamp per session (debounce) + self._session_run_generation: Dict[str, int] = {} # Cache AIAgent instances per session to preserve prompt caching. # Without this, a new AIAgent is created per message, rebuilding the @@ -675,7 +710,26 @@ class GatewayRunner: self._session_db = SessionDB() except Exception as e: logger.debug("SQLite session store not available: %s", e) - + + # Opportunistic state.db maintenance: prune ended sessions older + # than sessions.retention_days + optional VACUUM. Tracks last-run + # in state_meta so it only actually executes once per + # sessions.min_interval_hours. Gateway is long-lived so blocking + # a few seconds once per day is acceptable; failures are logged + # but never raised. + if self._session_db is not None: + try: + from hermes_cli.config import load_config as _load_full_config + _sess_cfg = (_load_full_config().get("sessions") or {}) + if _sess_cfg.get("auto_prune", False): + self._session_db.maybe_auto_prune_and_vacuum( + retention_days=int(_sess_cfg.get("retention_days", 90)), + min_interval_hours=int(_sess_cfg.get("min_interval_hours", 24)), + vacuum=bool(_sess_cfg.get("vacuum_after_prune", True)), + ) + except Exception as exc: + logger.debug("state.db auto-maintenance skipped: %s", exc) + # DM pairing store for code-based user authorization from gateway.pairing import PairingStore self.pairing_store = PairingStore() @@ -691,6 +745,53 @@ class GatewayRunner: self._background_tasks: set = set() + def _warn_if_docker_media_delivery_is_risky(self) -> None: + """Warn when Docker-backed gateways lack an explicit export mount. + + MEDIA delivery happens in the gateway process, so paths emitted by the model + must be readable from the host. A plain container-local path like + `/workspace/report.txt` or `/output/report.txt` often exists only inside + Docker, so users commonly need a dedicated export mount such as + `host-dir:/output`. 
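+
+        Illustrative example: with TERMINAL_DOCKER_VOLUMES set to
+        '["/home/user/.hermes/cache/documents:/output:rw"]', the volume
+        regex parses host=/home/user/.hermes/cache/documents,
+        container=/output, mode=rw, and the warning is suppressed.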
+ """ + if os.getenv("TERMINAL_ENV", "").strip().lower() != "docker": + return + + connected = self.config.get_connected_platforms() + messaging_platforms = [p for p in connected if p not in {Platform.LOCAL, Platform.API_SERVER, Platform.WEBHOOK}] + if not messaging_platforms: + return + + raw_volumes = os.getenv("TERMINAL_DOCKER_VOLUMES", "").strip() + volumes: List[str] = [] + if raw_volumes: + try: + parsed = json.loads(raw_volumes) + if isinstance(parsed, list): + volumes = [str(v) for v in parsed if isinstance(v, str)] + except Exception: + logger.debug("Could not parse TERMINAL_DOCKER_VOLUMES for gateway media warning", exc_info=True) + + has_explicit_output_mount = False + for spec in volumes: + match = _DOCKER_VOLUME_SPEC_RE.match(spec) + if not match: + continue + container_path = match.group("container") + if container_path in _DOCKER_MEDIA_OUTPUT_CONTAINER_PATHS: + has_explicit_output_mount = True + break + + if has_explicit_output_mount: + return + + logger.warning( + "Docker backend is enabled for the messaging gateway but no explicit host-visible " + "output mount (for example '/home/user/.hermes/cache/documents:/output') is configured. " + "This is fine if the model already emits host-visible paths, but MEDIA file delivery can fail " + "for container-local paths like '/workspace/...' or '/output/...'." + ) + # -- Setup skill availability ---------------------------------------- @@ -707,6 +808,10 @@ class GatewayRunner: _VOICE_MODE_PATH = _hermes_home / "gateway_voice_mode.json" + def _voice_key(self, platform: Platform, chat_id: str) -> str: + """Return a platform-namespaced key for voice mode state.""" + return f"{platform.value}:{chat_id}" + def _load_voice_modes(self) -> Dict[str, str]: try: data = json.loads(self._VOICE_MODE_PATH.read_text()) @@ -717,11 +822,21 @@ class GatewayRunner: return {} valid_modes = {"off", "voice_only", "all"} - return { - str(chat_id): mode - for chat_id, mode in data.items() - if mode in valid_modes - } + result = {} + for chat_id, mode in data.items(): + if mode not in valid_modes: + continue + key = str(chat_id) + # Skip legacy unprefixed keys (warn and skip) + if ":" not in key: + logger.warning( + "Skipping legacy unprefixed voice mode key %r during migration. " + "Re-enable voice mode on that chat to rebuild the prefixed key.", + key, + ) + continue + result[key] = mode + return result def _save_voice_modes(self) -> None: try: @@ -747,9 +862,14 @@ class GatewayRunner: disabled_chats = getattr(adapter, "_auto_tts_disabled_chats", None) if not isinstance(disabled_chats, set): return + platform = getattr(adapter, "platform", None) + if not isinstance(platform, Platform): + return disabled_chats.clear() + prefix = f"{platform.value}:" disabled_chats.update( - chat_id for chat_id, mode in self._voice_mode.items() if mode == "off" + key[len(prefix):] for key, mode in self._voice_mode.items() + if mode == "off" and key.startswith(prefix) ) async def _safe_adapter_disconnect(self, adapter, platform) -> None: @@ -1002,11 +1122,16 @@ class GatewayRunner: return model, runtime_kwargs def _resolve_turn_agent_config(self, user_message: str, model: str, runtime_kwargs: dict) -> dict: - from agent.smart_model_routing import resolve_turn_route + """Build the effective model/runtime config for a single turn. + + Always uses the session's primary model/provider. If `/fast` is + enabled and the model supports Priority Processing / Anthropic fast + mode, attach `request_overrides` so the API call is marked + accordingly. 
+ """ from hermes_cli.models import resolve_fast_mode_overrides - primary = { - "model": model, + runtime = { "api_key": runtime_kwargs.get("api_key"), "base_url": runtime_kwargs.get("base_url"), "provider": runtime_kwargs.get("provider"), @@ -1015,7 +1140,18 @@ class GatewayRunner: "args": list(runtime_kwargs.get("args") or []), "credential_pool": runtime_kwargs.get("credential_pool"), } - route = resolve_turn_route(user_message, getattr(self, "_smart_model_routing", {}), primary) + route = { + "model": model, + "runtime": runtime, + "signature": ( + model, + runtime["provider"], + runtime["base_url"], + runtime["api_mode"], + runtime["command"], + tuple(runtime["args"]), + ), + } service_tier = getattr(self, "_service_tier", None) if not service_tier: @@ -1023,7 +1159,7 @@ class GatewayRunner: return route try: - overrides = resolve_fast_mode_overrides(route.get("model")) + overrides = resolve_fast_mode_overrides(route["model"]) except Exception: overrides = None route["request_overrides"] = overrides @@ -1152,7 +1288,6 @@ class GatewayRunner: the prefill_messages_file key in ~/.hermes/config.yaml. Relative paths are resolved from ~/.hermes/. """ - import json as _json file_path = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "") if not file_path: try: @@ -1174,7 +1309,7 @@ class GatewayRunner: return [] try: with open(path, "r", encoding="utf-8") as f: - data = _json.load(f) + data = json.load(f) if not isinstance(data, list): logger.warning("Prefill messages file must contain a JSON array: %s", path) return [] @@ -1381,20 +1516,6 @@ class GatewayRunner: pass return None - @staticmethod - def _load_smart_model_routing() -> dict: - """Load optional smart cheap-vs-strong model routing config.""" - try: - import yaml as _y - cfg_path = _hermes_home / "config.yaml" - if cfg_path.exists(): - with open(cfg_path, encoding="utf-8") as _f: - cfg = _y.safe_load(_f) or {} - return cfg.get("smart_model_routing", {}) or {} - except Exception: - pass - return {} - def _snapshot_running_agents(self) -> Dict[str, Any]: return { session_key: agent @@ -1567,12 +1688,32 @@ class GatewayRunner: notified: set = set() for session_key in active: - # Parse platform + chat_id from the session key. - _parsed = _parse_session_key(session_key) - if not _parsed: - continue - platform_str = _parsed["platform"] - chat_id = _parsed["chat_id"] + source = None + try: + if getattr(self, "session_store", None) is not None: + self.session_store._ensure_loaded() + entry = self.session_store._entries.get(session_key) + source = getattr(entry, "origin", None) if entry else None + except Exception as e: + logger.debug( + "Failed to load session origin for shutdown notification %s: %s", + session_key, + e, + ) + + if source is not None: + platform_str = source.platform.value + chat_id = source.chat_id + thread_id = source.thread_id + else: + # Fall back to parsing the session key when no persisted + # origin is available (legacy sessions/tests). + _parsed = _parse_session_key(session_key) + if not _parsed: + continue + platform_str = _parsed["platform"] + chat_id = _parsed["chat_id"] + thread_id = _parsed.get("thread_id") # Deduplicate: one notification per chat, even if multiple # sessions (different users/threads) share the same chat. @@ -1588,7 +1729,6 @@ class GatewayRunner: # Include thread_id if present so the message lands in the # correct forum topic / thread. 
- thread_id = _parsed.get("thread_id") metadata = {"thread_id": thread_id} if thread_id else None await adapter.send(chat_id, msg, metadata=metadata) @@ -1841,6 +1981,39 @@ class GatewayRunner: "or configure platform allowlists (e.g., TELEGRAM_ALLOWED_USERS=your_id)." ) + # Discover Python plugins before shell hooks so plugin block + # decisions take precedence in tie cases. The CLI startup path + # does this via an explicit call in hermes_cli/main.py; the + # gateway lazily imports run_agent inside per-request handlers, + # so the discover_plugins() side-effect in model_tools.py is NOT + # guaranteed to have run by the time we reach this point. + try: + from hermes_cli.plugins import discover_plugins + discover_plugins() + except Exception: + logger.debug( + "plugin discovery failed at gateway startup", exc_info=True, + ) + + # Register declarative shell hooks from cli-config.yaml. Gateway + # has no TTY, so consent has to come from one of the three opt-in + # channels (--accept-hooks on launch, HERMES_ACCEPT_HOOKS env var, + # or hooks_auto_accept: true in config.yaml). We pass + # accept_hooks=False here and let register_from_config resolve + # the effective value from env + config itself — the CLI-side + # registration already honored --accept-hooks, and re-reading + # hooks_auto_accept here would just duplicate that lookup. + # Failures are logged but must never block gateway startup. + try: + from hermes_cli.config import load_config + from agent.shell_hooks import register_from_config + register_from_config(load_config(), accept_hooks=False) + except Exception: + logger.debug( + "shell-hook registration failed at gateway startup", + exc_info=True, + ) + # Discover and load event hooks self.hooks.discover_and_load() @@ -2441,7 +2614,7 @@ class GatewayRunner: _sk[:20], _e, ) self._interrupt_running_agents( - "Gateway restarting" if self._restart_requested else "Gateway shutting down" + _INTERRUPT_REASON_GATEWAY_RESTART if self._restart_requested else _INTERRUPT_REASON_GATEWAY_SHUTDOWN ) interrupt_deadline = asyncio.get_running_loop().time() + 5.0 while self._running_agents and asyncio.get_running_loop().time() < interrupt_deadline: @@ -2862,10 +3035,59 @@ class GatewayRunner: return bool(check_ids & allowed_ids) def _get_unauthorized_dm_behavior(self, platform: Optional[Platform]) -> str: - """Return how unauthorized DMs should be handled for a platform.""" + """Return how unauthorized DMs should be handled for a platform. + + Resolution order: + 1. Explicit per-platform ``unauthorized_dm_behavior`` in config — always wins. + 2. Explicit global ``unauthorized_dm_behavior`` in config — wins when no per-platform. + 3. When an allowlist (``PLATFORM_ALLOWED_USERS`` or ``GATEWAY_ALLOWED_USERS``) is + configured, default to ``"ignore"`` — the allowlist signals that the owner has + deliberately restricted access; spamming unknown contacts with pairing codes + is both noisy and a potential info-leak. (#9337) + 4. No allowlist and no explicit config → ``"pair"`` (open-gateway default). + """ config = getattr(self, "config", None) - if config and hasattr(config, "get_unauthorized_dm_behavior"): - return config.get_unauthorized_dm_behavior(platform) + + # Check for an explicit per-platform override first. 
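+        # Illustratively, an operator opts in per platform with:
+        #   platforms:
+        #     telegram:
+        #       unauthorized_dm_behavior: ignore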
+ if config and hasattr(config, "get_unauthorized_dm_behavior") and platform: + platform_cfg = config.platforms.get(platform) if hasattr(config, "platforms") else None + if platform_cfg and "unauthorized_dm_behavior" in getattr(platform_cfg, "extra", {}): + # Operator explicitly configured behavior for this platform — respect it. + return config.get_unauthorized_dm_behavior(platform) + + # Check for an explicit global config override. + if config and hasattr(config, "unauthorized_dm_behavior"): + if config.unauthorized_dm_behavior != "pair": # non-default → explicit override + return config.unauthorized_dm_behavior + + # No explicit override. Fall back to allowlist-aware default: + # if any allowlist is configured for this platform, silently drop + # unauthorized messages instead of sending pairing codes. + if platform: + platform_env_map = { + Platform.TELEGRAM: "TELEGRAM_ALLOWED_USERS", + Platform.DISCORD: "DISCORD_ALLOWED_USERS", + Platform.WHATSAPP: "WHATSAPP_ALLOWED_USERS", + Platform.SLACK: "SLACK_ALLOWED_USERS", + Platform.SIGNAL: "SIGNAL_ALLOWED_USERS", + Platform.EMAIL: "EMAIL_ALLOWED_USERS", + Platform.SMS: "SMS_ALLOWED_USERS", + Platform.MATTERMOST: "MATTERMOST_ALLOWED_USERS", + Platform.MATRIX: "MATRIX_ALLOWED_USERS", + Platform.DINGTALK: "DINGTALK_ALLOWED_USERS", + Platform.FEISHU: "FEISHU_ALLOWED_USERS", + Platform.WECOM: "WECOM_ALLOWED_USERS", + Platform.WECOM_CALLBACK: "WECOM_CALLBACK_ALLOWED_USERS", + Platform.WEIXIN: "WEIXIN_ALLOWED_USERS", + Platform.BLUEBUBBLES: "BLUEBUBBLES_ALLOWED_USERS", + Platform.QQBOT: "QQ_ALLOWED_USERS", + } + if os.getenv(platform_env_map.get(platform, ""), "").strip(): + return "ignore" + + if os.getenv("GATEWAY_ALLOWED_USERS", "").strip(): + return "ignore" + return "pair" async def _handle_message(self, event: MessageEvent) -> Optional[str]: @@ -3012,6 +3234,10 @@ class GatewayRunner: _quick_key[:30], _stale_age, _stale_idle, _raw_stale_timeout, _stale_detail, ) + self._invalidate_session_run_generation( + _quick_key, + reason="stale_running_agent_eviction", + ) self._release_running_agent_state(_quick_key) if _quick_key in self._running_agents: @@ -3035,15 +3261,12 @@ class GatewayRunner: # _interrupt_requested. Force-clean _running_agents so the session # is unlocked and subsequent messages are processed normally. if _cmd_def_inner and _cmd_def_inner.name == "stop": - running_agent = self._running_agents.get(_quick_key) - if running_agent and running_agent is not _AGENT_PENDING_SENTINEL: - running_agent.interrupt("Stop requested") - # Force-clean: remove the session lock regardless of agent state - adapter = self.adapters.get(source.platform) - if adapter and hasattr(adapter, 'get_pending_message'): - adapter.get_pending_message(_quick_key) # consume and discard - self._pending_messages.pop(_quick_key, None) - self._release_running_agent_state(_quick_key) + await self._interrupt_and_clear_session( + _quick_key, + source, + interrupt_reason=_INTERRUPT_REASON_STOP, + invalidation_reason="stop_command", + ) logger.info("STOP for session %s — agent interrupted, session lock released", _quick_key[:20]) return "⚡ Stopped. You can continue this session." @@ -3055,17 +3278,15 @@ class GatewayRunner: # doesn't get re-processed as a user message after the # interrupt completes. 
if _cmd_def_inner and _cmd_def_inner.name == "new":
-                running_agent = self._running_agents.get(_quick_key)
-                if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
-                    running_agent.interrupt("Session reset requested")
                 # Clear any pending messages so the old text doesn't replay
-                adapter = self.adapters.get(source.platform)
-                if adapter and hasattr(adapter, 'get_pending_message'):
-                    adapter.get_pending_message(_quick_key)  # consume and discard
-                self._pending_messages.pop(_quick_key, None)
+                await self._interrupt_and_clear_session(
+                    _quick_key,
+                    source,
+                    interrupt_reason=_INTERRUPT_REASON_RESET,
+                    invalidation_reason="new_command",
+                )
                 # Clean up the running agent entry so the reset handler
                 # doesn't think an agent is still active.
-                self._release_running_agent_state(_quick_key)
                 return await self._handle_reset_command(event)
 
             # /queue — queue without interrupting
@@ -3075,10 +3296,9 @@
                     return "Usage: /queue <message>"
                 adapter = self.adapters.get(source.platform)
                 if adapter:
-                    from gateway.platforms.base import MessageEvent as _ME, MessageType as _MT
-                    queued_event = _ME(
+                    queued_event = MessageEvent(
                         text=queued_text,
-                        message_type=_MT.TEXT,
+                        message_type=MessageType.TEXT,
                         source=event.source,
                         message_id=event.message_id,
                         channel_prompt=event.channel_prompt,
@@ -3100,10 +3320,9 @@
                 # Agent hasn't started yet — queue as turn-boundary fallback.
                 adapter = self.adapters.get(source.platform)
                 if adapter:
-                    from gateway.platforms.base import MessageEvent as _ME, MessageType as _MT
-                    queued_event = _ME(
+                    queued_event = MessageEvent(
                         text=steer_text,
-                        message_type=_MT.TEXT,
+                        message_type=MessageType.TEXT,
                         source=event.source,
                         message_id=event.message_id,
                         channel_prompt=event.channel_prompt,
@@ -3123,10 +3342,9 @@
                 # Running agent is missing or lacks steer() — fall back to queue.
                 adapter = self.adapters.get(source.platform)
                 if adapter:
-                    from gateway.platforms.base import MessageEvent as _ME, MessageType as _MT
-                    queued_event = _ME(
+                    queued_event = MessageEvent(
                         text=steer_text,
-                        message_type=_MT.TEXT,
+                        message_type=MessageType.TEXT,
                         source=event.source,
                         message_id=event.message_id,
                         channel_prompt=event.channel_prompt,
@@ -3156,6 +3374,20 @@
             if _cmd_def_inner and _cmd_def_inner.name == "background":
                 return await self._handle_background_command(event)
 
+            # Session-level toggles that are safe to run mid-agent —
+            # /yolo can unblock a pending approval prompt, /verbose cycles
+            # the tool-progress display mode for the ongoing stream.
+            # Both modify session state without needing agent interaction
+            # and must not be queued (the safety net would discard them).
+            # /fast and /reasoning are config-only and take effect next
+            # message, so they fall through to the catch-all busy response
+            # below — users should wait and set them between turns.
+            if _cmd_def_inner and _cmd_def_inner.name in ("yolo", "verbose"):
+                if _cmd_def_inner.name == "yolo":
+                    return await self._handle_yolo_command(event)
+                if _cmd_def_inner.name == "verbose":
+                    return await self._handle_verbose_command(event)
+
             # Gateway-handled info/control commands with dedicated
             # running-agent handlers.
if _cmd_def_inner and _cmd_def_inner.name in _DEDICATED_HANDLERS: @@ -3461,9 +3693,8 @@ class GatewayRunner: plugin_handler = get_plugin_command_handler(command.replace("_", "-")) if plugin_handler: user_args = event.get_command_args().strip() - import asyncio as _aio result = plugin_handler(user_args) - if _aio.iscoroutine(result): + if asyncio.iscoroutine(result): result = await result return str(result) if result else None except Exception as e: @@ -3546,9 +3777,10 @@ class GatewayRunner: # same session — corrupting the transcript. self._running_agents[_quick_key] = _AGENT_PENDING_SENTINEL self._running_agents_ts[_quick_key] = time.time() + _run_generation = self._begin_session_run_generation(_quick_key) try: - return await self._handle_message_with_agent(event, source, _quick_key) + return await self._handle_message_with_agent(event, source, _quick_key, _run_generation) finally: # If _run_agent replaced the sentinel with a real agent and # then cleaned it up, this is a no-op. If we exited early @@ -3579,12 +3811,12 @@ class GatewayRunner: history = history or [] message_text = event.text or "" - _is_shared_thread = ( - source.chat_type != "dm" - and source.thread_id - and not getattr(self.config, "thread_sessions_per_user", False) + _is_shared_multi_user = is_shared_multi_user_session( + source, + group_sessions_per_user=getattr(self.config, "group_sessions_per_user", True), + thread_sessions_per_user=getattr(self.config, "thread_sessions_per_user", False), ) - if _is_shared_thread and source.user_name: + if _is_shared_multi_user and source.user_name: message_text = f"[{source.user_name}] {message_text}" if event.media_urls: @@ -3644,9 +3876,7 @@ class GatewayRunner: for i, path in enumerate(event.media_urls): mtype = event.media_types[i] if i < len(event.media_types) else "" if mtype in ("", "application/octet-stream"): - import os as _os2 - - _ext = _os2.path.splitext(path)[1].lower() + _ext = os.path.splitext(path)[1].lower() if _ext in _TEXT_EXTENSIONS: mtype = "text/plain" else: @@ -3656,13 +3886,10 @@ class GatewayRunner: if not mtype.startswith(("application/", "text/")): continue - import os as _os - import re as _re - - basename = _os.path.basename(path) + basename = os.path.basename(path) parts = basename.split("_", 2) display_name = parts[2] if len(parts) >= 3 else basename - display_name = _re.sub(r'[^\w.\- ]', '_', display_name) + display_name = re.sub(r'[^\w.\- ]', '_', display_name) if mtype.startswith("text/"): context_note = ( @@ -3679,14 +3906,14 @@ class GatewayRunner: message_text = f"{context_note}\n\n{message_text}" if getattr(event, "reply_to_text", None) and event.reply_to_message_id: + # Always inject the reply-to pointer — even when the quoted text + # already appears in history. The prefix isn't deduplication, it's + # disambiguation: it tells the agent *which* prior message the user + # is referencing. History can contain the same or similar text + # multiple times, and without an explicit pointer the agent has to + # guess (or answer for both subjects). Token overhead is minimal. 
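+            # Illustratively, the agent then receives:
+            #   [Replying to: "deploy is failing on step 3"]
+            #
+            #   can you look into it?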
reply_snippet = event.reply_to_text[:500] - found_in_history = any( - reply_snippet[:200] in (msg.get("content") or "") - for msg in history - if msg.get("role") in ("assistant", "user", "tool") - ) - if not found_in_history: - message_text = f'[Replying to: "{reply_snippet}"]\n\n{message_text}' + message_text = f'[Replying to: "{reply_snippet}"]\n\n{message_text}' if "@" in message_text: try: @@ -3694,9 +3921,11 @@ class GatewayRunner: from agent.model_metadata import get_model_context_length _msg_cwd = os.environ.get("TERMINAL_CWD", os.path.expanduser("~")) + _msg_runtime = _resolve_runtime_agent_kwargs() _msg_ctx_len = get_model_context_length( self._model, - base_url=self._base_url or "", + base_url=self._base_url or _msg_runtime.get("base_url") or "", + api_key=_msg_runtime.get("api_key") or "", ) _ctx_result = await preprocess_context_references_async( message_text, @@ -3719,7 +3948,7 @@ class GatewayRunner: return message_text - async def _handle_message_with_agent(self, event, source, _quick_key: str): + async def _handle_message_with_agent(self, event, source, _quick_key: str, run_generation: int): """Inner handler that runs under the _running_agents sentinel guard.""" _msg_start_time = time.time() _platform_name = source.platform.value if hasattr(source.platform, "value") else str(source.platform) @@ -4176,6 +4405,15 @@ class GatewayRunner: if message_text is None: return + # Bind this gateway run generation to the adapter's active-session + # event so deferred post-delivery callbacks can be released by the + # same run that registered them. + self._bind_adapter_run_generation( + self.adapters.get(source.platform), + session_key, + run_generation, + ) + try: # Emit agent:start hook hook_ctx = { @@ -4194,6 +4432,7 @@ class GatewayRunner: source=source, session_id=session_entry.session_id, session_key=session_key, + run_generation=run_generation, event_message_id=event.message_id, channel_prompt=event.channel_prompt, ) @@ -4206,6 +4445,22 @@ class GatewayRunner: except Exception: pass + if not self._is_session_run_current(_quick_key, run_generation): + logger.info( + "Discarding stale agent result for %s — generation %d is no longer current", + _quick_key[:20] if _quick_key else "?", + run_generation, + ) + _stale_adapter = self.adapters.get(source.platform) + if getattr(type(_stale_adapter), "pop_post_delivery_callback", None) is not None: + _stale_adapter.pop_post_delivery_callback( + _quick_key, + generation=run_generation, + ) + elif _stale_adapter and hasattr(_stale_adapter, "_post_delivery_callbacks"): + _stale_adapter._post_delivery_callbacks.pop(_quick_key, None) + return None + response = agent_result.get("final_response") or "" # Convert the agent's internal "(empty)" sentinel into a @@ -4620,6 +4875,7 @@ class GatewayRunner: # Get existing session key session_key = self._session_key_for_source(source) + self._invalidate_session_run_generation(session_key, reason="session_reset") # Flush memories in the background (fire-and-forget) so the user # gets the "Session reset!" response immediately. @@ -4879,14 +5135,23 @@ class GatewayRunner: agent = self._running_agents.get(session_key) if agent is _AGENT_PENDING_SENTINEL: # Force-clean the sentinel so the session is unlocked. 
- self._release_running_agent_state(session_key) + await self._interrupt_and_clear_session( + session_key, + source, + interrupt_reason=_INTERRUPT_REASON_STOP, + invalidation_reason="stop_command_pending", + ) logger.info("STOP (pending) for session %s — sentinel cleared", session_key[:20]) return "⚡ Stopped. The agent hadn't started yet — you can continue this session." if agent: - agent.interrupt("Stop requested") # Force-clean the session lock so a truly hung agent doesn't # keep it locked forever. - self._release_running_agent_state(session_key) + await self._interrupt_and_clear_session( + session_key, + source, + interrupt_reason=_INTERRUPT_REASON_STOP, + invalidation_reason="stop_command_handler", + ) return "⚡ Stopped. You can continue this session." else: return "No active task to stop." @@ -4922,7 +5187,6 @@ class GatewayRunner: # Save the requester's routing info so the new gateway process can # notify them once it comes back online. try: - import json as _json notify_data = { "platform": event.source.platform.value if event.source.platform else None, "chat_id": event.source.chat_id, @@ -4930,7 +5194,7 @@ class GatewayRunner: if event.source.thread_id: notify_data["thread_id"] = event.source.thread_id (_hermes_home / ".restart_notify.json").write_text( - _json.dumps(notify_data) + json.dumps(notify_data) ) except Exception as e: logger.debug("Failed to write restart notify file: %s", e) @@ -4941,16 +5205,14 @@ class GatewayRunner: # marker persists so the new gateway can still detect a delayed # /restart redelivery from Telegram. Overwritten on every /restart. try: - import json as _json - import time as _time dedup_data = { "platform": event.source.platform.value if event.source.platform else None, - "requested_at": _time.time(), + "requested_at": time.time(), } if event.platform_update_id is not None: dedup_data["update_id"] = event.platform_update_id (_hermes_home / ".restart_last_processed.json").write_text( - _json.dumps(dedup_data) + json.dumps(dedup_data) ) except Exception as e: logger.debug("Failed to write restart dedup marker: %s", e) @@ -4998,12 +5260,10 @@ class GatewayRunner: return False try: - import json as _json - import time as _time marker_path = _hermes_home / ".restart_last_processed.json" if not marker_path.exists(): return False - data = _json.loads(marker_path.read_text()) + data = json.loads(marker_path.read_text()) except Exception: return False @@ -5017,7 +5277,7 @@ class GatewayRunner: # swallow a fresh /restart from the user. 
requested_at = data.get("requested_at") if isinstance(requested_at, (int, float)): - if _time.time() - requested_at > 300: + if time.time() - requested_at > 300: return False return event.platform_update_id <= recorded_uid @@ -5408,7 +5668,7 @@ class GatewayRunner: # Cache notice cache_enabled = ( - ("openrouter" in (result.base_url or "").lower() and "claude" in result.new_model.lower()) + (base_url_host_matches(result.base_url or "", "openrouter.ai") and "claude" in result.new_model.lower()) or result.api_mode == "anthropic_messages" ) if cache_enabled: @@ -5664,11 +5924,13 @@ class GatewayRunner: """Handle /voice [on|off|tts|channel|leave|status] command.""" args = event.get_command_args().strip().lower() chat_id = event.source.chat_id + platform = event.source.platform + voice_key = self._voice_key(platform, chat_id) - adapter = self.adapters.get(event.source.platform) + adapter = self.adapters.get(platform) if args in ("on", "enable"): - self._voice_mode[chat_id] = "voice_only" + self._voice_mode[voice_key] = "voice_only" self._save_voice_modes() if adapter: self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=False) @@ -5678,13 +5940,13 @@ class GatewayRunner: "Use /voice tts to get voice replies for all messages." ) elif args in ("off", "disable"): - self._voice_mode[chat_id] = "off" + self._voice_mode[voice_key] = "off" self._save_voice_modes() if adapter: self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=True) return "Voice mode disabled. Text-only replies." elif args == "tts": - self._voice_mode[chat_id] = "all" + self._voice_mode[voice_key] = "all" self._save_voice_modes() if adapter: self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=False) @@ -5697,7 +5959,7 @@ class GatewayRunner: elif args == "leave": return await self._handle_voice_channel_leave(event) elif args == "status": - mode = self._voice_mode.get(chat_id, "off") + mode = self._voice_mode.get(voice_key, "off") labels = { "off": "Off (text only)", "voice_only": "On (voice reply to voice messages)", @@ -5721,15 +5983,15 @@ class GatewayRunner: return f"Voice mode: {labels.get(mode, mode)}" else: # Toggle: off → on, on/all → off - current = self._voice_mode.get(chat_id, "off") + current = self._voice_mode.get(voice_key, "off") if current == "off": - self._voice_mode[chat_id] = "voice_only" + self._voice_mode[voice_key] = "voice_only" self._save_voice_modes() if adapter: self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=False) return "Voice mode enabled." 
else: - self._voice_mode[chat_id] = "off" + self._voice_mode[voice_key] = "off" self._save_voice_modes() if adapter: self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=True) @@ -5775,7 +6037,7 @@ class GatewayRunner: adapter._voice_text_channels[guild_id] = int(event.source.chat_id) if hasattr(adapter, "_voice_sources"): adapter._voice_sources[guild_id] = event.source.to_dict() - self._voice_mode[event.source.chat_id] = "all" + self._voice_mode[self._voice_key(event.source.platform, event.source.chat_id)] = "all" self._save_voice_modes() self._set_adapter_auto_tts_disabled(adapter, event.source.chat_id, disabled=False) return ( @@ -5802,7 +6064,7 @@ class GatewayRunner: except Exception as e: logger.warning("Error leaving voice channel: %s", e) # Always clean up state even if leave raised an exception - self._voice_mode[event.source.chat_id] = "off" + self._voice_mode[self._voice_key(event.source.platform, event.source.chat_id)] = "off" self._save_voice_modes() self._set_adapter_auto_tts_disabled(adapter, event.source.chat_id, disabled=True) if hasattr(adapter, "_voice_input_callback"): @@ -5814,7 +6076,7 @@ class GatewayRunner: Cleans up runner-side voice_mode state that the adapter cannot reach. """ - self._voice_mode[chat_id] = "off" + self._voice_mode[self._voice_key(Platform.DISCORD, chat_id)] = "off" self._save_voice_modes() adapter = self.adapters.get(Platform.DISCORD) self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=True) @@ -5900,7 +6162,7 @@ class GatewayRunner: return False chat_id = event.source.chat_id - voice_mode = self._voice_mode.get(chat_id, "off") + voice_mode = self._voice_mode.get(self._voice_key(event.source.platform, chat_id), "off") is_voice_input = (event.message_type == MessageType.VOICE) should = ( @@ -6213,6 +6475,11 @@ class GatewayRunner: session_id=task_id, platform=platform_key, user_id=source.user_id, + user_name=source.user_name, + chat_id=source.chat_id, + chat_name=source.chat_name, + chat_type=source.chat_type, + thread_id=source.thread_id, session_db=self._session_db, fallback_model=self._fallback_model, ) @@ -6973,6 +7240,7 @@ class GatewayRunner: tool_calls=msg.get("tool_calls"), tool_call_id=msg.get("tool_call_id"), reasoning=msg.get("reasoning"), + reasoning_content=msg.get("reasoning_content"), ) except Exception: pass # Best-effort copy @@ -7021,6 +7289,38 @@ class GatewayRunner: if cached: agent = cached[0] + # Resolve provider/base_url/api_key for the account-usage fetch. + # Prefer the live agent; fall back to persisted billing data on the + # SessionDB row so `/usage` still returns account info between turns + # when no agent is resident. + provider = getattr(agent, "provider", None) if agent and agent is not _AGENT_PENDING_SENTINEL else None + base_url = getattr(agent, "base_url", None) if agent and agent is not _AGENT_PENDING_SENTINEL else None + api_key = getattr(agent, "api_key", None) if agent and agent is not _AGENT_PENDING_SENTINEL else None + if not provider and getattr(self, "_session_db", None) is not None: + try: + _entry_for_billing = self.session_store.get_or_create_session(source) + persisted = self._session_db.get_session(_entry_for_billing.session_id) or {} + except Exception: + persisted = {} + provider = provider or persisted.get("billing_provider") + base_url = base_url or persisted.get("billing_base_url") + + # Fetch account usage off the event loop so slow provider APIs don't + # block the gateway. Failures are non-fatal -- account_lines stays []. 
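+        # (asyncio.to_thread runs the blocking fetch on the default
+        #  thread-pool executor, so the await below never blocks the loop.)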
+ account_lines: list[str] = [] + if provider: + try: + account_snapshot = await asyncio.to_thread( + fetch_account_usage, + provider, + base_url=base_url, + api_key=api_key, + ) + except Exception: + account_snapshot = None + if account_snapshot: + account_lines = render_account_usage_lines(account_snapshot, markdown=True) + if agent and hasattr(agent, "session_total_tokens") and agent.session_api_calls > 0: lines = [] @@ -7078,6 +7378,10 @@ class GatewayRunner: if ctx.compression_count: lines.append(f"Compressions: {ctx.compression_count}") + if account_lines: + lines.append("") + lines.extend(account_lines) + return "\n".join(lines) # No agent at all -- check session history for a rough count @@ -7087,23 +7391,26 @@ class GatewayRunner: from agent.model_metadata import estimate_messages_tokens_rough msgs = [m for m in history if m.get("role") in ("user", "assistant") and m.get("content")] approx = estimate_messages_tokens_rough(msgs) - return ( - f"📊 **Session Info**\n" - f"Messages: {len(msgs)}\n" - f"Estimated context: ~{approx:,} tokens\n" - f"_(Detailed usage available after the first agent response)_" - ) + lines = [ + "📊 **Session Info**", + f"Messages: {len(msgs)}", + f"Estimated context: ~{approx:,} tokens", + "_(Detailed usage available after the first agent response)_", + ] + if account_lines: + lines.append("") + lines.extend(account_lines) + return "\n".join(lines) + if account_lines: + return "\n".join(account_lines) return "No usage data available for this session." async def _handle_insights_command(self, event: MessageEvent) -> str: """Handle /insights command -- show usage insights and analytics.""" - import asyncio as _asyncio - args = event.get_command_args().strip() # Normalize Unicode dashes (Telegram/iOS auto-converts -- to em/en dash) - import re as _re - args = _re.sub(r'[\u2012\u2013\u2014\u2015](days|source)', r'--\1', args) + args = re.sub(r'[\u2012\u2013\u2014\u2015](days|source)', r'--\1', args) days = 30 source = None @@ -7132,7 +7439,7 @@ class GatewayRunner: from hermes_state import SessionDB from agent.insights import InsightsEngine - loop = _asyncio.get_running_loop() + loop = asyncio.get_running_loop() def _run_insights(): db = SessionDB() @@ -7490,9 +7797,6 @@ class GatewayRunner: the messenger. The user's next message is intercepted by ``_handle_message`` and written to ``.update_response``. """ - import json - import re as _re - pending_path = _hermes_home / ".update_pending.json" claimed_path = _hermes_home / ".update_pending.claimed.json" output_path = _hermes_home / ".update_output.txt" @@ -7537,7 +7841,7 @@ class GatewayRunner: return def _strip_ansi(text: str) -> str: - return _re.sub(r'\x1b\[[0-9;]*[A-Za-z]', '', text) + return re.sub(r'\x1b\[[0-9;]*[A-Za-z]', '', text) bytes_sent = 0 last_stream_time = loop.time() @@ -7685,9 +7989,6 @@ class GatewayRunner: cannot resolve the adapter (e.g. after a gateway restart where the platform hasn't reconnected yet). 
""" - import json - import re as _re - pending_path = _hermes_home / ".update_pending.json" claimed_path = _hermes_home / ".update_pending.claimed.json" output_path = _hermes_home / ".update_output.txt" @@ -7733,7 +8034,7 @@ class GatewayRunner: if adapter and chat_id: # Strip ANSI escape codes for clean display - output = _re.sub(r'\x1b\[[0-9;]*m', '', output).strip() + output = re.sub(r'\x1b\[[0-9;]*m', '', output).strip() if output: if len(output) > 3500: output = "…" + output[-3500:] @@ -7766,14 +8067,12 @@ class GatewayRunner: async def _send_restart_notification(self) -> None: """Notify the chat that initiated /restart that the gateway is back.""" - import json as _json - notify_path = _hermes_home / ".restart_notify.json" if not notify_path.exists(): return try: - data = _json.loads(notify_path.read_text()) + data = json.loads(notify_path.read_text()) platform_str = data.get("platform") chat_id = data.get("chat_id") thread_id = data.get("thread_id") @@ -7859,7 +8158,6 @@ class GatewayRunner: The enriched message string with vision descriptions prepended. """ from tools.vision_tools import vision_analyze_tool - import json as _json analysis_prompt = ( "Describe everything visible in this image in thorough detail. " @@ -7875,7 +8173,7 @@ class GatewayRunner: image_url=path, user_prompt=analysis_prompt, ) - result = _json.loads(result_json) + result = json.loads(result_json) if result.get("success"): description = result.get("analysis", "") enriched_parts.append( @@ -7934,7 +8232,6 @@ class GatewayRunner: return disabled_note from tools.transcription_tools import transcribe_audio - import asyncio enriched_parts = [] for path in audio_paths: @@ -8070,7 +8367,6 @@ class GatewayRunner: if not adapter: return try: - from gateway.platforms.base import MessageEvent, MessageType synth_event = MessageEvent( text=synth_text, message_type=MessageType.TEXT, @@ -8175,7 +8471,6 @@ class GatewayRunner: break if adapter and source.chat_id: try: - from gateway.platforms.base import MessageEvent, MessageType synth_event = MessageEvent( text=synth_text, message_type=MessageType.TEXT, @@ -8333,6 +8628,84 @@ class GatewayRunner: if hasattr(self, "_busy_ack_ts"): self._busy_ack_ts.pop(session_key, None) + def _begin_session_run_generation(self, session_key: str) -> int: + """Claim a fresh run generation token for ``session_key``. + + Every top-level gateway turn gets a monotonically increasing token. + If a later command like /stop or /new invalidates that token while the + old worker is still unwinding, the late result can be recognized and + dropped instead of bleeding into the fresh session. 
+ """ + if not session_key: + return 0 + generations = self.__dict__.get("_session_run_generation") + if generations is None: + generations = {} + self._session_run_generation = generations + next_generation = int(generations.get(session_key, 0)) + 1 + generations[session_key] = next_generation + return next_generation + + def _invalidate_session_run_generation(self, session_key: str, *, reason: str = "") -> int: + """Invalidate any in-flight run token for ``session_key``.""" + generation = self._begin_session_run_generation(session_key) + if reason: + logger.info( + "Invalidated run generation for %s → %d (%s)", + session_key[:20], + generation, + reason, + ) + return generation + + def _is_session_run_current(self, session_key: str, generation: int) -> bool: + """Return True when ``generation`` is still current for ``session_key``.""" + if not session_key: + return True + generations = self.__dict__.get("_session_run_generation") or {} + return int(generations.get(session_key, 0)) == int(generation) + + def _bind_adapter_run_generation( + self, + adapter: Any, + session_key: str, + generation: int | None, + ) -> None: + """Bind a gateway run generation to the adapter's active-session event.""" + if not adapter or not session_key or generation is None: + return + try: + interrupt_event = getattr(adapter, "_active_sessions", {}).get(session_key) + if interrupt_event is not None: + setattr(interrupt_event, "_hermes_run_generation", int(generation)) + except Exception: + pass + + async def _interrupt_and_clear_session( + self, + session_key: str, + source: SessionSource, + *, + interrupt_reason: str, + invalidation_reason: str, + release_running_state: bool = True, + ) -> None: + """Interrupt the current run and clear queued session state consistently.""" + if not session_key: + return + running_agent = self._running_agents.get(session_key) + if running_agent and running_agent is not _AGENT_PENDING_SENTINEL: + running_agent.interrupt(interrupt_reason) + self._invalidate_session_run_generation(session_key, reason=invalidation_reason) + adapter = self.adapters.get(source.platform) + if adapter and hasattr(adapter, "interrupt_session_activity"): + await adapter.interrupt_session_activity(session_key, source.chat_id) + if adapter and hasattr(adapter, "get_pending_message"): + adapter.get_pending_message(session_key) # consume and discard + self._pending_messages.pop(session_key, None) + if release_running_state: + self._release_running_agent_state(session_key) + def _evict_cached_agent(self, session_key: str) -> None: """Remove a cached agent for a session (called on /new, /model, etc).""" _lock = getattr(self, "_agent_cache_lock", None) @@ -8514,6 +8887,7 @@ class GatewayRunner: source: "SessionSource", session_id: str, session_key: str = None, + run_generation: Optional[int] = None, event_message_id: Optional[str] = None, ) -> Dict[str, Any]: """Forward the message to a remote Hermes API server instead of @@ -8549,6 +8923,11 @@ class GatewayRunner: proxy_key = os.getenv("GATEWAY_PROXY_KEY", "").strip() + def _run_still_current() -> bool: + if run_generation is None or not session_key: + return True + return self._is_session_run_current(session_key, run_generation) + # Build messages in OpenAI chat format -------------------------- # # The remote api_server can maintain session continuity via @@ -8613,7 +8992,6 @@ class GatewayRunner: if _streaming_enabled: try: from gateway.stream_consumer import GatewayStreamConsumer, StreamConsumerConfig - from gateway.config import Platform _adapter = 
self.adapters.get(source.platform) if _adapter: _adapter_supports_edit = getattr(_adapter, "SUPPORTS_MESSAGE_EDITING", True) @@ -8678,6 +9056,21 @@ class GatewayRunner: # Parse SSE stream buffer = "" async for chunk in resp.content.iter_any(): + if not _run_still_current(): + logger.info( + "Discarding stale proxy stream for %s — generation %d is no longer current", + session_key[:20] if session_key else "?", + run_generation or 0, + ) + return { + "final_response": "", + "messages": [], + "api_calls": 0, + "tools": [], + "history_offset": len(history), + "session_id": session_id, + "response_previewed": False, + } text = chunk.decode("utf-8", errors="replace") buffer += text @@ -8727,6 +9120,21 @@ class GatewayRunner: stream_task.cancel() _elapsed = time.time() - _start + if not _run_still_current(): + logger.info( + "Discarding stale proxy result for %s — generation %d is no longer current", + session_key[:20] if session_key else "?", + run_generation or 0, + ) + return { + "final_response": "", + "messages": [], + "api_calls": 0, + "tools": [], + "history_offset": len(history), + "session_id": session_id, + "response_previewed": False, + } logger.info( "proxy response: url=%s session=%s time=%.1fs response=%d chars", proxy_url, (session_id or "")[:20], _elapsed, len(full_response), @@ -8755,6 +9163,7 @@ class GatewayRunner: source: SessionSource, session_id: str, session_key: str = None, + run_generation: Optional[int] = None, _interrupt_depth: int = 0, event_message_id: Optional[str] = None, channel_prompt: Optional[str] = None, @@ -8780,11 +9189,17 @@ class GatewayRunner: source=source, session_id=session_id, session_key=session_key, + run_generation=run_generation, event_message_id=event_message_id, ) from run_agent import AIAgent import queue + + def _run_still_current() -> bool: + if run_generation is None or not session_key: + return True + return self._is_session_run_current(session_key, run_generation) user_config = _load_gateway_config() platform_key = _platform_config_key(source.platform) @@ -8839,7 +9254,7 @@ class GatewayRunner: def progress_callback(event_type: str, tool_name: str = None, preview: str = None, args: dict = None, **kwargs): """Callback invoked by agent on tool lifecycle events.""" - if not progress_queue: + if not progress_queue or not _run_still_current(): return # Only act on tool.started events (ignore tool.completed, reasoning.available, etc.) @@ -8860,8 +9275,7 @@ class GatewayRunner: if args: from agent.display import get_tool_preview_max_len _pl = get_tool_preview_max_len() - import json as _json - args_str = _json.dumps(args, ensure_ascii=False, default=str) + args_str = json.dumps(args, ensure_ascii=False, default=str) # When tool_preview_length is 0 (default), don't truncate # in verbose mode — the user explicitly asked for full # detail. Platform message-length limits handle the rest. @@ -8927,8 +9341,7 @@ class GatewayRunner: # Skip tool progress for platforms that don't support message # editing (e.g. iMessage/BlueBubbles) — each progress update # would become a separate message bubble, which is noisy. 
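A note on the override check the next hunk switches to the module-level import for: comparing the function object on the class, `type(adapter).edit_message is BasePlatformAdapter.edit_message`, asks "did this subclass override `edit_message`?" without ever calling it. A minimal self-contained sketch of the idiom (the classes here are illustrative stand-ins, not the gateway's real adapters):

```python
class BaseAdapter:
    def edit_message(self, chat_id, message_id, content):
        raise NotImplementedError("editing not supported")

class TelegramLike(BaseAdapter):
    def edit_message(self, chat_id, message_id, content):
        return f"edited {message_id}"

class IMessageLike(BaseAdapter):
    pass  # inherits the base stub -> cannot edit in place

def supports_editing(adapter) -> bool:
    # Compare the attribute on the class, not the bound method: bound
    # methods are created fresh on every attribute access, so
    # `adapter.edit_message is BaseAdapter.edit_message` would always
    # be False even when nothing was overridden.
    return type(adapter).edit_message is not BaseAdapter.edit_message

assert supports_editing(TelegramLike())
assert not supports_editing(IMessageLike())
```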
- from gateway.platforms.base import BasePlatformAdapter as _BaseAdapter - if type(adapter).edit_message is _BaseAdapter.edit_message: + if type(adapter).edit_message is BasePlatformAdapter.edit_message: while not progress_queue.empty(): try: progress_queue.get_nowait() @@ -8944,6 +9357,14 @@ class GatewayRunner: while True: try: + if not _run_still_current(): + while not progress_queue.empty(): + try: + progress_queue.get_nowait() + except Exception: + break + return + raw = progress_queue.get_nowait() # Handle dedup messages: update last line with repeat counter @@ -8969,6 +9390,9 @@ class GatewayRunner: await asyncio.sleep(_remaining) continue + if not _run_still_current(): + return + if can_edit and progress_msg_id is not None: # Try to edit the existing progress message full_text = "\n".join(progress_lines) @@ -9004,7 +9428,8 @@ class GatewayRunner: # Restore typing indicator await asyncio.sleep(0.3) - await adapter.send_typing(source.chat_id, metadata=_progress_metadata) + if _run_still_current(): + await adapter.send_typing(source.chat_id, metadata=_progress_metadata) except queue.Empty: await asyncio.sleep(0.3) @@ -9048,6 +9473,8 @@ class GatewayRunner: _hooks_ref = self.hooks def _step_callback_sync(iteration: int, prev_tools: list) -> None: + if not _run_still_current(): + return try: # prev_tools may be list[str] or list[dict] with "name"/"result" # keys. Normalise to keep "tool_names" backward-compatible for @@ -9078,7 +9505,7 @@ class GatewayRunner: _status_thread_metadata = {"thread_id": _progress_thread_id} if _progress_thread_id else None def _status_callback_sync(event_type: str, message: str) -> None: - if not _status_adapter: + if not _status_adapter or not _run_still_current(): return try: asyncio.run_coroutine_threadsafe( @@ -9209,12 +9636,16 @@ class GatewayRunner: metadata={"thread_id": _progress_thread_id} if _progress_thread_id else None, ) if _want_stream_deltas: - _stream_delta_cb = _stream_consumer.on_delta + def _stream_delta_cb(text: str) -> None: + if _run_still_current(): + _stream_consumer.on_delta(text) stream_consumer_holder[0] = _stream_consumer except Exception as _sc_err: logger.debug("Could not set up stream consumer: %s", _sc_err) def _interim_assistant_cb(text: str, *, already_streamed: bool = False) -> None: + if not _run_still_current(): + return if _stream_consumer is not None: if already_streamed: _stream_consumer.on_segment_break() @@ -9292,6 +9723,11 @@ class GatewayRunner: session_id=session_id, platform=platform_key, user_id=source.user_id, + user_name=source.user_name, + chat_id=source.chat_id, + chat_name=source.chat_name, + chat_type=source.chat_type, + thread_id=source.thread_id, gateway_session_key=session_key, session_db=self._session_db, fallback_model=self._fallback_model, @@ -9318,7 +9754,7 @@ class GatewayRunner: _bg_review_pending_lock = threading.Lock() def _deliver_bg_review_message(message: str) -> None: - if not _status_adapter: + if not _status_adapter or not _run_still_current(): return try: asyncio.run_coroutine_threadsafe( @@ -9342,7 +9778,7 @@ class GatewayRunner: # Background review delivery — send "💾 Memory updated" etc. to user def _bg_review_send(message: str) -> None: - if not _status_adapter: + if not _status_adapter or not _run_still_current(): return if not _bg_review_release.is_set(): with _bg_review_pending_lock: @@ -9355,9 +9791,16 @@ class GatewayRunner: # Register the release hook on the adapter so base.py's finally # block can fire it after delivering the main response. 
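The hunk below moves from poking the adapter's private `_post_delivery_callbacks` dict to a public `register_post_delivery_callback(..., generation=...)` API when the adapter exposes one. A hedged sketch of what a generation-aware registry could look like; the class itself is hypothetical, only the method names and the stale-generation semantics come from the diff:

```python
from typing import Callable, Optional

class PostDeliveryRegistry:
    """Sketch: callbacks keyed by session, tagged with a run generation."""

    def __init__(self) -> None:
        self._callbacks: dict[str, tuple[Optional[int], Callable[[], None]]] = {}

    def register(self, session_key: str, cb: Callable[[], None],
                 *, generation: Optional[int] = None) -> None:
        self._callbacks[session_key] = (generation, cb)

    def pop(self, session_key: str,
            *, generation: Optional[int] = None) -> Optional[Callable[[], None]]:
        entry = self._callbacks.get(session_key)
        if entry is None:
            return None
        stored_gen, cb = entry
        # A generation mismatch means the callback was registered by a
        # run that has since been superseded (e.g. /stop or /new landed
        # in between) — hand back nothing rather than firing it late.
        if generation is not None and stored_gen is not None and stored_gen != generation:
            return None
        del self._callbacks[session_key]
        return cb
```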
if _status_adapter and session_key: - _pdc = getattr(_status_adapter, "_post_delivery_callbacks", None) - if _pdc is not None: - _pdc[session_key] = _release_bg_review_messages + if getattr(type(_status_adapter), "register_post_delivery_callback", None) is not None: + _status_adapter.register_post_delivery_callback( + session_key, + _release_bg_review_messages, + generation=run_generation, + ) + else: + _pdc = getattr(_status_adapter, "_post_delivery_callbacks", None) + if _pdc is not None: + _pdc[session_key] = _release_bg_review_messages # Store agent reference for interrupt support agent_holder[0] = agent @@ -9959,7 +10402,7 @@ class GatewayRunner: # Interrupt the agent if it's still running so the thread # pool worker is freed. if _timed_out_agent and hasattr(_timed_out_agent, "interrupt"): - _timed_out_agent.interrupt("Execution timed out (inactivity)") + _timed_out_agent.interrupt(_INTERRUPT_REASON_TIMEOUT) _timeout_mins = int(_agent_timeout // 60) or 1 @@ -10024,11 +10467,29 @@ class GatewayRunner: if result and adapter and session_key: pending_event = _dequeue_pending_event(adapter, session_key) if result.get("interrupted") and not pending_event and result.get("interrupt_message"): - pending = result.get("interrupt_message") + interrupt_message = result.get("interrupt_message") + if _is_control_interrupt_message(interrupt_message): + logger.info( + "Ignoring control interrupt message for session %s: %s", + session_key[:20] if session_key else "?", + interrupt_message, + ) + else: + pending = interrupt_message elif pending_event: pending = pending_event.text or _build_media_placeholder(pending_event) logger.debug("Processing queued message after agent completion: '%s...'", pending[:40]) + # Leftover /steer: if a steer arrived after the last tool batch + # (e.g. during the final API call), the agent couldn't inject it + # and returned it in result["pending_steer"]. Deliver it as the + # next user turn so it isn't silently dropped. + if result and not pending and not pending_event: + _leftover_steer = result.get("pending_steer") + if _leftover_steer: + pending = _leftover_steer + logger.debug("Delivering leftover /steer as next turn: '%s...'", pending[:40]) + # Safety net: if the pending text is a slash command (e.g. "/stop", # "/new"), discard it — commands should never be passed to the agent # as user input. The primary fix is in base.py (commands bypass the @@ -10129,7 +10590,17 @@ class GatewayRunner: # first response has been delivered. Pop from the # adapter's callback dict (prevents double-fire in # base.py's finally block) and call it. 
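The `_INTERRUPT_REASON_TIMEOUT` constant and the `_is_control_interrupt_message` helper used below suggest a sentinel pattern: gateway-internal interrupts (timeouts, stop commands) carry a known constant string, so they can be told apart from a genuine user interjection that should become the next turn. A sketch under that assumption; only `_INTERRUPT_REASON_TIMEOUT` and the helper's name appear in the diff, the rest is illustrative:

```python
# Control-interrupt sentinels. Only _INTERRUPT_REASON_TIMEOUT is shown
# in the diff; _INTERRUPT_REASON_STOP is a hypothetical second member.
_INTERRUPT_REASON_TIMEOUT = "Execution timed out (inactivity)"
_INTERRUPT_REASON_STOP = "Stopped by user command"

_CONTROL_INTERRUPT_MESSAGES = frozenset({
    _INTERRUPT_REASON_TIMEOUT,
    _INTERRUPT_REASON_STOP,
})

def _is_control_interrupt_message(message: "str | None") -> bool:
    # Control interrupts describe gateway-internal events; they must
    # never be replayed to the agent as if a user had typed them.
    return bool(message) and message in _CONTROL_INTERRUPT_MESSAGES
```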
- if adapter and hasattr(adapter, "_post_delivery_callbacks"): + if getattr(type(adapter), "pop_post_delivery_callback", None) is not None: + _bg_cb = adapter.pop_post_delivery_callback( + session_key, + generation=run_generation, + ) + if callable(_bg_cb): + try: + _bg_cb() + except Exception: + pass + elif adapter and hasattr(adapter, "_post_delivery_callbacks"): _bg_cb = adapter._post_delivery_callbacks.pop(session_key, None) if callable(_bg_cb): try: @@ -10177,6 +10648,7 @@ class GatewayRunner: source=next_source, session_id=session_id, session_key=session_key, + run_generation=run_generation, _interrupt_depth=_interrupt_depth + 1, event_message_id=next_message_id, channel_prompt=next_channel_prompt, @@ -10322,7 +10794,6 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = # The PID file is scoped to HERMES_HOME, so future multi-profile # setups (each profile using a distinct HERMES_HOME) will naturally # allow concurrent instances without tripping this guard. - import time as _time from gateway.status import get_running_pid, remove_pid_file, terminate_pid existing_pid = get_running_pid() if existing_pid is not None and existing_pid != os.getpid(): @@ -10362,7 +10833,7 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = for _ in range(20): try: os.kill(existing_pid, 0) - _time.sleep(0.5) + time.sleep(0.5) except (ProcessLookupError, PermissionError): break # Process is gone else: @@ -10373,10 +10844,16 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = ) try: terminate_pid(existing_pid, force=True) - _time.sleep(0.5) + time.sleep(0.5) except (ProcessLookupError, PermissionError, OSError): pass remove_pid_file() + # remove_pid_file() is a no-op when the PID doesn't match. + # Force-unlink to cover the old-process-crashed case. + try: + (get_hermes_home() / "gateway.pid").unlink(missing_ok=True) + except Exception: + pass # Clean up any takeover marker the old process didn't consume # (e.g. SIGKILL'd before its shutdown handler could read it). try: @@ -10515,6 +10992,30 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = else: logger.info("Skipping signal handlers (not running in main thread).") + # Claim the PID file BEFORE bringing up any platform adapters. + # This closes the --replace race window: two concurrent `gateway run + # --replace` invocations both pass the termination-wait above, but + # only the winner of the O_CREAT|O_EXCL race below will ever open + # Telegram polling, Discord gateway sockets, etc. The loser exits + # cleanly before touching any external service. + import atexit + from gateway.status import write_pid_file, remove_pid_file, get_running_pid + _current_pid = get_running_pid() + if _current_pid is not None and _current_pid != os.getpid(): + logger.error( + "Another gateway instance (PID %d) started during our startup. " + "Exiting to avoid double-running.", _current_pid + ) + return False + try: + write_pid_file() + except FileExistsError: + logger.error( + "PID file race lost to another gateway instance. Exiting." 
+ ) + return False + atexit.register(remove_pid_file) + # Start the gateway success = await runner.start() if not success: @@ -10524,12 +11025,6 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = logger.error("Gateway exiting cleanly: %s", runner.exit_reason) return True - # Write PID file so CLI can detect gateway is running - import atexit - from gateway.status import write_pid_file, remove_pid_file - write_pid_file() - atexit.register(remove_pid_file) - # Start background cron ticker so scheduled jobs fire automatically. # Pass the event loop so cron delivery can use live adapters (E2EE support). cron_stop = threading.Event() diff --git a/gateway/session.py b/gateway/session.py index 8b31c2b0aa..ea3f174909 100644 --- a/gateway/session.py +++ b/gateway/session.py @@ -152,6 +152,7 @@ class SessionContext: source: SessionSource connected_platforms: List[Platform] home_channels: Dict[Platform, HomeChannel] + shared_multi_user_session: bool = False # Session metadata session_key: str = "" @@ -166,6 +167,7 @@ class SessionContext: "home_channels": { p.value: hc.to_dict() for p, hc in self.home_channels.items() }, + "shared_multi_user_session": self.shared_multi_user_session, "session_key": self.session_key, "session_id": self.session_id, "created_at": self.created_at.isoformat() if self.created_at else None, @@ -240,18 +242,16 @@ def build_session_context_prompt( lines.append(f"**Channel Topic:** {context.source.chat_topic}") # User identity. - # In shared thread sessions (non-DM with thread_id), multiple users - # contribute to the same conversation. Don't pin a single user name - # in the system prompt — it changes per-turn and would bust the prompt - # cache. Instead, note that this is a multi-user thread; individual - # sender names are prefixed on each user message by the gateway. - _is_shared_thread = ( - context.source.chat_type != "dm" - and context.source.thread_id - ) - if _is_shared_thread: + # In shared multi-user sessions (shared threads OR shared non-thread groups + # when group_sessions_per_user=False), multiple users contribute to the same + # conversation. Don't pin a single user name in the system prompt — it + # changes per-turn and would bust the prompt cache. Instead, note that + # this is a multi-user session; individual sender names are prefixed on + # each user message by the gateway. + if context.shared_multi_user_session: + session_label = "Multi-user thread" if context.source.thread_id else "Multi-user session" lines.append( - "**Session type:** Multi-user thread — messages are prefixed " + f"**Session type:** {session_label} — messages are prefixed " "with [sender name]. Multiple users may participate." ) elif context.source.user_name: @@ -467,6 +467,27 @@ class SessionEntry: ) +def is_shared_multi_user_session( + source: SessionSource, + *, + group_sessions_per_user: bool = True, + thread_sessions_per_user: bool = False, +) -> bool: + """Return True when a non-DM session is shared across participants. + + Mirrors the isolation rules in :func:`build_session_key`: + - DMs are never shared. + - Threads are shared unless ``thread_sessions_per_user`` is True. + - Non-thread group/channel sessions are shared unless + ``group_sessions_per_user`` is True (default: True = isolated). 
+ """ + if source.chat_type == "dm": + return False + if source.thread_id: + return not thread_sessions_per_user + return not group_sessions_per_user + + def build_session_key( source: SessionSource, group_sessions_per_user: bool = True, @@ -926,12 +947,18 @@ class SessionStore: continue # Never prune sessions with an active background process # attached — the user may still be waiting on output. + # The callback is keyed by session_key (see process_registry. + # has_active_for_session); passing session_id here used to + # never match, so active sessions got pruned anyway. if self._has_active_processes_fn is not None: try: - if self._has_active_processes_fn(entry.session_id): + if self._has_active_processes_fn(entry.session_key): continue - except Exception: - pass + except Exception as exc: + logger.debug( + "has_active_processes_fn raised during prune for %s: %s", + entry.session_key, exc, + ) if entry.updated_at < cutoff: removed_keys.append(key) for key in removed_keys: @@ -1120,6 +1147,10 @@ class SessionStore: tool_name=message.get("tool_name"), tool_calls=message.get("tool_calls"), tool_call_id=message.get("tool_call_id"), + reasoning=message.get("reasoning") if message.get("role") == "assistant" else None, + reasoning_content=message.get("reasoning_content") if message.get("role") == "assistant" else None, + reasoning_details=message.get("reasoning_details") if message.get("role") == "assistant" else None, + codex_reasoning_items=message.get("codex_reasoning_items") if message.get("role") == "assistant" else None, ) except Exception as e: logger.debug("Session DB operation failed: %s", e) @@ -1149,6 +1180,7 @@ class SessionStore: tool_calls=msg.get("tool_calls"), tool_call_id=msg.get("tool_call_id"), reasoning=msg.get("reasoning") if role == "assistant" else None, + reasoning_content=msg.get("reasoning_content") if role == "assistant" else None, reasoning_details=msg.get("reasoning_details") if role == "assistant" else None, codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None, ) @@ -1232,6 +1264,11 @@ def build_session_context( source=source, connected_platforms=connected, home_channels=home_channels, + shared_multi_user_session=is_shared_multi_user_session( + source, + group_sessions_per_user=getattr(config, "group_sessions_per_user", True), + thread_sessions_per_user=getattr(config, "thread_sessions_per_user", False), + ), ) if session_entry: diff --git a/gateway/session_context.py b/gateway/session_context.py index 7f8aca3eb9..9dc051e3a2 100644 --- a/gateway/session_context.py +++ b/gateway/session_context.py @@ -56,6 +56,12 @@ _SESSION_USER_ID: ContextVar = ContextVar("HERMES_SESSION_USER_ID", default=_UNS _SESSION_USER_NAME: ContextVar = ContextVar("HERMES_SESSION_USER_NAME", default=_UNSET) _SESSION_KEY: ContextVar = ContextVar("HERMES_SESSION_KEY", default=_UNSET) +# Cron auto-delivery vars — set per-job in run_job() so concurrent jobs +# don't clobber each other's delivery targets. 
+_CRON_AUTO_DELIVER_PLATFORM: ContextVar = ContextVar("HERMES_CRON_AUTO_DELIVER_PLATFORM", default=_UNSET) +_CRON_AUTO_DELIVER_CHAT_ID: ContextVar = ContextVar("HERMES_CRON_AUTO_DELIVER_CHAT_ID", default=_UNSET) +_CRON_AUTO_DELIVER_THREAD_ID: ContextVar = ContextVar("HERMES_CRON_AUTO_DELIVER_THREAD_ID", default=_UNSET) + _VAR_MAP = { "HERMES_SESSION_PLATFORM": _SESSION_PLATFORM, "HERMES_SESSION_CHAT_ID": _SESSION_CHAT_ID, @@ -64,6 +70,9 @@ _VAR_MAP = { "HERMES_SESSION_USER_ID": _SESSION_USER_ID, "HERMES_SESSION_USER_NAME": _SESSION_USER_NAME, "HERMES_SESSION_KEY": _SESSION_KEY, + "HERMES_CRON_AUTO_DELIVER_PLATFORM": _CRON_AUTO_DELIVER_PLATFORM, + "HERMES_CRON_AUTO_DELIVER_CHAT_ID": _CRON_AUTO_DELIVER_CHAT_ID, + "HERMES_CRON_AUTO_DELIVER_THREAD_ID": _CRON_AUTO_DELIVER_THREAD_ID, } diff --git a/gateway/status.py b/gateway/status.py index e1598e1797..74763332c8 100644 --- a/gateway/status.py +++ b/gateway/status.py @@ -225,8 +225,28 @@ def _cleanup_invalid_pid_path(pid_path: Path, *, cleanup_stale: bool) -> None: def write_pid_file() -> None: - """Write the current process PID and metadata to the gateway PID file.""" - _write_json_file(_get_pid_path(), _build_pid_record()) + """Write the current process PID and metadata to the gateway PID file. + + Uses atomic O_CREAT | O_EXCL creation so that concurrent --replace + invocations race: exactly one process wins and the rest get + FileExistsError. + """ + path = _get_pid_path() + path.parent.mkdir(parents=True, exist_ok=True) + record = json.dumps(_build_pid_record()) + try: + fd = os.open(path, os.O_CREAT | os.O_EXCL | os.O_WRONLY) + except FileExistsError: + raise # Let caller decide: another gateway is racing us + try: + with os.fdopen(fd, "w", encoding="utf-8") as f: + f.write(record) + except Exception: + try: + path.unlink(missing_ok=True) + except OSError: + pass + raise def write_runtime_status( diff --git a/gateway/stream_consumer.py b/gateway/stream_consumer.py index ae00aee392..78e365712d 100644 --- a/gateway/stream_consumer.py +++ b/gateway/stream_consumer.py @@ -430,6 +430,21 @@ class GatewayStreamConsumer: # a real string like "msg_1", not "__no_edit__", so that case # still resets and creates a fresh segment as intended.) if got_segment_break: + # If the segment-break edit failed to deliver the + # accumulated content (flood control that has not yet + # promoted to fallback mode, or fallback mode itself), + # _accumulated still holds pre-boundary text the user + # never saw. Flush that tail as a continuation message + # before the reset below wipes _accumulated — otherwise + # text generated before the tool boundary is silently + # dropped (issue #8124). + if ( + self._accumulated + and not current_update_visible + and self._message_id + and self._message_id != "__no_edit__" + ): + await self._flush_segment_tail_on_edit_failure() self._reset_segment_state(preserve_no_edit=True) await asyncio.sleep(0.05) # Small yield to not busy-loop @@ -556,6 +571,30 @@ class GatewayStreamConsumer: if final_text.strip() and final_text != self._visible_prefix(): continuation = final_text else: + # Defence-in-depth for #7183: the last edit may still show the + # cursor character because fallback mode was entered after an + # edit failure left it stuck. Try one final edit to strip it + # so the message doesn't freeze with a visible ▉. Best-effort + # — if this edit also fails (flood control still active), + # _try_strip_cursor has already been called on fallback entry + # and the adaptive-backoff retries will have had their shot. 
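Both stream-consumer fixes in this area (#8124's segment-tail flush and #7183's cursor strip, whose implementation continues just below) reduce to the same bookkeeping question: what did the model produce versus what did the user actually see? A reduced model of the tail computation; the real `_flush_segment_tail_on_edit_failure` also handles fallback prefixes and display cleanup:

```python
def undelivered_tail(accumulated: str, visible: str) -> str:
    """Sketch: the portion of `accumulated` the user never saw.

    `visible` is the prefix last successfully delivered by an edit;
    whatever follows it must be flushed as a new message before the
    segment reset wipes `accumulated`.
    """
    tail = accumulated
    if visible and tail.startswith(visible):
        tail = tail[len(visible):].lstrip()
    return tail

# An edit failed after "Step 1 done." was shown, but generation had
# already moved past it — this is the text a naive reset would drop.
assert undelivered_tail("Step 1 done. Running tests.", "Step 1 done.") == "Running tests."
```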
+ if ( + self._message_id + and self._last_sent_text + and self.cfg.cursor + and self._last_sent_text.endswith(self.cfg.cursor) + ): + clean_text = self._last_sent_text[:-len(self.cfg.cursor)] + try: + result = await self.adapter.edit_message( + chat_id=self.chat_id, + message_id=self._message_id, + content=clean_text, + ) + if result.success: + self._last_sent_text = clean_text + except Exception: + pass self._already_sent = True self._final_response_sent = True return @@ -620,6 +659,39 @@ class GatewayStreamConsumer: err_lower = err.lower() return "flood" in err_lower or "retry after" in err_lower or "rate" in err_lower + async def _flush_segment_tail_on_edit_failure(self) -> None: + """Deliver un-sent tail content before a segment-break reset. + + When an edit fails (flood control, transport error) and a tool + boundary arrives before the next retry, ``_accumulated`` holds text + that was generated but never shown to the user. Without this flush, + the segment reset would discard that tail and leave a frozen cursor + in the partial message. + + Sends the tail that sits after the last successfully-delivered + prefix as a new message, and best-effort strips the stuck cursor + from the previous partial message. + """ + if not self._fallback_final_send: + await self._try_strip_cursor() + visible = self._fallback_prefix or self._visible_prefix() + tail = self._accumulated + if visible and tail.startswith(visible): + tail = tail[len(visible):].lstrip() + tail = self._clean_for_display(tail) + if not tail.strip(): + return + try: + result = await self.adapter.send( + chat_id=self.chat_id, + content=tail, + metadata=self.metadata, + ) + if result.success: + self._already_sent = True + except Exception as e: + logger.error("Segment-break tail flush error: %s", e) + async def _try_strip_cursor(self) -> None: """Best-effort edit to remove the cursor from the last visible message. diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 4623147a5a..3fab36a2c3 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -20,6 +20,7 @@ import logging import os import shutil import shlex +import ssl import stat import base64 import hashlib @@ -71,6 +72,8 @@ DEFAULT_QWEN_BASE_URL = "https://portal.qwen.ai/v1" DEFAULT_GITHUB_MODELS_BASE_URL = "https://api.githubcopilot.com" DEFAULT_COPILOT_ACP_BASE_URL = "acp://copilot" DEFAULT_OLLAMA_CLOUD_BASE_URL = "https://ollama.com/v1" +STEPFUN_STEP_PLAN_INTL_BASE_URL = "https://api.stepfun.ai/step_plan/v1" +STEPFUN_STEP_PLAN_CN_BASE_URL = "https://api.stepfun.com/step_plan/v1" CODEX_OAUTH_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann" CODEX_OAUTH_TOKEN_URL = "https://auth.openai.com/oauth/token" CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120 @@ -151,7 +154,7 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = { id="gemini", name="Google AI Studio", auth_type="api_key", - inference_base_url="https://generativelanguage.googleapis.com/v1beta/openai", + inference_base_url="https://generativelanguage.googleapis.com/v1beta", api_key_env_vars=("GOOGLE_API_KEY", "GEMINI_API_KEY"), base_url_env_var="GEMINI_BASE_URL", ), @@ -167,8 +170,11 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = { id="kimi-coding", name="Kimi / Moonshot", auth_type="api_key", + # Legacy platform.moonshot.ai keys use this endpoint (OpenAI-compat). + # sk-kimi- (Kimi Code) keys are auto-redirected to api.kimi.com/coding + # by _resolve_kimi_base_url() below. 
inference_base_url="https://api.moonshot.ai/v1", - api_key_env_vars=("KIMI_API_KEY",), + api_key_env_vars=("KIMI_API_KEY", "KIMI_CODING_API_KEY"), base_url_env_var="KIMI_BASE_URL", ), "kimi-coding-cn": ProviderConfig( @@ -178,6 +184,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = { inference_base_url="https://api.moonshot.cn/v1", api_key_env_vars=("KIMI_CN_API_KEY",), ), + "stepfun": ProviderConfig( + id="stepfun", + name="StepFun Step Plan", + auth_type="api_key", + inference_base_url=STEPFUN_STEP_PLAN_INTL_BASE_URL, + api_key_env_vars=("STEPFUN_API_KEY",), + base_url_env_var="STEPFUN_BASE_URL", + ), "arcee": ProviderConfig( id="arcee", name="Arcee AI", @@ -339,10 +353,16 @@ def get_anthropic_key() -> str: # ============================================================================= # Kimi Code (kimi.com/code) issues keys prefixed "sk-kimi-" that only work -# on api.kimi.com/coding/v1. Legacy keys from platform.moonshot.ai work on -# api.moonshot.ai/v1 (the default). Auto-detect when user hasn't set +# on api.kimi.com/coding. Legacy keys from platform.moonshot.ai work on +# api.moonshot.ai/v1 (the old default). Auto-detect when user hasn't set # KIMI_BASE_URL explicitly. -KIMI_CODE_BASE_URL = "https://api.kimi.com/coding/v1" +# +# Note: the base URL intentionally has NO /v1 suffix. The /coding endpoint +# speaks the Anthropic Messages protocol, and the anthropic SDK appends +# "/v1/messages" internally — so "/coding" + SDK suffix → "/coding/v1/messages" +# (the correct target). Using "/coding/v1" here would produce +# "/coding/v1/v1/messages" (a 404). +KIMI_CODE_BASE_URL = "https://api.kimi.com/coding" def _resolve_kimi_base_url(api_key: str, default_url: str, env_override: str) -> str: @@ -353,6 +373,9 @@ def _resolve_kimi_base_url(api_key: str, default_url: str, env_override: str) -> """ if env_override: return env_override + # No key → nothing to infer from. Return default without inspecting. + if not api_key: + return default_url if api_key.startswith("sk-kimi-"): return KIMI_CODE_BASE_URL return default_url @@ -480,6 +503,14 @@ def _resolve_zai_base_url(api_key: str, default_url: str, env_override: str) -> if env_override: return env_override + # No API key set → don't probe (would fire N×M HTTPS requests with an + # empty Bearer token, all returning 401). This path is hit during + # auxiliary-client auto-detection when the user has no Z.AI credentials + # at all — the caller discards the result immediately, so the probe is + # pure latency for every AIAgent construction. + if not api_key: + return default_url + # Check provider-state cache for a previously-detected endpoint. 
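Pulling the Z.AI resolution logic above together, the order is: explicit env override, then the empty-key short-circuit, then the cached detection, then the live probe. A hedged sketch of that flow; the `cache` dict and `probe` callable are stand-ins for the provider-state store and the real endpoint probe:

```python
from typing import Callable, Optional

def resolve_base_url(
    api_key: str,
    default_url: str,
    env_override: str,
    cache: dict[str, str],
    probe: Callable[[str], Optional[str]],
) -> str:
    if env_override:
        return env_override
    if not api_key:
        # Probing with an empty Bearer token can only produce 401s —
        # skip the network round-trips entirely.
        return default_url
    if cached := cache.get("endpoint"):
        return cached
    detected = probe(api_key)
    if detected:
        cache["endpoint"] = detected  # remember for the next construction
        return detected
    return default_url
```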
auth_store = _load_auth_store() state = _load_provider_state(auth_store, "zai") or {} @@ -971,6 +1002,7 @@ def resolve_provider( "x-ai": "xai", "x.ai": "xai", "grok": "xai", "kimi": "kimi-coding", "kimi-for-coding": "kimi-coding", "moonshot": "kimi-coding", "kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn", + "step": "stepfun", "stepfun-coding-plan": "stepfun", "arcee-ai": "arcee", "arceeai": "arcee", "minimax-china": "minimax-cn", "minimax_cn": "minimax-cn", "claude": "anthropic", "claude-code": "anthropic", @@ -1652,7 +1684,7 @@ def _resolve_verify( insecure: Optional[bool] = None, ca_bundle: Optional[str] = None, auth_state: Optional[Dict[str, Any]] = None, -) -> bool | str: +) -> bool | ssl.SSLContext: tls_state = auth_state.get("tls") if isinstance(auth_state, dict) else {} tls_state = tls_state if isinstance(tls_state, dict) else {} @@ -1672,13 +1704,12 @@ def _resolve_verify( if effective_ca: ca_path = str(effective_ca) if not os.path.isfile(ca_path): - import logging - logging.getLogger("hermes.auth").warning( + logger.warning( "CA bundle path does not exist: %s — falling back to default certificates", ca_path, ) return True - return ca_path + return ssl.create_default_context(cafile=ca_path) return True @@ -2721,6 +2752,17 @@ def _update_config_for_provider( # Clear stale base_url to prevent contamination when switching providers model_cfg.pop("base_url", None) + # Clear stale api_key/api_mode left over from a previous custom provider. + # When the user switches from e.g. a MiniMax custom endpoint + # (api_mode=anthropic_messages, api_key=mxp-...) to a built-in provider + # (e.g. OpenRouter), the stale api_key/api_mode would override the new + # provider's credentials and transport choice. Built-in providers that + # need a specific api_mode (copilot, xai) set it at request-resolution + # time via `_copilot_runtime_api_mode` / `_detect_api_mode_for_url`, so + # removing the persisted value here is safe. + model_cfg.pop("api_key", None) + model_cfg.pop("api_mode", None) + # When switching to a non-OpenRouter provider, ensure model.default is # valid for the new provider. An OpenRouter-formatted name like # "anthropic/claude-opus-4.6" will fail on direct-API providers. @@ -3353,7 +3395,7 @@ def _login_nous(args, pconfig: ProviderConfig) -> None: ) from hermes_cli.models import ( - _PROVIDER_MODELS, get_pricing_for_provider, filter_nous_free_models, + _PROVIDER_MODELS, get_pricing_for_provider, check_nous_free_tier, partition_nous_models_by_tier, ) model_ids = _PROVIDER_MODELS.get("nous", []) @@ -3362,7 +3404,6 @@ def _login_nous(args, pconfig: ProviderConfig) -> None: unavailable_models: list = [] if model_ids: pricing = get_pricing_for_provider("nous") - model_ids = filter_nous_free_models(model_ids, pricing) free_tier = check_nous_free_tier() if free_tier: model_ids, unavailable_models = partition_nous_models_by_tier( diff --git a/hermes_cli/auth_commands.py b/hermes_cli/auth_commands.py index 30e5182949..9c33200107 100644 --- a/hermes_cli/auth_commands.py +++ b/hermes_cli/auth_commands.py @@ -152,6 +152,23 @@ def auth_add_command(args) -> None: pool = load_pool(provider) + # Clear ALL suppressions for this provider — re-adding a credential is + # a strong signal the user wants auth re-enabled. This covers env:* + # (shell-exported vars), gh_cli (copilot), claude_code, qwen-cli, + # device_code (codex), etc. One consistent re-engagement pattern. + # Matches the Codex device_code re-link pattern that predates this. 
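Stepping back to the `_resolve_verify` change a few hunks up: returning `ssl.create_default_context(cafile=...)` instead of a bare path string means the CA bundle is parsed once and the result can be handed to HTTP clients that accept an `SSLContext` directly, rather than each client re-reading the PEM file per request. A sketch of the resulting contract, simplified; the real function also honours `insecure` flags and persisted TLS state:

```python
import os
import ssl

def resolve_verify(ca_bundle: "str | None") -> "bool | ssl.SSLContext":
    # True        -> use the default system trust store
    # SSLContext  -> custom CA bundle, parsed once and reusable
    if not ca_bundle:
        return True
    if not os.path.isfile(ca_bundle):
        # Missing bundle: fall back to default certificates rather
        # than failing every request with a confusing TLS error.
        return True
    return ssl.create_default_context(cafile=ca_bundle)
```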
+ if not provider.startswith(CUSTOM_POOL_PREFIX): + try: + from hermes_cli.auth import ( + _load_auth_store, + unsuppress_credential_source, + ) + suppressed = _load_auth_store().get("suppressed_sources", {}) + for src in list(suppressed.get(provider, []) or []): + unsuppress_credential_source(provider, src) + except Exception: + pass + if requested_type == AUTH_TYPE_API_KEY: token = (getattr(args, "api_key", None) or "").strip() if not token: @@ -338,71 +355,28 @@ def auth_remove_command(args) -> None: raise SystemExit(f'No credential matching "{target}" for provider {provider}.') print(f"Removed {provider} credential #{index} ({removed.label})") - # If this was an env-seeded credential, also clear the env var from .env - # so it doesn't get re-seeded on the next load_pool() call. - if removed.source.startswith("env:"): - env_var = removed.source[len("env:"):] - if env_var: - from hermes_cli.config import remove_env_value - cleared = remove_env_value(env_var) - if cleared: - print(f"Cleared {env_var} from .env") + # Unified removal dispatch. Every credential source Hermes reads from + # (env vars, external OAuth files, auth.json blocks, custom config) + # has a RemovalStep registered in agent.credential_sources. The step + # handles its source-specific cleanup and we centralise suppression + + # user-facing output here so every source behaves identically from + # the user's perspective. + from agent.credential_sources import find_removal_step + from hermes_cli.auth import suppress_credential_source - # If this was a singleton-seeded credential (OAuth device_code, hermes_pkce), - # clear the underlying auth store / credential file so it doesn't get - # re-seeded on the next load_pool() call. - elif provider == "openai-codex" and ( - removed.source == "device_code" or removed.source.endswith(":device_code") - ): - # Codex tokens live in TWO places: the Hermes auth store and - # ~/.codex/auth.json (the Codex CLI shared file). On every refresh, - # refresh_codex_oauth_pure() writes to both. So clearing only the - # Hermes auth store is not enough — _seed_from_singletons() will - # auto-import from ~/.codex/auth.json on the next load_pool() and - # the removal is instantly undone. Mark the source as suppressed - # so auto-import is skipped; leave ~/.codex/auth.json untouched so - # the Codex CLI itself keeps working. - from hermes_cli.auth import ( - _load_auth_store, _save_auth_store, _auth_store_lock, - suppress_credential_source, - ) - with _auth_store_lock(): - auth_store = _load_auth_store() - providers_dict = auth_store.get("providers") - if isinstance(providers_dict, dict) and provider in providers_dict: - del providers_dict[provider] - _save_auth_store(auth_store) - print(f"Cleared {provider} OAuth tokens from auth store") - suppress_credential_source(provider, "device_code") - print("Suppressed openai-codex device_code source — it will not be re-seeded.") - print("Note: Codex CLI credentials still live in ~/.codex/auth.json") - print("Run `hermes auth add openai-codex` to re-enable if needed.") + step = find_removal_step(provider, removed.source) + if step is None: + # Unregistered source — e.g. "manual", which has nothing external + # to clean up. The pool entry is already gone; we're done. 
+ return - elif removed.source == "device_code" and provider == "nous": - from hermes_cli.auth import ( - _load_auth_store, _save_auth_store, _auth_store_lock, - ) - with _auth_store_lock(): - auth_store = _load_auth_store() - providers_dict = auth_store.get("providers") - if isinstance(providers_dict, dict) and provider in providers_dict: - del providers_dict[provider] - _save_auth_store(auth_store) - print(f"Cleared {provider} OAuth tokens from auth store") - - elif removed.source == "hermes_pkce" and provider == "anthropic": - from hermes_constants import get_hermes_home - oauth_file = get_hermes_home() / ".anthropic_oauth.json" - if oauth_file.exists(): - oauth_file.unlink() - print("Cleared Hermes Anthropic OAuth credentials") - - elif removed.source == "claude_code" and provider == "anthropic": - from hermes_cli.auth import suppress_credential_source - suppress_credential_source(provider, "claude_code") - print("Suppressed claude_code credential — it will not be re-seeded.") - print("Note: Claude Code credentials still live in ~/.claude/.credentials.json") - print("Run `hermes auth add anthropic` to re-enable if needed.") + result = step.remove_fn(provider, removed) + for line in result.cleaned: + print(line) + if result.suppress: + suppress_credential_source(provider, removed.source) + for line in result.hints: + print(line) def auth_reset_command(args) -> None: diff --git a/hermes_cli/backup.py b/hermes_cli/backup.py index 667b8915af..8b5b90ef1f 100644 --- a/hermes_cli/backup.py +++ b/hermes_cli/backup.py @@ -201,7 +201,7 @@ def run_backup(args) -> None: else: zf.write(abs_path, arcname=str(rel_path)) total_bytes += abs_path.stat().st_size - except (PermissionError, OSError) as exc: + except (PermissionError, OSError, ValueError) as exc: errors.append(f" {rel_path}: {exc}") continue diff --git a/hermes_cli/codex_models.py b/hermes_cli/codex_models.py index f5616b68d6..9e2181b501 100644 --- a/hermes_cli/codex_models.py +++ b/hermes_cli/codex_models.py @@ -24,7 +24,6 @@ _FORWARD_COMPAT_TEMPLATE_MODELS: List[tuple[str, tuple[str, ...]]] = [ ("gpt-5.4-mini", ("gpt-5.3-codex", "gpt-5.2-codex")), ("gpt-5.4", ("gpt-5.3-codex", "gpt-5.2-codex")), ("gpt-5.3-codex", ("gpt-5.2-codex",)), - ("gpt-5.3-codex-spark", ("gpt-5.3-codex", "gpt-5.2-codex")), ] diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index f753d6f3a7..8b43a351fb 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -497,9 +497,8 @@ def _collect_gateway_skill_entries( # --- Tier 1: Plugin slash commands (never trimmed) --------------------- plugin_pairs: list[tuple[str, str]] = [] try: - from hermes_cli.plugins import get_plugin_manager - pm = get_plugin_manager() - plugin_cmds = getattr(pm, "_plugin_commands", {}) + from hermes_cli.plugins import get_plugin_commands + plugin_cmds = get_plugin_commands() for cmd_name in sorted(plugin_cmds): name = sanitize_name(cmd_name) if sanitize_name else cmd_name if not name: @@ -925,12 +924,22 @@ class SlashCommandCompleter(Completer): display_meta=meta, ) - # If the user typed @file: or @folder:, delegate to path completions + # If the user typed @file: / @folder: (or just @file / @folder with + # no colon yet), delegate to path completions. Accepting the bare + # form lets the picker surface directories as soon as the user has + # typed `@folder`, without requiring them to first accept the static + # `@folder:` hint and re-trigger completion. for prefix in ("@file:", "@folder:"): - if word.startswith(prefix): - path_part = word[len(prefix):] or "." 
+ bare = prefix[:-1] + + if word == bare or word.startswith(prefix): + want_dir = prefix == "@folder:" + path_part = '' if word == bare else word[len(prefix):] expanded = os.path.expanduser(path_part) - if expanded.endswith("/"): + + if not expanded or expanded == ".": + search_dir, match_prefix = ".", "" + elif expanded.endswith("/"): search_dir, match_prefix = expanded, "" else: search_dir = os.path.dirname(expanded) or "." @@ -946,15 +955,21 @@ class SlashCommandCompleter(Completer): for entry in sorted(entries): if match_prefix and not entry.lower().startswith(prefix_lower): continue - if count >= limit: - break full_path = os.path.join(search_dir, entry) is_dir = os.path.isdir(full_path) + # `@folder:` must only surface directories; `@file:` only + # regular files. Without this filter `@folder:` listed + # every .env / .gitignore in the cwd, defeating the + # explicit prefix and confusing users expecting a + # directory picker. + if want_dir != is_dir: + continue + if count >= limit: + break display_path = os.path.relpath(full_path) suffix = "/" if is_dir else "" - kind = "folder" if is_dir else "file" meta = "dir" if is_dir else _file_size_label(full_path) - completion = f"@{kind}:{display_path}{suffix}" + completion = f"{prefix}{display_path}{suffix}" yield Completion( completion, start_position=-len(word), diff --git a/hermes_cli/config.py b/hermes_cli/config.py index d53899b135..81275a7f9a 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -13,6 +13,7 @@ This module provides: """ import copy +import logging import os import platform import re @@ -24,6 +25,7 @@ from dataclasses import dataclass from pathlib import Path from typing import Dict, Any, Optional, List, Tuple +logger = logging.getLogger(__name__) _IS_WINDOWS = platform.system() == "Windows" _ENV_VAR_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$") @@ -385,6 +387,26 @@ DEFAULT_CONFIG = { # (terminal and execute_code). Skill-declared required_environment_variables # are passed through automatically; this list is for non-skill use cases. "env_passthrough": [], + # Extra files to source in the login shell when building the + # per-session environment snapshot. Use this when tools like nvm, + # pyenv, asdf, or custom PATH entries are registered by files that + # a bash login shell would skip — most commonly ``~/.bashrc`` + # (bash doesn't source bashrc in non-interactive login mode) or + # zsh-specific files like ``~/.zshrc`` / ``~/.zprofile``. + # Paths support ``~`` / ``${VAR}``. Missing files are silently + # skipped. When empty, Hermes auto-appends ``~/.bashrc`` if the + # snapshot shell is bash (this is the ``auto_source_bashrc`` + # behaviour — disable with that key if you want strict login-only + # semantics). + "shell_init_files": [], + # When true (default), Hermes sources ``~/.bashrc`` in the login + # shell used to build the environment snapshot. This captures + # PATH additions, shell functions, and aliases defined in the + # user's bashrc — which a plain ``bash -l -c`` would otherwise + # miss because bash skips bashrc in non-interactive login mode. + # Turn this off if you have a bashrc that misbehaves when sourced + # non-interactively (e.g. one that hard-exits on TTY checks). + "auto_source_bashrc": True, "docker_image": "nikolaik/python-nodejs:python3.11-nodejs20", "docker_forward_env": [], # Explicit environment variables to set inside Docker containers. 
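The completer change above has two parts: accept the bare `@file`/`@folder` forms before the colon is typed, and make each prefix surface only its own entry type. A reduced, runnable model of that filter (flat directory only; the real code also splits nested fragments with `os.path.dirname` and caps the result count):

```python
import os

def at_completions(word: str, search_dir: str = ".") -> list[str]:
    for prefix in ("@file:", "@folder:"):
        bare = prefix[:-1]  # "@file" / "@folder"
        if word != bare and not word.startswith(prefix):
            continue
        want_dir = prefix == "@folder:"
        frag = "" if word == bare else word[len(prefix):].lower()
        out = []
        for entry in sorted(os.listdir(search_dir)):
            if frag and not entry.lower().startswith(frag):
                continue
            full = os.path.join(search_dir, entry)
            # The key fix: the entry type must match the prefix, so
            # @folder: never lists files and @file: never lists dirs.
            if os.path.isdir(full) != want_dir:
                continue
            out.append(f"{prefix}{entry}" + ("/" if want_dir else ""))
        return out
    return []

# Bare @folder (no colon yet) already surfaces directories:
print(at_completions("@folder"))
```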
@@ -403,7 +425,11 @@ DEFAULT_CONFIG = { "container_persistent": True, # Persist filesystem across sessions # Docker volume mounts — share host directories with the container. # Each entry is "host_path:container_path" (standard Docker -v syntax). - # Example: ["/home/user/projects:/workspace/projects", "/data:/data"] + # Example: + # ["/home/user/projects:/workspace/projects", + # "/home/user/.hermes/cache/documents:/output"] + # For gateway MEDIA delivery, write inside Docker to /output/... and emit + # the host-visible path in MEDIA:, not the container path. "docker_volumes": [], # Explicit opt-in: mount the host cwd into /workspace for Docker sessions. # Default off because passing host directories into a sandbox weakens isolation. @@ -470,13 +496,6 @@ DEFAULT_CONFIG = { }, }, - "smart_model_routing": { - "enabled": False, - "max_simple_chars": 160, - "max_simple_words": 28, - "cheap_model": {}, - }, - # Auxiliary model config — provider:model for each side task. # Format: provider is the provider name, model is the model slug. # "auto" for provider = auto-detect best available provider. @@ -490,6 +509,7 @@ DEFAULT_CONFIG = { "base_url": "", # direct OpenAI-compatible endpoint (takes precedence over provider) "api_key": "", # API key for base_url (falls back to OPENAI_API_KEY) "timeout": 120, # seconds — LLM API call timeout; vision payloads need generous timeout + "extra_body": {}, # OpenAI-compatible provider-specific request fields "download_timeout": 30, # seconds — image HTTP download timeout; increase for slow connections }, "web_extract": { @@ -498,6 +518,7 @@ DEFAULT_CONFIG = { "base_url": "", "api_key": "", "timeout": 360, # seconds (6min) — per-attempt LLM summarization timeout; increase for slow local models + "extra_body": {}, }, "compression": { "provider": "auto", @@ -505,6 +526,7 @@ DEFAULT_CONFIG = { "base_url": "", "api_key": "", "timeout": 120, # seconds — compression summarises large contexts; increase for local models + "extra_body": {}, }, "session_search": { "provider": "auto", @@ -512,6 +534,8 @@ DEFAULT_CONFIG = { "base_url": "", "api_key": "", "timeout": 30, + "extra_body": {}, + "max_concurrency": 3, # Clamp parallel summaries to avoid request-burst 429s on small providers }, "skills_hub": { "provider": "auto", @@ -519,6 +543,7 @@ DEFAULT_CONFIG = { "base_url": "", "api_key": "", "timeout": 30, + "extra_body": {}, }, "approval": { "provider": "auto", @@ -526,6 +551,7 @@ DEFAULT_CONFIG = { "base_url": "", "api_key": "", "timeout": 30, + "extra_body": {}, }, "mcp": { "provider": "auto", @@ -533,6 +559,7 @@ DEFAULT_CONFIG = { "base_url": "", "api_key": "", "timeout": 30, + "extra_body": {}, }, "flush_memories": { "provider": "auto", @@ -540,6 +567,7 @@ DEFAULT_CONFIG = { "base_url": "", "api_key": "", "timeout": 30, + "extra_body": {}, }, "title_generation": { "provider": "auto", @@ -547,6 +575,7 @@ DEFAULT_CONFIG = { "base_url": "", "api_key": "", "timeout": 30, + "extra_body": {}, }, }, @@ -558,9 +587,14 @@ DEFAULT_CONFIG = { "bell_on_complete": False, "show_reasoning": False, "streaming": False, + "final_response_markdown": "strip", # render | strip | raw "inline_diffs": True, # Show inline diff previews for write actions (write_file, patch, skill_manage) "show_cost": False, # Show $ cost in the status bar (off by default) "skin": "default", + "user_message_preview": { # CLI: how many submitted user-message lines to echo back in scrollback + "first_lines": 2, + "last_lines": 2, + }, "interim_assistant_messages": True, # Gateway: show natural mid-turn assistant 
status messages "tool_progress_command": False, # Enable /verbose command in messaging gateway "tool_progress_overrides": {}, # DEPRECATED — use display.platforms instead @@ -579,6 +613,10 @@ DEFAULT_CONFIG = { }, # Text-to-speech configuration + # Each provider supports an optional `max_text_length:` override for the + # per-request input-character cap. Omit it to use the provider's documented + # limit (OpenAI 4096, xAI 15000, MiniMax 10000, ElevenLabs 5k-40k model-aware, + # Gemini 5000, Edge 5000, Mistral 4000, NeuTTS/KittenTTS 2000). "tts": { "provider": "edge", # "edge" (free) | "elevenlabs" (premium) | "openai" | "xai" | "minimax" | "mistral" | "neutts" (local) "edge": { @@ -631,6 +669,7 @@ DEFAULT_CONFIG = { "record_key": "ctrl+b", "max_recording_seconds": 120, "auto_tts": False, + "beep_enabled": True, # Play record start/stop beeps in CLI voice mode "silence_threshold": 200, # RMS below this = silence (0-32767) "silence_duration": 3.0, # Seconds of silence before auto-stop }, @@ -677,6 +716,12 @@ DEFAULT_CONFIG = { # independent of the parent's max_iterations) "reasoning_effort": "", # reasoning effort for subagents: "xhigh", "high", "medium", # "low", "minimal", "none" (empty = inherit parent's level) + "max_concurrent_children": 3, # max parallel children per batch; floor of 1 enforced, no ceiling + # Orchestrator role controls (see tools/delegate_tool.py:_get_max_spawn_depth + # and _get_orchestrator_enabled). Values are clamped to [1, 3] with a + # warning log if out of range. + "max_spawn_depth": 1, # depth cap (1 = flat [default], 2 = orchestrator→leaf, 3 = three-level) + "orchestrator_enabled": True, # kill switch for role="orchestrator" }, # Ephemeral prefill messages file — JSON list of {role, content} dicts @@ -689,6 +734,20 @@ DEFAULT_CONFIG = { # always goes to ~/.hermes/skills/. "skills": { "external_dirs": [], # e.g. ["~/.agents/skills", "/shared/team-skills"] + # Substitute ${HERMES_SKILL_DIR} and ${HERMES_SESSION_ID} in SKILL.md + # content with the absolute skill directory and the active session id + # before the agent sees it. Lets skill authors reference bundled + # scripts without the agent having to join paths. + "template_vars": True, + # Pre-execute inline shell snippets written as !`cmd` in SKILL.md + # body. Their stdout is inlined into the skill message before the + # agent reads it, so skills can inject dynamic context (dates, git + # state, detected tool versions, …). Off by default because any + # content from the skill author runs on the host without approval; + # only enable for skill sources you trust. + "inline_shell": False, + # Timeout (seconds) for each !`cmd` snippet when inline_shell is on. + "inline_shell_timeout": 10, }, # Honcho AI-native memory -- reads ~/.honcho/config.json as single source of truth. @@ -708,6 +767,14 @@ DEFAULT_CONFIG = { "auto_thread": True, # Auto-create threads on @mention in channels (like Slack) "reactions": True, # Add 👀/✅/❌ reactions to messages during processing "channel_prompts": {}, # Per-channel ephemeral system prompts (forum parents apply to child threads) + # discord_server tool: restrict which actions the agent may call. + # Default (empty) = all actions allowed (subject to bot privileged intents). + # Accepts comma-separated string ("list_guilds,list_channels,fetch_messages") + # or YAML list. Unknown names are dropped with a warning at load time. 
+ # Actions: list_guilds, server_info, list_channels, channel_info, + # list_roles, member_info, search_members, fetch_messages, list_pins, + # pin_message, unpin_message, create_thread, add_role, remove_role. + "server_actions": "", }, # WhatsApp platform settings (gateway mode) @@ -751,6 +818,21 @@ DEFAULT_CONFIG = { "command_allowlist": [], # User-defined quick commands that bypass the agent loop (type: exec only) "quick_commands": {}, + + # Shell-script hooks — declarative bridge that invokes shell scripts + # on plugin-hook events (pre_tool_call, post_tool_call, pre_llm_call, + # subagent_stop, etc.). Each entry maps an event name to a list of + # {matcher, command, timeout} dicts. First registration of a new + # command prompts the user for consent; subsequent runs reuse the + # stored approval from ~/.hermes/shell-hooks-allowlist.json. + # See `website/docs/user-guide/features/hooks.md` for schema + examples. + "hooks": {}, + + # Auto-accept shell-hook registrations without a TTY prompt. Also + # toggleable per-invocation via --accept-hooks or HERMES_ACCEPT_HOOKS=1. + # Gateway / cron / non-interactive runs need this (or one of the other + # channels) to pick up newly-added hooks. + "hooks_auto_accept": False, # Custom personalities — add your own entries here # Supports string format: {"name": "system prompt"} # Or dict format: {"name": {"description": "...", "system_prompt": "...", "tone": "...", "style": "..."}} @@ -774,6 +856,11 @@ DEFAULT_CONFIG = { # Wrap delivered cron responses with a header (task name) and footer # ("The agent cannot see this message"). Set to false for clean output. "wrap_response": True, + # Maximum number of due jobs to run in parallel per tick. + # null/0 = unbounded (limited only by thread count). + # 1 = serial (pre-v0.9 behaviour). + # Also overridable via HERMES_CRON_MAX_PARALLEL env var. + "max_parallel_jobs": None, }, # execute_code settings — controls the tool used for programmatic tool calls. @@ -806,8 +893,36 @@ DEFAULT_CONFIG = { "force_ipv4": False, }, + # Session storage — controls automatic cleanup of ~/.hermes/state.db. + # state.db accumulates every session, message, tool call, and FTS5 index + # entry forever. Without auto-pruning, a heavy user (gateway + cron) + # reports 384MB+ databases with 68K+ messages, which slows down FTS5 + # inserts, /resume listing, and insights queries. + "sessions": { + # When true, prune ended sessions older than retention_days once + # per (roughly) min_interval_hours at CLI/gateway/cron startup. + # Only touches ended sessions — active sessions are always preserved. + # Default false: session history is valuable for search recall, and + # silently deleting it could surprise users. Opt in explicitly. + "auto_prune": False, + # How many days of ended-session history to keep. Matches the + # default of ``hermes sessions prune``. + "retention_days": 90, + # VACUUM after a prune that actually deleted rows. SQLite does not + # reclaim disk space on DELETE — freed pages are just reused on + # subsequent INSERTs — so without VACUUM the file stays bloated + # even after pruning. VACUUM blocks writes for a few seconds per + # 100MB, so it only runs at startup, and only when prune deleted + # ≥1 session. + "vacuum_after_prune": True, + # Minimum hours between auto-maintenance runs (avoids repeating + # the sweep on every CLI invocation). Tracked via state_meta in + # state.db itself, so it's shared across all processes. 
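An illustrative sketch of the maintenance pass this `sessions` block configures, assuming a simplified schema with a `sessions(ended_at)` table and a `state_meta` key/value table. The real logic lives in the hermes_state layer; every name below is hypothetical:

```python
import sqlite3
import time

def maybe_prune(db: sqlite3.Connection, retention_days: int = 90,
                min_interval_hours: int = 24, vacuum: bool = True) -> None:
    now = time.time()
    row = db.execute(
        "SELECT value FROM state_meta WHERE key = 'last_auto_prune'"
    ).fetchone()
    if row and now - float(row[0]) < min_interval_hours * 3600:
        return  # swept recently; the timestamp lives in the DB, so it
                # is shared across CLI, gateway, and cron processes
    cutoff = now - retention_days * 86400
    deleted = db.execute(
        "DELETE FROM sessions WHERE ended_at IS NOT NULL AND ended_at < ?",
        (cutoff,),
    ).rowcount  # active sessions (ended_at IS NULL) are never touched
    db.execute(
        "INSERT OR REPLACE INTO state_meta(key, value) VALUES ('last_auto_prune', ?)",
        (str(now),),
    )
    db.commit()
    if vacuum and deleted > 0:
        # DELETE alone never shrinks the file — freed pages are merely
        # reused — so rewrite the DB to actually reclaim disk space.
        db.execute("VACUUM")

if __name__ == "__main__":
    db = sqlite3.connect(":memory:")
    db.execute("CREATE TABLE sessions (ended_at REAL)")
    db.execute("CREATE TABLE state_meta (key TEXT PRIMARY KEY, value TEXT)")
    db.execute("INSERT INTO sessions VALUES (0)")  # ancient ended session
    maybe_prune(db)
```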
+ "min_interval_hours": 24, + }, + # Config schema version - bump this when adding new required fields - "_config_version": 19, + "_config_version": 22, } # ============================================================================= @@ -963,6 +1078,22 @@ OPTIONAL_ENV_VARS = { "category": "provider", "advanced": True, }, + "STEPFUN_API_KEY": { + "description": "StepFun Step Plan API key", + "prompt": "StepFun Step Plan API key", + "url": "https://platform.stepfun.com/", + "password": True, + "category": "provider", + "advanced": True, + }, + "STEPFUN_BASE_URL": { + "description": "StepFun Step Plan base URL override", + "prompt": "StepFun Step Plan base URL (leave empty for default)", + "url": None, + "password": False, + "category": "provider", + "advanced": True, + }, "ARCEEAI_API_KEY": { "description": "Arcee AI API key", "prompt": "Arcee AI API key", @@ -1830,12 +1961,53 @@ def _normalize_custom_provider_entry( if not isinstance(entry, dict): return None + # Accept camelCase aliases commonly used in hand-written configs. + _CAMEL_ALIASES: Dict[str, str] = { + "apiKey": "api_key", + "baseUrl": "base_url", + "apiMode": "api_mode", + "keyEnv": "key_env", + "defaultModel": "default_model", + "contextLength": "context_length", + "rateLimitDelay": "rate_limit_delay", + } + _KNOWN_KEYS = { + "name", "api", "url", "base_url", "api_key", "key_env", + "api_mode", "transport", "model", "default_model", "models", + "context_length", "rate_limit_delay", + } + for camel, snake in _CAMEL_ALIASES.items(): + if camel in entry and snake not in entry: + logger.warning( + "providers.%s: camelCase key '%s' auto-mapped to '%s' " + "(use snake_case to avoid this warning)", + provider_key or "?", camel, snake, + ) + entry[snake] = entry[camel] + unknown = set(entry.keys()) - _KNOWN_KEYS - set(_CAMEL_ALIASES.keys()) + if unknown: + logger.warning( + "providers.%s: unknown config keys ignored: %s", + provider_key or "?", ", ".join(sorted(unknown)), + ) + + from urllib.parse import urlparse + base_url = "" - for url_key in ("api", "url", "base_url"): + for url_key in ("base_url", "url", "api"): raw_url = entry.get(url_key) if isinstance(raw_url, str) and raw_url.strip(): - base_url = raw_url.strip() - break + candidate = raw_url.strip() + parsed = urlparse(candidate) + if parsed.scheme and parsed.netloc: + base_url = candidate + break + else: + logger.warning( + "providers.%s: '%s' value '%s' is not a valid URL " + "(no scheme or host) — skipped", + provider_key or "?", url_key, candidate, + ) if not base_url: return None @@ -1974,6 +2146,7 @@ _KNOWN_ROOT_KEYS = { "fallback_providers", "credential_pool_strategies", "toolsets", "agent", "terminal", "display", "compression", "delegation", "auxiliary", "custom_providers", "context", "memory", "gateway", + "sessions", } # Valid fields inside a custom_providers list entry @@ -2131,7 +2304,6 @@ def print_config_warnings(config: Optional[Dict[str, Any]] = None) -> None: if not issues: return - import sys lines = ["\033[33m⚠ Config issues detected in config.yaml:\033[0m"] for ci in issues: marker = "\033[31m✗\033[0m" if ci.severity == "error" else "\033[33m⚠\033[0m" @@ -2146,7 +2318,6 @@ def warn_deprecated_cwd_env_vars(config: Optional[Dict[str, Any]] = None) -> Non These env vars are deprecated — the canonical setting is terminal.cwd in config.yaml. Prints a migration hint to stderr. 
""" - import os, sys messaging_cwd = os.environ.get("MESSAGING_CWD") terminal_cwd_env = os.environ.get("TERMINAL_CWD") @@ -2464,6 +2635,71 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A else: print(" ✓ Removed unused compression.summary_* keys") + # ── Version 20 → 21: plugins are now opt-in; grandfather existing user plugins ── + # The loader now requires plugins to appear in ``plugins.enabled`` before + # loading. Existing installs had all discovered plugins loading by default + # (minus anything in ``plugins.disabled``). To avoid silently breaking + # those setups on upgrade, populate ``plugins.enabled`` with the set of + # currently-installed user plugins that aren't already disabled. + # + # Bundled plugins (shipped in the repo itself) are NOT grandfathered — + # they ship off for everyone, including existing users, so any user who + # wants one has to opt in explicitly. + if current_ver < 21: + config = read_raw_config() + plugins_cfg = config.get("plugins") + if not isinstance(plugins_cfg, dict): + plugins_cfg = {} + # Only migrate if the enabled allow-list hasn't been set yet. + if "enabled" not in plugins_cfg: + disabled = plugins_cfg.get("disabled", []) or [] + if not isinstance(disabled, list): + disabled = [] + disabled_set = set(disabled) + + # Scan ``$HERMES_HOME/plugins/`` for currently installed user plugins. + grandfathered: List[str] = [] + try: + user_plugins_dir = get_hermes_home() / "plugins" + if user_plugins_dir.is_dir(): + for child in sorted(user_plugins_dir.iterdir()): + if not child.is_dir(): + continue + manifest_file = child / "plugin.yaml" + if not manifest_file.exists(): + manifest_file = child / "plugin.yml" + if not manifest_file.exists(): + continue + try: + with open(manifest_file) as _mf: + manifest = yaml.safe_load(_mf) or {} + except Exception: + manifest = {} + name = manifest.get("name") or child.name + if name in disabled_set: + continue + grandfathered.append(name) + except Exception: + grandfathered = [] + + plugins_cfg["enabled"] = grandfathered + config["plugins"] = plugins_cfg + save_config(config) + results["config_added"].append( + f"plugins.enabled (opt-in allow-list, {len(grandfathered)} grandfathered)" + ) + if not quiet: + if grandfathered: + print( + f" ✓ Plugins now opt-in: grandfathered " + f"{len(grandfathered)} existing plugin(s) into plugins.enabled" + ) + else: + print( + " ✓ Plugins now opt-in: no existing plugins to grandfather. " + "Use `hermes plugins enable ` to activate." + ) + if current_ver < latest_ver and not quiet: print(f"Config version: {current_ver} → {latest_ver}") @@ -2861,24 +3097,11 @@ _FALLBACK_COMMENT = """ # minimax (MINIMAX_API_KEY) — MiniMax # minimax-cn (MINIMAX_CN_API_KEY) — MiniMax (China) # -# For custom OpenAI-compatible endpoints, add base_url and api_key_env. +# For custom OpenAI-compatible endpoints, add base_url and key_env. # # fallback_model: # provider: openrouter # model: anthropic/claude-sonnet-4 -# -# ── Smart Model Routing ──────────────────────────────────────────────── -# Optional cheap-vs-strong routing for simple turns. -# Keeps the primary model for complex work, but can route short/simple -# messages to a cheaper model across providers. 
-# -# smart_model_routing: -# enabled: true -# max_simple_chars: 160 -# max_simple_words: 28 -# cheap_model: -# provider: openrouter -# model: google/gemini-2.5-flash """ @@ -2905,24 +3128,11 @@ _COMMENTED_SECTIONS = """ # minimax (MINIMAX_API_KEY) — MiniMax # minimax-cn (MINIMAX_CN_API_KEY) — MiniMax (China) # -# For custom OpenAI-compatible endpoints, add base_url and api_key_env. +# For custom OpenAI-compatible endpoints, add base_url and key_env. # # fallback_model: # provider: openrouter # model: anthropic/claude-sonnet-4 -# -# ── Smart Model Routing ──────────────────────────────────────────────── -# Optional cheap-vs-strong routing for simple turns. -# Keeps the primary model for complex work, but can route short/simple -# messages to a cheaper model across providers. -# -# smart_model_routing: -# enabled: true -# max_simple_chars: 160 -# max_simple_words: 28 -# cheap_model: -# provider: openrouter -# model: google/gemini-2.5-flash """ @@ -3115,7 +3325,6 @@ def _check_non_ascii_credential(key: str, value: str) -> str: bad_chars.append(f" position {i}: {ch!r} (U+{ord(ch):04X})") sanitized = value.encode("ascii", errors="ignore").decode("ascii") - import sys print( f"\n Warning: {key} contains non-ASCII characters that will break API requests.\n" f" This usually happens when copy-pasting from a PDF, rich-text editor,\n" @@ -3385,6 +3594,10 @@ def show_config(): print(f" Personality: {display.get('personality', 'kawaii')}") print(f" Reasoning: {'on' if display.get('show_reasoning', False) else 'off'}") print(f" Bell: {'on' if display.get('bell_on_complete', False) else 'off'}") + ump = display.get('user_message_preview', {}) if isinstance(display.get('user_message_preview', {}), dict) else {} + ump_first = ump.get('first_lines', 2) + ump_last = ump.get('last_lines', 2) + print(f" User preview: first {ump_first} line(s), last {ump_last} line(s)") # Terminal print() diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py index 4138aeaa27..064b1d68d1 100644 --- a/hermes_cli/doctor.py +++ b/hermes_cli/doctor.py @@ -30,6 +30,7 @@ load_dotenv(PROJECT_ROOT / ".env", override=False, encoding="utf-8") from hermes_cli.colors import Colors, color from hermes_constants import OPENROUTER_MODELS_URL +from utils import base_url_host_matches _PROVIDER_ENV_HINTS = ( @@ -277,6 +278,86 @@ def run_doctor(args): config_path = HERMES_HOME / 'config.yaml' if config_path.exists(): check_ok(f"{_DHH}/config.yaml exists") + + # Validate model.provider and model.default values + try: + import yaml as _yaml + cfg = _yaml.safe_load(config_path.read_text(encoding="utf-8")) or {} + model_section = cfg.get("model") or {} + provider_raw = (model_section.get("provider") or "").strip() + provider = provider_raw.lower() + default_model = (model_section.get("default") or model_section.get("model") or "").strip() + + known_providers: set = set() + try: + from hermes_cli.auth import PROVIDER_REGISTRY + known_providers = set(PROVIDER_REGISTRY.keys()) | {"openrouter", "custom", "auto"} + except Exception: + pass + try: + from hermes_cli.auth import resolve_provider as _resolve_provider + except Exception: + _resolve_provider = None + + canonical_provider = provider + if provider and _resolve_provider is not None and provider != "auto": + try: + canonical_provider = _resolve_provider(provider) + except Exception: + canonical_provider = None + + if provider and provider != "auto": + if canonical_provider is None or (known_providers and canonical_provider not in known_providers): + known_list = ", ".join(sorted(known_providers)) 
if known_providers else "(unavailable)" + check_fail( + f"model.provider '{provider_raw}' is not a recognised provider", + f"(known: {known_list})", + ) + issues.append( + f"model.provider '{provider_raw}' is unknown. " + f"Valid providers: {known_list}. " + f"Fix: run 'hermes config set model.provider '" + ) + + # Warn if model is set to a provider-prefixed name on a provider that doesn't use them + if default_model and "/" in default_model and canonical_provider and canonical_provider not in ("openrouter", "custom", "auto", "ai-gateway", "kilocode", "opencode-zen", "huggingface", "nous"): + check_warn( + f"model.default '{default_model}' uses a vendor/model slug but provider is '{provider_raw}'", + "(vendor-prefixed slugs belong to aggregators like openrouter)", + ) + issues.append( + f"model.default '{default_model}' is vendor-prefixed but model.provider is '{provider_raw}'. " + "Either set model.provider to 'openrouter', or drop the vendor prefix." + ) + + # Check credentials for the configured provider. + # Limit to API-key providers in PROVIDER_REGISTRY — other provider + # types (OAuth, SDK, openrouter/anthropic/custom/auto) have their + # own env-var checks elsewhere in doctor, and get_auth_status() + # returns a bare {logged_in: False} for anything it doesn't + # explicitly dispatch, which would produce false positives. + if canonical_provider and canonical_provider not in ("auto", "custom", "openrouter"): + try: + from hermes_cli.auth import PROVIDER_REGISTRY, get_auth_status + pconfig = PROVIDER_REGISTRY.get(canonical_provider) + if pconfig and getattr(pconfig, "auth_type", "") == "api_key": + status = get_auth_status(canonical_provider) or {} + configured = bool(status.get("configured") or status.get("logged_in") or status.get("api_key")) + if not configured: + check_fail( + f"model.provider '{canonical_provider}' is set but no API key is configured", + "(check ~/.hermes/.env or run 'hermes setup')", + ) + issues.append( + f"No credentials found for provider '{canonical_provider}'. " + f"Run 'hermes setup' or set the provider's API key in {_DHH}/.env, " + f"or switch providers with 'hermes config set model.provider '" + ) + except Exception: + pass + + except Exception as e: + check_warn("Could not validate model/provider config", f"({e})") else: fallback_config = PROJECT_ROOT / 'cli-config.yaml' if fallback_config.exists(): @@ -778,6 +859,16 @@ def run_doctor(args): elif response.status_code == 401: print(f"\r {color('✗', Colors.RED)} OpenRouter API {color('(invalid API key)', Colors.DIM)} ") issues.append("Check OPENROUTER_API_KEY in .env") + elif response.status_code == 402: + print(f"\r {color('✗', Colors.RED)} OpenRouter API {color('(out of credits — payment required)', Colors.DIM)}") + issues.append( + "OpenRouter account has insufficient credits. 
" + "Fix: run 'hermes config set model.provider ' to switch providers, " + "or fund your OpenRouter account at https://openrouter.ai/settings/credits" + ) + elif response.status_code == 429: + print(f"\r {color('✗', Colors.RED)} OpenRouter API {color('(rate limited)', Colors.DIM)} ") + issues.append("OpenRouter rate limit hit — consider switching to a different provider or waiting") else: print(f"\r {color('✗', Colors.RED)} OpenRouter API {color(f'(HTTP {response.status_code})', Colors.DIM)} ") except Exception as e: @@ -821,6 +912,7 @@ def run_doctor(args): _apikey_providers = [ ("Z.AI / GLM", ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"), "https://api.z.ai/api/paas/v4/models", "GLM_BASE_URL", True), ("Kimi / Moonshot", ("KIMI_API_KEY",), "https://api.moonshot.ai/v1/models", "KIMI_BASE_URL", True), + ("StepFun Step Plan", ("STEPFUN_API_KEY",), "https://api.stepfun.ai/step_plan/v1/models", "STEPFUN_BASE_URL", True), ("Kimi / Moonshot (China)", ("KIMI_CN_API_KEY",), "https://api.moonshot.cn/v1/models", None, True), ("Arcee AI", ("ARCEEAI_API_KEY",), "https://api.arcee.ai/api/v1/models", "ARCEE_BASE_URL", True), ("DeepSeek", ("DEEPSEEK_API_KEY",), "https://api.deepseek.com/v1/models", "DEEPSEEK_BASE_URL", True), @@ -852,18 +944,22 @@ def run_doctor(args): try: import httpx _base = os.getenv(_base_env, "") if _base_env else "" - # Auto-detect Kimi Code keys (sk-kimi-) → api.kimi.com + # Auto-detect Kimi Code keys (sk-kimi-) → api.kimi.com/coding/v1 + # (OpenAI-compat surface, which exposes /models for health check). if not _base and _key.startswith("sk-kimi-"): _base = "https://api.kimi.com/coding/v1" - # Anthropic-compat endpoints (/anthropic) don't support /models. - # Rewrite to the OpenAI-compat /v1 surface for health checks. + # Anthropic-compat endpoints (/anthropic, api.kimi.com/coding + # with no /v1) don't support /models. Rewrite to the OpenAI-compat + # /v1 surface for health checks. if _base and _base.rstrip("/").endswith("/anthropic"): from agent.auxiliary_client import _to_openai_base_url _base = _to_openai_base_url(_base) + if base_url_host_matches(_base, "api.kimi.com") and _base.rstrip("/").endswith("/coding"): + _base = _base.rstrip("/") + "/v1" _url = (_base.rstrip("/") + "/models") if _base else _default_url _headers = {"Authorization": f"Bearer {_key}"} - if "api.kimi.com" in _url.lower(): - _headers["User-Agent"] = "KimiCLI/1.30.0" + if base_url_host_matches(_base, "api.kimi.com"): + _headers["User-Agent"] = "claude-code/0.1.0" _resp = httpx.get( _url, headers=_headers, diff --git a/hermes_cli/dump.py b/hermes_cli/dump.py index f3a174e71b..90364a261a 100644 --- a/hermes_cli/dump.py +++ b/hermes_cli/dump.py @@ -160,7 +160,6 @@ def _config_overrides(config: dict) -> dict[str, str]: ("display", "streaming"), ("display", "skin"), ("display", "show_reasoning"), - ("smart_model_routing", "enabled"), ("privacy", "redact_pii"), ("tts", "provider"), ] diff --git a/hermes_cli/env_loader.py b/hermes_cli/env_loader.py index 853f0d2626..009f3de273 100644 --- a/hermes_cli/env_loader.py +++ b/hermes_cli/env_loader.py @@ -3,6 +3,7 @@ from __future__ import annotations import os +import sys from pathlib import Path from dotenv import load_dotenv @@ -14,6 +15,26 @@ from dotenv import load_dotenv # pure ASCII (they become HTTP header values). 
_CREDENTIAL_SUFFIXES = ("_API_KEY", "_TOKEN", "_SECRET", "_KEY") +# Names we've already warned about during this process, so repeated +# load_hermes_dotenv() calls (user env + project env, gateway hot-reload, +# tests) don't spam the same warning multiple times. +_WARNED_KEYS: set[str] = set() + + +def _format_offending_chars(value: str, limit: int = 3) -> str: + """Return a compact 'U+XXXX ('c'), ...' summary of non-ASCII codepoints.""" + seen: list[str] = [] + for ch in value: + if ord(ch) > 127: + label = f"U+{ord(ch):04X}" + if ch.isprintable(): + label += f" ({ch!r})" + if label not in seen: + seen.append(label) + if len(seen) >= limit: + break + return ", ".join(seen) + def _sanitize_loaded_credentials() -> None: """Strip non-ASCII characters from credential env vars in os.environ. @@ -21,14 +42,42 @@ def _sanitize_loaded_credentials() -> None: Called after dotenv loads so the rest of the codebase never sees non-ASCII API keys. Only touches env vars whose names end with known credential suffixes (``_API_KEY``, ``_TOKEN``, etc.). + + Emits a one-line warning to stderr when characters are stripped. + Silent stripping would mask copy-paste corruption (Unicode lookalike + glyphs from PDFs / rich-text editors, ZWSP from web pages) as opaque + provider-side "invalid API key" errors (see #6843). """ for key, value in list(os.environ.items()): if not any(key.endswith(suffix) for suffix in _CREDENTIAL_SUFFIXES): continue try: value.encode("ascii") + continue except UnicodeEncodeError: - os.environ[key] = value.encode("ascii", errors="ignore").decode("ascii") + pass + cleaned = value.encode("ascii", errors="ignore").decode("ascii") + os.environ[key] = cleaned + if key in _WARNED_KEYS: + continue + _WARNED_KEYS.add(key) + stripped = len(value) - len(cleaned) + detail = _format_offending_chars(value) or "non-printable" + print( + f" Warning: {key} contained {stripped} non-ASCII character" + f"{'s' if stripped != 1 else ''} ({detail}) — stripped so the " + f"key can be sent as an HTTP header.", + file=sys.stderr, + ) + print( + " This usually means the key was copy-pasted from a PDF, " + "rich-text editor, or web page that substituted lookalike\n" + " Unicode glyphs for ASCII letters. If authentication fails " + "(e.g. \"API key not valid\"), re-copy the key from the\n" + " provider's dashboard and run `hermes setup` (or edit the " + ".env file in a plain-text editor).", + file=sys.stderr, + ) def _load_dotenv_with_fallback(path: Path, *, override: bool) -> None: @@ -111,6 +160,8 @@ def load_hermes_dotenv( # Fix corrupted .env files before python-dotenv parses them (#8908). 
if user_env.exists(): _sanitize_env_file_if_needed(user_env) + if project_env_path and project_env_path.exists(): + _sanitize_env_file_if_needed(project_env_path) if user_env.exists(): _load_dotenv_with_fallback(user_env, override=True) diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py index bc809cadf9..59bd37d113 100644 --- a/hermes_cli/gateway.py +++ b/hermes_cli/gateway.py @@ -994,8 +994,6 @@ def get_systemd_linger_status() -> tuple[bool | None, str]: if not is_linux(): return None, "not supported on this platform" - import shutil - if not shutil.which("loginctl"): return None, "loginctl not found" @@ -1347,7 +1345,6 @@ def _ensure_linger_enabled() -> None: return import getpass - import shutil username = getpass.getuser() linger_file = Path(f"/var/lib/systemd/linger/{username}") @@ -1656,7 +1653,6 @@ def get_launchd_label() -> str: def _launchd_domain() -> str: - import os return f"gui/{os.getuid()}" @@ -2643,9 +2639,120 @@ def _setup_dingtalk(): def _setup_wecom(): - """Configure WeCom (Enterprise WeChat) via the standard platform setup.""" - wecom_platform = next(p for p in _PLATFORMS if p["key"] == "wecom") - _setup_standard_platform(wecom_platform) + """Interactive setup for WeCom — scan QR code or manual credential input.""" + print() + print(color(" ─── 💬 WeCom (Enterprise WeChat) Setup ───", Colors.CYAN)) + + existing_bot_id = get_env_value("WECOM_BOT_ID") + existing_secret = get_env_value("WECOM_SECRET") + if existing_bot_id and existing_secret: + print() + print_success("WeCom is already configured.") + if not prompt_yes_no(" Reconfigure WeCom?", False): + return + + # ── Choose setup method ── + print() + method_choices = [ + "Scan QR code to obtain Bot ID and Secret automatically (recommended)", + "Enter existing Bot ID and Secret manually", + ] + method_idx = prompt_choice(" How would you like to set up WeCom?", method_choices, 0) + + bot_id = None + secret = None + + if method_idx == 0: + # ── QR scan flow ── + try: + from gateway.platforms.wecom import qr_scan_for_bot_info + except Exception as exc: + print_error(f" WeCom QR scan import failed: {exc}") + qr_scan_for_bot_info = None + + if qr_scan_for_bot_info is not None: + try: + credentials = qr_scan_for_bot_info() + except KeyboardInterrupt: + print() + print_warning(" WeCom setup cancelled.") + return + except Exception as exc: + print_warning(f" QR scan failed: {exc}") + credentials = None + if credentials: + bot_id = credentials.get("bot_id", "") + secret = credentials.get("secret", "") + print_success(" ✔ QR scan successful! Bot ID and Secret obtained.") + + if not bot_id or not secret: + print_info(" QR scan did not complete. Continuing with manual input.") + bot_id = None + secret = None + + # ── Manual credential input ── + if not bot_id or not secret: + print() + print_info(" 1. Go to WeCom Application → Workspace → Smart Robot -> Create smart robots") + print_info(" 2. Select API Mode") + print_info(" 3. Copy the Bot ID and Secret from the bot's credentials info") + print_info(" 4. 
The bot connects via WebSocket — no public endpoint needed") + print() + bot_id = prompt(" Bot ID", password=False) + if not bot_id: + print_warning(" Skipped — WeCom won't work without a Bot ID.") + return + secret = prompt(" Secret", password=True) + if not secret: + print_warning(" Skipped — WeCom won't work without a Secret.") + return + + # ── Save core credentials ── + save_env_value("WECOM_BOT_ID", bot_id) + save_env_value("WECOM_SECRET", secret) + + # ── Allowed users (deny-by-default security) ── + print() + print_info(" The gateway DENIES all users by default for security.") + print_info(" Enter user IDs to create an allowlist, or leave empty.") + allowed = prompt(" Allowed user IDs (comma-separated, or empty)", password=False) + if allowed: + cleaned = allowed.replace(" ", "") + save_env_value("WECOM_ALLOWED_USERS", cleaned) + print_success(" Saved — only these users can interact with the bot.") + else: + print() + access_choices = [ + "Enable open access (anyone can message the bot)", + "Use DM pairing (unknown users request access, you approve with 'hermes pairing approve')", + "Disable direct messages", + "Skip for now (bot will deny all users until configured)", + ] + access_idx = prompt_choice(" How should unauthorized users be handled?", access_choices, 1) + if access_idx == 0: + save_env_value("WECOM_DM_POLICY", "open") + save_env_value("GATEWAY_ALLOW_ALL_USERS", "true") + print_warning(" Open access enabled — anyone can use your bot!") + elif access_idx == 1: + save_env_value("WECOM_DM_POLICY", "pairing") + print_success(" DM pairing mode — users will receive a code to request access.") + print_info(" Approve with: hermes pairing approve ") + elif access_idx == 2: + save_env_value("WECOM_DM_POLICY", "disabled") + print_warning(" Direct messages disabled.") + else: + print_info(" Skipped — configure later with 'hermes gateway setup'") + + # ── Home channel (optional) ── + print() + print_info(" Chat ID for scheduled results and notifications.") + home = prompt(" Home chat ID (optional, for cron/notifications)", password=False) + if home: + save_env_value("WECOM_HOME_CHANNEL", home) + print_success(f" Home channel set to {home}") + + print() + print_success("💬 WeCom configured!") def _is_service_installed() -> bool: @@ -3025,7 +3132,8 @@ def _setup_qqbot(): if method_idx == 0: # ── QR scan-to-configure ── try: - credentials = _qqbot_qr_flow() + from gateway.platforms.qqbot import qr_register + credentials = qr_register() except KeyboardInterrupt: print() print_warning(" QQ Bot setup cancelled.") @@ -3107,106 +3215,6 @@ def _setup_qqbot(): print_info(f" App ID: {credentials['app_id']}") -def _qqbot_render_qr(url: str) -> bool: - """Try to render a QR code in the terminal. Returns True if successful.""" - try: - import qrcode as _qr - qr = _qr.QRCode(border=1,error_correction=_qr.constants.ERROR_CORRECT_L) - qr.add_data(url) - qr.make(fit=True) - qr.print_ascii(invert=True) - return True - except Exception: - return False - - -def _qqbot_qr_flow(): - """Run the QR-code scan-to-configure flow. - - Returns a dict with app_id, client_secret, user_openid on success, - or None on failure/cancel. 
- """ - try: - from gateway.platforms.qqbot import ( - create_bind_task, poll_bind_result, build_connect_url, - decrypt_secret, BindStatus, - ) - from gateway.platforms.qqbot.constants import ONBOARD_POLL_INTERVAL - except Exception as exc: - print_error(f" QQBot onboard import failed: {exc}") - return None - - import asyncio - import time - - MAX_REFRESHES = 3 - refresh_count = 0 - - while refresh_count <= MAX_REFRESHES: - loop = asyncio.new_event_loop() - - # ── Create bind task ── - try: - task_id, aes_key = loop.run_until_complete(create_bind_task()) - except Exception as e: - print_warning(f" Failed to create bind task: {e}") - loop.close() - return None - - url = build_connect_url(task_id) - - # ── Display QR code + URL ── - print() - if _qqbot_render_qr(url): - print(f" Scan the QR code above, or open this URL directly:\n {url}") - else: - print(f" Open this URL in QQ on your phone:\n {url}") - print_info(" Tip: pip install qrcode to show a scannable QR code here") - - # ── Poll loop (silent — keep QR visible at bottom) ── - try: - while True: - try: - status, app_id, encrypted_secret, user_openid = loop.run_until_complete( - poll_bind_result(task_id) - ) - except Exception: - time.sleep(ONBOARD_POLL_INTERVAL) - continue - - if status == BindStatus.COMPLETED: - client_secret = decrypt_secret(encrypted_secret, aes_key) - print() - print_success(f" QR scan complete! (App ID: {app_id})") - if user_openid: - print_info(f" Scanner's OpenID: {user_openid}") - return { - "app_id": app_id, - "client_secret": client_secret, - "user_openid": user_openid, - } - - if status == BindStatus.EXPIRED: - refresh_count += 1 - if refresh_count > MAX_REFRESHES: - print() - print_warning(f" QR code expired {MAX_REFRESHES} times — giving up.") - return None - print() - print_warning(f" QR code expired, refreshing... ({refresh_count}/{MAX_REFRESHES})") - loop.close() - break # outer while creates a new task - - time.sleep(ONBOARD_POLL_INTERVAL) - except KeyboardInterrupt: - loop.close() - raise - finally: - loop.close() - - return None - - def _setup_signal(): """Interactive setup for Signal messenger.""" import shutil @@ -3394,6 +3402,8 @@ def gateway_setup(): _setup_feishu() elif platform["key"] == "qqbot": _setup_qqbot() + elif platform["key"] == "wecom": + _setup_wecom() else: _setup_standard_platform(platform) diff --git a/hermes_cli/hooks.py b/hermes_cli/hooks.py new file mode 100644 index 0000000000..97d9e36b30 --- /dev/null +++ b/hermes_cli/hooks.py @@ -0,0 +1,385 @@ +"""hermes hooks — inspect and manage shell-script hooks. + +Usage:: + + hermes hooks list + hermes hooks test [--for-tool X] [--payload-file F] + hermes hooks revoke + hermes hooks doctor + +Consent records live under ``~/.hermes/shell-hooks-allowlist.json`` and +hook definitions come from the ``hooks:`` block in ``~/.hermes/config.yaml`` +(the same config read by the CLI / gateway at startup). + +This module is a thin CLI shell over :mod:`agent.shell_hooks`; every +shared concern (payload serialisation, response parsing, allowlist +format) lives there. 
+""" + +from __future__ import annotations + +import json +import os +from pathlib import Path +from typing import Any, Dict, List, Optional + + +def hooks_command(args) -> None: + """Entry point for ``hermes hooks`` — dispatches to the requested action.""" + sub = getattr(args, "hooks_action", None) + + if not sub: + print("Usage: hermes hooks {list|test|revoke|doctor}") + print("Run 'hermes hooks --help' for details.") + return + + if sub in ("list", "ls"): + _cmd_list(args) + elif sub == "test": + _cmd_test(args) + elif sub in ("revoke", "remove", "rm"): + _cmd_revoke(args) + elif sub == "doctor": + _cmd_doctor(args) + else: + print(f"Unknown hooks subcommand: {sub}") + + +# --------------------------------------------------------------------------- +# list +# --------------------------------------------------------------------------- + +def _cmd_list(_args) -> None: + from hermes_cli.config import load_config + from agent import shell_hooks + + specs = shell_hooks.iter_configured_hooks(load_config()) + + if not specs: + print("No shell hooks configured in ~/.hermes/config.yaml.") + print("See `hermes hooks --help` or") + print(" website/docs/user-guide/features/hooks.md") + print("for the config schema and worked examples.") + return + + by_event: Dict[str, List] = {} + for spec in specs: + by_event.setdefault(spec.event, []).append(spec) + + allowlist = shell_hooks.load_allowlist() + approved = { + (e.get("event"), e.get("command")) + for e in allowlist.get("approvals", []) + if isinstance(e, dict) + } + + print(f"Configured shell hooks ({len(specs)} total):\n") + + for event in sorted(by_event.keys()): + print(f" [{event}]") + for spec in by_event[event]: + is_approved = (spec.event, spec.command) in approved + status = "✓ allowed" if is_approved else "✗ not allowlisted" + matcher_part = f" matcher={spec.matcher!r}" if spec.matcher else "" + print( + f" - {spec.command}{matcher_part} " + f"(timeout={spec.timeout}s, {status})" + ) + + if is_approved: + entry = shell_hooks.allowlist_entry_for(spec.event, spec.command) + if entry and entry.get("approved_at"): + print(f" approved_at: {entry['approved_at']}") + mtime_now = shell_hooks.script_mtime_iso(spec.command) + mtime_at = entry.get("script_mtime_at_approval") + if mtime_now and mtime_at and mtime_now > mtime_at: + print( + f" ⚠ script modified since approval " + f"(was {mtime_at}, now {mtime_now}) — " + f"run `hermes hooks doctor` to re-validate" + ) + print() + + +# --------------------------------------------------------------------------- +# test +# --------------------------------------------------------------------------- + +# Synthetic kwargs matching the real invoke_hook() call sites — these are +# passed verbatim to agent.shell_hooks.run_once(), which routes them through +# the same _serialize_payload() that production firings use. That way the +# stdin a script sees under `hermes hooks test` and `hermes hooks doctor` +# is identical in shape to what it will see at runtime. 
+_DEFAULT_PAYLOADS = { + "pre_tool_call": { + "tool_name": "terminal", + "args": {"command": "echo hello"}, + "session_id": "test-session", + "task_id": "test-task", + "tool_call_id": "test-call", + }, + "post_tool_call": { + "tool_name": "terminal", + "args": {"command": "echo hello"}, + "session_id": "test-session", + "task_id": "test-task", + "tool_call_id": "test-call", + "result": '{"output": "hello"}', + }, + "pre_llm_call": { + "session_id": "test-session", + "user_message": "What is the weather?", + "conversation_history": [], + "is_first_turn": True, + "model": "gpt-4", + "platform": "cli", + }, + "post_llm_call": { + "session_id": "test-session", + "model": "gpt-4", + "platform": "cli", + }, + "on_session_start": {"session_id": "test-session"}, + "on_session_end": {"session_id": "test-session"}, + "on_session_finalize": {"session_id": "test-session"}, + "on_session_reset": {"session_id": "test-session"}, + "pre_api_request": { + "session_id": "test-session", + "task_id": "test-task", + "platform": "cli", + "model": "claude-sonnet-4-6", + "provider": "anthropic", + "base_url": "https://api.anthropic.com", + "api_mode": "anthropic_messages", + "api_call_count": 1, + "message_count": 4, + "tool_count": 12, + "approx_input_tokens": 2048, + "request_char_count": 8192, + "max_tokens": 4096, + }, + "post_api_request": { + "session_id": "test-session", + "task_id": "test-task", + "platform": "cli", + "model": "claude-sonnet-4-6", + "provider": "anthropic", + "base_url": "https://api.anthropic.com", + "api_mode": "anthropic_messages", + "api_call_count": 1, + "api_duration": 1.234, + "finish_reason": "stop", + "message_count": 4, + "response_model": "claude-sonnet-4-6", + "usage": {"input_tokens": 2048, "output_tokens": 512}, + "assistant_content_chars": 1200, + "assistant_tool_call_count": 0, + }, + "subagent_stop": { + "parent_session_id": "parent-sess", + "child_role": None, + "child_summary": "Synthetic summary for hooks test", + "child_status": "completed", + "duration_ms": 1234, + }, +} + + +def _cmd_test(args) -> None: + from hermes_cli.config import load_config + from hermes_cli.plugins import VALID_HOOKS + from agent import shell_hooks + + event = args.event + if event not in VALID_HOOKS: + print(f"Unknown event: {event!r}") + print(f"Valid events: {', '.join(sorted(VALID_HOOKS))}") + return + + # Synthetic kwargs in the same shape invoke_hook() would pass. Merged + # with --for-tool (overrides tool_name) and --payload-file (extra kwargs). 
+ payload = dict(_DEFAULT_PAYLOADS.get(event, {"session_id": "test-session"})) + + if getattr(args, "for_tool", None): + payload["tool_name"] = args.for_tool + + if getattr(args, "payload_file", None): + try: + custom = json.loads(Path(args.payload_file).read_text()) + if isinstance(custom, dict): + payload.update(custom) + else: + print(f"Warning: {args.payload_file} is not a JSON object; ignoring") + except Exception as exc: + print(f"Error reading payload file: {exc}") + return + + specs = shell_hooks.iter_configured_hooks(load_config()) + specs = [s for s in specs if s.event == event] + + if getattr(args, "for_tool", None): + specs = [ + s for s in specs + if s.event not in ("pre_tool_call", "post_tool_call") + or s.matches_tool(args.for_tool) + ] + + if not specs: + print(f"No shell hooks configured for event: {event}") + if getattr(args, "for_tool", None): + print(f"(with matcher filter --for-tool={args.for_tool})") + return + + print(f"Firing {len(specs)} hook(s) for event '{event}':\n") + for spec in specs: + print(f" → {spec.command}") + result = shell_hooks.run_once(spec, payload) + _print_run_result(result) + print() + + +def _print_run_result(result: Dict[str, Any]) -> None: + if result.get("error"): + print(f" ✗ error: {result['error']}") + return + if result.get("timed_out"): + print(f" ✗ timed out after {result['elapsed_seconds']}s") + return + + rc = result.get("returncode") + elapsed = result.get("elapsed_seconds", 0) + print(f" exit={rc} elapsed={elapsed}s") + + stdout = (result.get("stdout") or "").strip() + stderr = (result.get("stderr") or "").strip() + if stdout: + print(f" stdout: {_truncate(stdout, 400)}") + if stderr: + print(f" stderr: {_truncate(stderr, 400)}") + + parsed = result.get("parsed") + if parsed: + print(f" parsed (Hermes wire shape): {json.dumps(parsed)}") + else: + print(" parsed: ") + + +def _truncate(s: str, n: int) -> str: + return s if len(s) <= n else s[: n - 3] + "..." + + +# --------------------------------------------------------------------------- +# revoke +# --------------------------------------------------------------------------- + +def _cmd_revoke(args) -> None: + from agent import shell_hooks + + removed = shell_hooks.revoke(args.command) + if removed == 0: + print(f"No allowlist entry found for command: {args.command}") + return + print(f"Removed {removed} allowlist entry/entries for: {args.command}") + print( + "Note: currently running CLI / gateway processes keep their " + "already-registered callbacks until they restart." + ) + + +# --------------------------------------------------------------------------- +# doctor +# --------------------------------------------------------------------------- + +def _cmd_doctor(_args) -> None: + from hermes_cli.config import load_config + from agent import shell_hooks + + specs = shell_hooks.iter_configured_hooks(load_config()) + + if not specs: + print("No shell hooks configured — nothing to check.") + return + + print(f"Checking {len(specs)} configured shell hook(s)...\n") + + problems = 0 + for spec in specs: + print(f" [{spec.event}] {spec.command}") + problems += _doctor_one(spec, shell_hooks) + print() + + if problems: + print(f"{problems} issue(s) found. Fix before relying on these hooks.") + else: + print("All shell hooks look healthy.") + + +def _doctor_one(spec, shell_hooks) -> int: + problems = 0 + + # 1. 
Script exists and is executable + if shell_hooks.script_is_executable(spec.command): + print(" ✓ script exists and is executable") + else: + problems += 1 + print(" ✗ script missing or not executable " + "(chmod +x the file, or fix the path)") + + # 2. Allowlist status + entry = shell_hooks.allowlist_entry_for(spec.event, spec.command) + if entry: + print(f" ✓ allowlisted (approved {entry.get('approved_at', '?')})") + else: + problems += 1 + print(" ✗ not allowlisted — hook will NOT fire at runtime " + "(run with --accept-hooks once, or confirm at the TTY prompt)") + + # 3. Mtime drift + if entry and entry.get("script_mtime_at_approval"): + mtime_now = shell_hooks.script_mtime_iso(spec.command) + mtime_at = entry["script_mtime_at_approval"] + if mtime_now and mtime_at and mtime_now > mtime_at: + problems += 1 + print(f" ⚠ script modified since approval " + f"(was {mtime_at}, now {mtime_now}) — review changes, " + f"then `hermes hooks revoke` + re-approve to refresh") + elif mtime_now and mtime_at and mtime_now == mtime_at: + print(" ✓ script unchanged since approval") + + # 4. Produces valid JSON for a synthetic payload — only when the entry + # is already allowlisted. Otherwise `hermes hooks doctor` would execute + # every script listed in a freshly-pulled config before the user has + # reviewed them, which directly contradicts the documented workflow + # ("spot newly-added hooks *before they register*"). + if not entry: + print(" ℹ skipped JSON smoke test — not allowlisted yet. " + "Approve the hook first (via TTY prompt or --accept-hooks), " + "then re-run `hermes hooks doctor`.") + elif shell_hooks.script_is_executable(spec.command): + payload = _DEFAULT_PAYLOADS.get(spec.event, {"extra": {}}) + result = shell_hooks.run_once(spec, payload) + if result.get("timed_out"): + problems += 1 + print(f" ✗ timed out after {result['elapsed_seconds']}s " + f"on synthetic payload (timeout={spec.timeout}s)") + elif result.get("error"): + problems += 1 + print(f" ✗ execution error: {result['error']}") + else: + rc = result.get("returncode") + elapsed = result.get("elapsed_seconds", 0) + stdout = (result.get("stdout") or "").strip() + if stdout: + try: + json.loads(stdout) + print(f" ✓ produced valid JSON on synthetic payload " + f"(exit={rc}, {elapsed}s)") + except json.JSONDecodeError: + problems += 1 + print(f" ✗ stdout was not valid JSON (exit={rc}, " + f"{elapsed}s): {_truncate(stdout, 120)}") + else: + print(f" ✓ ran clean with empty stdout " + f"(exit={rc}, {elapsed}s) — hook is observer-only") + + return problems diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 7e0220d918..404e59089a 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -51,6 +51,19 @@ import sys from pathlib import Path from typing import Optional +def _add_accept_hooks_flag(parser) -> None: + """Attach the ``--accept-hooks`` flag. Shared across every agent + subparser so the flag works regardless of CLI position.""" + parser.add_argument( + "--accept-hooks", + action="store_true", + default=argparse.SUPPRESS, + help=( + "Auto-approve unseen shell hooks without a TTY prompt " + "(equivalent to HERMES_ACCEPT_HOOKS=1 / hooks_auto_accept: true)." + ), + ) + def _require_tty(command_name: str) -> None: """Exit with a clear error if stdin is not a terminal. 
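Aside on `_add_accept_hooks_flag` above: it sets `default=argparse.SUPPRESS` so the attribute only exists on the namespace when the flag was actually passed. That way `getattr(args, "accept_hooks", False)` works no matter which parser in the chain defined the flag, and a subparser's default cannot clobber a top-level value. A self-contained sketch of this standard argparse behaviour:

    import argparse

    p = argparse.ArgumentParser()
    p.add_argument("--accept-hooks", action="store_true", default=argparse.SUPPRESS)

    args = p.parse_args([])
    assert not hasattr(args, "accept_hooks")            # attribute absent, not False
    assert getattr(args, "accept_hooks", False) is False

    args = p.parse_args(["--accept-hooks"])
    assert args.accept_hooks is True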
@@ -180,7 +193,7 @@ import time as _time from datetime import datetime from hermes_cli import __version__, __release_date__ -from hermes_constants import OPENROUTER_BASE_URL +from hermes_constants import AI_GATEWAY_BASE_URL, OPENROUTER_BASE_URL logger = logging.getLogger(__name__) @@ -605,7 +618,6 @@ def _exec_in_container(container_info: dict, cli_args: list): container_info: dict with backend, container_name, exec_user, hermes_bin cli_args: the original CLI arguments (everything after 'hermes') """ - import shutil backend = container_info["backend"] container_name = container_info["container_name"] @@ -693,6 +705,10 @@ def _resolve_session_by_name_or_id(name_or_id: str) -> Optional[str]: - If it looks like a session ID (contains underscore + hex), try direct lookup first. - Otherwise, treat it as a title and use resolve_session_by_title (auto-latest). - Falls back to the other method if the first doesn't match. + - If the resolved session is a compression root, follow the chain forward + to the latest continuation. Users who remember the old root ID (e.g. + from an exit summary printed before the bug fix, or from notes) get + resumed at the live tip instead of a stale parent with no messages. """ try: from hermes_state import SessionDB @@ -701,14 +717,23 @@ def _resolve_session_by_name_or_id(name_or_id: str) -> Optional[str]: # Try as exact session ID first session = db.get_session(name_or_id) + resolved_id: Optional[str] = None if session: - db.close() - return session["id"] + resolved_id = session["id"] + else: + # Try as title (with auto-latest for lineage) + resolved_id = db.resolve_session_by_title(name_or_id) + + if resolved_id: + # Project forward through compression chain so resumes land on + # the live tip instead of a dead compressed parent. + try: + resolved_id = db.get_compression_tip(resolved_id) or resolved_id + except Exception: + pass - # Try as title (with auto-latest for lineage) - session_id = db.resolve_session_by_title(name_or_id) db.close() - return session_id + return resolved_id except Exception: pass return None @@ -990,6 +1015,17 @@ def _launch_tui(resume_session_id: Optional[str] = None, tui_dev: bool = False): ) env.setdefault("HERMES_PYTHON", sys.executable) env.setdefault("HERMES_CWD", os.getcwd()) + # Guarantee an 8GB V8 heap + exposed GC for the TUI. Default node cap is + # ~1.5–4GB depending on version and can fatal-OOM on long sessions with + # large transcripts / reasoning blobs. Token-level merge: respect any + # user-supplied --max-old-space-size (they may have set it higher) and + # avoid duplicating --expose-gc. 
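A worked example of the token-level merge implemented just below (the pre-set NODE_OPTIONS value is hypothetical):

    env = {"NODE_OPTIONS": "--max-old-space-size=16384"}    # user raised it already
    tokens = env.get("NODE_OPTIONS", "").split()
    if not any(t.startswith("--max-old-space-size=") for t in tokens):
        tokens.append("--max-old-space-size=8192")
    if "--expose-gc" not in tokens:
        tokens.append("--expose-gc")
    # The larger user-supplied heap survives; only --expose-gc is appended.
    assert " ".join(tokens) == "--max-old-space-size=16384 --expose-gc"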
+ _tokens = env.get("NODE_OPTIONS", "").split() + if not any(t.startswith("--max-old-space-size=") for t in _tokens): + _tokens.append("--max-old-space-size=8192") + if "--expose-gc" not in _tokens: + _tokens.append("--expose-gc") + env["NODE_OPTIONS"] = " ".join(_tokens) if resume_session_id: env["HERMES_TUI_RESUME"] = resume_session_id @@ -1144,8 +1180,6 @@ def cmd_gateway(args): def cmd_whatsapp(args): """Set up WhatsApp: choose mode, configure, install bridge, pair via QR.""" _require_tty("whatsapp") - import subprocess - from pathlib import Path from hermes_cli.config import get_env_value, save_env_value print() @@ -1254,16 +1288,27 @@ def cmd_whatsapp(args): return if not (bridge_dir / "node_modules").exists(): - print("\n→ Installing WhatsApp bridge dependencies...") - result = subprocess.run( - ["npm", "install"], - cwd=str(bridge_dir), - capture_output=True, - text=True, - timeout=120, - ) + print("\n→ Installing WhatsApp bridge dependencies (this can take a few minutes)...") + npm = shutil.which("npm") + if not npm: + print(" ✗ npm not found on PATH — install Node.js first") + return + try: + result = subprocess.run( + [npm, "install", "--no-fund", "--no-audit", "--progress=false"], + cwd=str(bridge_dir), + stdout=subprocess.DEVNULL, + stderr=subprocess.PIPE, + text=True, + ) + except KeyboardInterrupt: + print("\n ✗ Install cancelled") + return if result.returncode != 0: - print(f" ✗ npm install failed: {result.stderr}") + err = (result.stderr or "").strip() + preview = "\n".join(err.splitlines()[-30:]) if err else "(no output)" + print(" ✗ npm install failed:") + print(preview) return print(" ✓ Dependencies installed") else: @@ -1282,8 +1327,6 @@ def cmd_whatsapp(args): except (EOFError, KeyboardInterrupt): response = "n" if response.lower() in ("y", "yes"): - import shutil - shutil.rmtree(session_dir, ignore_errors=True) session_dir.mkdir(parents=True, exist_ok=True) print(" ✓ Session cleared") @@ -1379,8 +1422,6 @@ def select_provider_and_model(args=None): # Read effective provider the same way the CLI does at startup: # config.yaml model.provider > env var > auto-detect - import os - config_provider = None model_cfg = config.get("model") if isinstance(model_cfg, dict): @@ -1491,6 +1532,8 @@ def select_provider_and_model(args=None): # Step 2: Provider-specific setup + model selection if selected_provider == "openrouter": _model_flow_openrouter(config, current_model) + elif selected_provider == "ai-gateway": + _model_flow_ai_gateway(config, current_model) elif selected_provider == "nous": _model_flow_nous(config, current_model, args=args) elif selected_provider == "openai-codex": @@ -1523,6 +1566,8 @@ def select_provider_and_model(args=None): _model_flow_anthropic(config, current_model) elif selected_provider == "kimi-coding": _model_flow_kimi(config, current_model) + elif selected_provider == "stepfun": + _model_flow_stepfun(config, current_model) elif selected_provider == "bedrock": _model_flow_bedrock(config, current_model) elif selected_provider in ( @@ -1536,7 +1581,6 @@ def select_provider_and_model(args=None): "kilocode", "opencode-zen", "opencode-go", - "ai-gateway", "alibaba", "huggingface", "xiaomi", @@ -2008,6 +2052,63 @@ def _model_flow_openrouter(config, current_model=""): print("No change.") +def _model_flow_ai_gateway(config, current_model=""): + """Vercel AI Gateway provider: ensure API key, then pick model with pricing.""" + from hermes_cli.auth import ( + _prompt_model_selection, + _save_model_choice, + deactivate_provider, + ) + from hermes_cli.config import 
get_env_value, save_env_value + + api_key = get_env_value("AI_GATEWAY_API_KEY") + if not api_key: + print("No Vercel AI Gateway API key configured.") + print("Create API key here: https://vercel.com/d?to=%2F%5Bteam%5D%2F%7E%2Fai-gateway&title=AI+Gateway") + print("Add a payment method to get $5 in free credits.") + print() + try: + import getpass + + key = getpass.getpass("AI Gateway API key (or Enter to cancel): ").strip() + except (KeyboardInterrupt, EOFError): + print() + return + if not key: + print("Cancelled.") + return + save_env_value("AI_GATEWAY_API_KEY", key) + print("API key saved.") + print() + + from hermes_cli.models import ai_gateway_model_ids, get_pricing_for_provider + + models_list = ai_gateway_model_ids(force_refresh=True) + pricing = get_pricing_for_provider("ai-gateway", force_refresh=True) + + selected = _prompt_model_selection( + models_list, current_model=current_model, pricing=pricing + ) + if selected: + _save_model_choice(selected) + + from hermes_cli.config import load_config, save_config + + cfg = load_config() + model = cfg.get("model") + if not isinstance(model, dict): + model = {"default": model} if model else {} + cfg["model"] = model + model["provider"] = "ai-gateway" + model["base_url"] = AI_GATEWAY_BASE_URL + model["api_mode"] = "chat_completions" + save_config(cfg) + deactivate_provider() + print(f"Default model set to: {selected} (via Vercel AI Gateway)") + else: + print("No change.") + + def _model_flow_nous(config, current_model="", args=None): """Nous Portal provider: ensure logged in, then pick model.""" from hermes_cli.auth import ( @@ -2028,7 +2129,6 @@ def _model_flow_nous(config, current_model="", args=None): save_env_value, ) from hermes_cli.nous_subscription import prompt_enable_tool_gateway - import argparse state = get_provider_auth_state("nous") if not state or not state.get("access_token"): @@ -2067,7 +2167,6 @@ def _model_flow_nous(config, current_model="", args=None): from hermes_cli.models import ( _PROVIDER_MODELS, get_pricing_for_provider, - filter_nous_free_models, check_nous_free_tier, partition_nous_models_by_tier, ) @@ -2110,10 +2209,8 @@ def _model_flow_nous(config, current_model="", args=None): # Check if user is on free tier free_tier = check_nous_free_tier() - # For both tiers: apply the allowlist filter first (removes non-allowlisted - # free models and allowlist models that aren't actually free). - # Then for free users: partition remaining models into selectable/unavailable. - model_ids = filter_nous_free_models(model_ids, pricing) + # For free users: partition models into selectable/unavailable based on + # whether they are free per the Portal-reported pricing. 
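A hypothetical partition matching the comment above, with made-up Portal pricing; the real logic lives in partition_nous_models_by_tier.

    pricing = {"hermes-free": 0.0, "hermes-pro": 1.2}   # made-up pricing data
    model_ids = list(pricing)
    selectable = [m for m in model_ids if pricing[m] == 0.0]
    unavailable = [m for m in model_ids if pricing[m] > 0.0]
    assert selectable == ["hermes-free"] and unavailable == ["hermes-pro"]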
unavailable_models: list[str] = [] if free_tier: model_ids, unavailable_models = partition_nous_models_by_tier( @@ -2196,7 +2293,6 @@ def _model_flow_openai_codex(config, current_model=""): DEFAULT_CODEX_BASE_URL, ) from hermes_cli.codex_models import get_codex_model_ids - import argparse status = get_codex_auth_status() if not status.get("logged_in"): @@ -2351,7 +2447,7 @@ def _model_flow_google_gemini_cli(_config, current_model=""): return models = list(_PROVIDER_MODELS.get("google-gemini-cli") or []) - default = current_model or (models[0] if models else "gemini-2.5-flash") + default = current_model or (models[0] if models else "gemini-3-flash-preview") selected = _prompt_model_selection(models, current_model=default) if selected: _save_model_choice(selected) @@ -3327,8 +3423,9 @@ def _model_flow_kimi(config, current_model=""): # Step 3: Model selection — show appropriate models for the endpoint if is_coding_plan: - # Coding Plan models (kimi-k2.5 first) + # Coding Plan models (kimi-k2.6 first) model_list = [ + "kimi-k2.6", "kimi-k2.5", "kimi-for-coding", "kimi-k2-thinking", @@ -3367,6 +3464,140 @@ def _model_flow_kimi(config, current_model=""): print("No change.") +def _infer_stepfun_region(base_url: str) -> str: + """Infer the current StepFun region from the configured endpoint.""" + normalized = (base_url or "").strip().lower() + if "api.stepfun.com" in normalized: + return "china" + return "international" + + +def _stepfun_base_url_for_region(region: str) -> str: + from hermes_cli.auth import ( + STEPFUN_STEP_PLAN_CN_BASE_URL, + STEPFUN_STEP_PLAN_INTL_BASE_URL, + ) + + return ( + STEPFUN_STEP_PLAN_CN_BASE_URL + if region == "china" + else STEPFUN_STEP_PLAN_INTL_BASE_URL + ) + + +def _model_flow_stepfun(config, current_model=""): + """StepFun Step Plan flow with region-specific endpoints.""" + from hermes_cli.auth import ( + PROVIDER_REGISTRY, + _prompt_model_selection, + _save_model_choice, + deactivate_provider, + ) + from hermes_cli.config import get_env_value, save_env_value, load_config, save_config + from hermes_cli.models import fetch_api_models + + provider_id = "stepfun" + pconfig = PROVIDER_REGISTRY[provider_id] + key_env = pconfig.api_key_env_vars[0] if pconfig.api_key_env_vars else "" + base_url_env = pconfig.base_url_env_var or "" + + existing_key = "" + for ev in pconfig.api_key_env_vars: + existing_key = get_env_value(ev) or os.getenv(ev, "") + if existing_key: + break + + if not existing_key: + print(f"No {pconfig.name} API key configured.") + if key_env: + try: + import getpass + new_key = getpass.getpass(f"{key_env} (or Enter to cancel): ").strip() + except (KeyboardInterrupt, EOFError): + print() + return + if not new_key: + print("Cancelled.") + return + save_env_value(key_env, new_key) + existing_key = new_key + print("API key saved.") + print() + else: + print(f" {pconfig.name} API key: {existing_key[:8]}... 
✓") + print() + + current_base = "" + if base_url_env: + current_base = get_env_value(base_url_env) or os.getenv(base_url_env, "") + if not current_base: + model_cfg = config.get("model") + if isinstance(model_cfg, dict): + current_base = str(model_cfg.get("base_url") or "").strip() + current_region = _infer_stepfun_region(current_base or pconfig.inference_base_url) + + region_choices = [ + ("international", f"International ({_stepfun_base_url_for_region('international')})"), + ("china", f"China ({_stepfun_base_url_for_region('china')})"), + ] + ordered_regions = [] + for region_key, label in region_choices: + if region_key == current_region: + ordered_regions.insert(0, (region_key, f"{label} ← currently active")) + else: + ordered_regions.append((region_key, label)) + ordered_regions.append(("cancel", "Cancel")) + + region_idx = _prompt_provider_choice([label for _, label in ordered_regions]) + if region_idx is None or ordered_regions[region_idx][0] == "cancel": + print("No change.") + return + + selected_region = ordered_regions[region_idx][0] + effective_base = _stepfun_base_url_for_region(selected_region) + if base_url_env: + save_env_value(base_url_env, effective_base) + + live_models = fetch_api_models(existing_key, effective_base) + if live_models: + model_list = live_models + print(f" Found {len(model_list)} model(s) from {pconfig.name} API") + else: + model_list = _PROVIDER_MODELS.get(provider_id, []) + if model_list: + print( + f" Could not auto-detect models from {pconfig.name} API — " + "showing Step Plan fallback catalog." + ) + + if model_list: + selected = _prompt_model_selection(model_list, current_model=current_model) + else: + try: + selected = input("Model name: ").strip() + except (KeyboardInterrupt, EOFError): + selected = None + + if selected: + _save_model_choice(selected) + + cfg = load_config() + model = cfg.get("model") + if not isinstance(model, dict): + model = {"default": model} if model else {} + cfg["model"] = model + model["provider"] = provider_id + model["base_url"] = effective_base + model.pop("api_mode", None) + save_config(cfg) + deactivate_provider() + + config["model"] = dict(model) + print(f"Default model set to: {selected} (via {pconfig.name})") + else: + print("No change.") + + def _model_flow_bedrock_api_key(config, region, current_model=""): """Bedrock API Key mode — uses the OpenAI-compatible bedrock-mantle endpoint. 
@@ -4067,6 +4298,12 @@ def cmd_webhook(args): webhook_command(args) +def cmd_hooks(args): + """Shell-hook inspection and management.""" + from hermes_cli.hooks import hooks_command + hooks_command(args) + + def cmd_doctor(args): """Check configuration and dependencies.""" from hermes_cli.doctor import run_doctor @@ -4176,9 +4413,7 @@ def _clear_bytecode_cache(root: Path) -> int: ] if os.path.basename(dirpath) == "__pycache__": try: - import shutil as _shutil - - _shutil.rmtree(dirpath) + shutil.rmtree(dirpath) removed += 1 except OSError: pass @@ -4217,8 +4452,6 @@ def _gateway_prompt(prompt_text: str, default: str = "", timeout: float = 300.0) tmp.replace(prompt_path) # Poll for response - import time as _time - deadline = _time.monotonic() + timeout while _time.monotonic() < deadline: if response_path.exists(): @@ -4250,7 +4483,6 @@ def _build_web_ui(web_dir: Path, *, fatal: bool = False) -> bool: """ if not (web_dir / "package.json").exists(): return True - import shutil npm = shutil.which("npm") if not npm: @@ -4287,7 +4519,6 @@ def _update_via_zip(args): Used on Windows when git file I/O is broken (antivirus, NTFS filter drivers causing 'Invalid argument' errors on file creation). """ - import shutil import tempfile import zipfile from urllib.request import urlretrieve @@ -4364,7 +4595,6 @@ def _update_via_zip(args): # breaks on this machine, keep base deps and reinstall the remaining extras # individually so update does not silently strip working capabilities. print("→ Updating Python dependencies...") - import subprocess uv_bin = shutil.which("uv") if uv_bin: @@ -5115,9 +5345,11 @@ def _install_hangup_protection(gateway_mode: bool = False): # (2) Mirror output to update.log and wrap stdio for broken-pipe # tolerance. Any failure here is non-fatal; we just skip the wrap. try: - from hermes_cli.config import get_hermes_home + # Late-bound import so tests can monkeypatch + # hermes_cli.config.get_hermes_home to simulate setup failure. + from hermes_cli.config import get_hermes_home as _get_hermes_home - logs_dir = get_hermes_home() / "logs" + logs_dir = _get_hermes_home() / "logs" logs_dir.mkdir(parents=True, exist_ok=True) log_path = logs_dir / "update.log" log_file = open(log_path, "a", buffering=1, encoding="utf-8") @@ -5692,8 +5924,6 @@ def _cmd_update_impl(args, gateway_mode: bool): # Verify the service actually survived the # restart. systemctl restart returns 0 even # if the new process crashes immediately. - import time as _time - _time.sleep(3) verify = subprocess.run( scope_cmd + ["is-active", svc_name], @@ -6346,6 +6576,17 @@ For more help on a command: default=False, help="Run in an isolated git worktree (for parallel agents)", ) + parser.add_argument( + "--accept-hooks", + action="store_true", + default=False, + help=( + "Auto-approve any unseen shell hooks declared in config.yaml " + "without a TTY prompt. Equivalent to HERMES_ACCEPT_HOOKS=1 or " + "hooks_auto_accept: true in config.yaml. Use on CI / headless " + "runs that can't prompt." 
+ ), + ) parser.add_argument( "--skills", "-s", @@ -6425,6 +6666,7 @@ For more help on a command: "zai", "kimi-coding", "kimi-coding-cn", + "stepfun", "minimax", "minimax-cn", "kilocode", @@ -6468,6 +6710,16 @@ For more help on a command: default=argparse.SUPPRESS, help="Run in an isolated git worktree (for parallel agents on the same repo)", ) + chat_parser.add_argument( + "--accept-hooks", + action="store_true", + default=argparse.SUPPRESS, + help=( + "Auto-approve any unseen shell hooks declared in config.yaml " + "without a TTY prompt (see also HERMES_ACCEPT_HOOKS env var and " + "hooks_auto_accept: in config.yaml)." + ), + ) chat_parser.add_argument( "--checkpoints", action="store_true", @@ -6587,6 +6839,8 @@ For more help on a command: action="store_true", help="Replace any existing gateway instance (useful for systemd)", ) + _add_accept_hooks_flag(gateway_run) + _add_accept_hooks_flag(gateway_parser) # gateway start gateway_start = gateway_subparsers.add_parser( @@ -6951,6 +7205,7 @@ For more help on a command: "run", help="Run a job on the next scheduler tick" ) cron_run.add_argument("job_id", help="Job ID to trigger") + _add_accept_hooks_flag(cron_run) cron_remove = cron_subparsers.add_parser( "remove", aliases=["rm", "delete"], help="Remove a scheduled job" @@ -6961,8 +7216,9 @@ For more help on a command: cron_subparsers.add_parser("status", help="Check if cron scheduler is running") # cron tick (mostly for debugging) - cron_subparsers.add_parser("tick", help="Run due jobs once and exit") - + cron_tick = cron_subparsers.add_parser("tick", help="Run due jobs once and exit") + _add_accept_hooks_flag(cron_tick) + _add_accept_hooks_flag(cron_parser) cron_parser.set_defaults(func=cmd_cron) # ========================================================================= @@ -7002,6 +7258,13 @@ For more help on a command: wh_sub.add_argument( "--secret", default="", help="HMAC secret (auto-generated if omitted)" ) + wh_sub.add_argument( + "--deliver-only", + action="store_true", + help="Skip the agent — deliver the rendered prompt directly as the " + "message. Zero LLM cost. Requires --deliver to be a real target " + "(not 'log').", + ) webhook_subparsers.add_parser( "list", aliases=["ls"], help="List all dynamic subscriptions" @@ -7022,6 +7285,67 @@ For more help on a command: webhook_parser.set_defaults(func=cmd_webhook) + # ========================================================================= + # hooks command — shell-hook inspection and management + # ========================================================================= + hooks_parser = subparsers.add_parser( + "hooks", + help="Inspect and manage shell-script hooks", + description=( + "Inspect shell-script hooks declared in ~/.hermes/config.yaml, " + "test them against synthetic payloads, and manage the first-use " + "consent allowlist at ~/.hermes/shell-hooks-allowlist.json." + ), + ) + hooks_subparsers = hooks_parser.add_subparsers(dest="hooks_action") + + hooks_subparsers.add_parser( + "list", aliases=["ls"], + help="List configured hooks with matcher, timeout, and consent status", + ) + + _hk_test = hooks_subparsers.add_parser( + "test", + help="Fire every hook matching against a synthetic payload", + ) + _hk_test.add_argument( + "event", + help="Hook event name (e.g. 
pre_tool_call, pre_llm_call, subagent_stop)", + ) + _hk_test.add_argument( + "--for-tool", dest="for_tool", default=None, + help=( + "Only fire hooks whose matcher matches this tool name " + "(used for pre_tool_call / post_tool_call)" + ), + ) + _hk_test.add_argument( + "--payload-file", dest="payload_file", default=None, + help=( + "Path to a JSON file whose contents are merged into the " + "synthetic payload before execution" + ), + ) + + _hk_revoke = hooks_subparsers.add_parser( + "revoke", aliases=["remove", "rm"], + help="Remove a command's allowlist entries (takes effect on next restart)", + ) + _hk_revoke.add_argument( + "command", + help="The exact command string to revoke (as declared in config.yaml)", + ) + + hooks_subparsers.add_parser( + "doctor", + help=( + "Check each configured hook: exec bit, allowlist, mtime drift, " + "JSON validity, and synthetic run timing" + ), + ) + + hooks_parser.set_defaults(func=cmd_hooks) + # ========================================================================= # doctor command # ========================================================================= @@ -7429,6 +7753,17 @@ Examples: action="store_true", help="Remove existing plugin and reinstall", ) + _install_enable_group = plugins_install.add_mutually_exclusive_group() + _install_enable_group.add_argument( + "--enable", + action="store_true", + help="Auto-enable the plugin after install (skip confirmation prompt)", + ) + _install_enable_group.add_argument( + "--no-enable", + action="store_true", + help="Install disabled (skip confirmation prompt); enable later with `hermes plugins enable <name>`", + ) plugins_update = plugins_subparsers.add_parser( "update", help="Pull latest changes for an installed plugin" ) @@ -7476,9 +7811,7 @@ Examples: ) cmd_info["setup_fn"](plugin_parser) except Exception as _exc: - import logging as _log - - _log.getLogger(__name__).debug("Plugin CLI discovery failed: %s", _exc) + logging.getLogger(__name__).debug("Plugin CLI discovery failed: %s", _exc) # ========================================================================= # memory command # ========================================================================= @@ -7684,6 +8017,7 @@ Examples: action="store_true", help="Enable verbose logging on stderr", ) + _add_accept_hooks_flag(mcp_serve_p) mcp_add_p = mcp_sub.add_parser( "add", help="Add an MCP server (discovery-first install)" ) @@ -7722,6 +8056,8 @@ Examples: ) mcp_login_p.add_argument("name", help="Server name to re-authenticate") + _add_accept_hooks_flag(mcp_parser) + def cmd_mcp(args): from hermes_cli.mcp_config import mcp_command @@ -7860,7 +8196,6 @@ Examples: return line = _json.dumps(data, ensure_ascii=False) + "\n" if args.output == "-": - import sys sys.stdout.write(line) else: @@ -7870,7 +8205,6 @@ Examples: else: sessions = db.export_all(source=args.source) if args.output == "-": - import sys for s in sessions: sys.stdout.write(_json.dumps(s, ensure_ascii=False) + "\n") @@ -7941,8 +8275,6 @@ Examples: # Launch hermes --resume by replacing the current process print(f"Resuming session: {selected_id}") - import shutil - hermes_bin = shutil.which("hermes") if hermes_bin: os.execvp(hermes_bin, ["hermes", "--resume", selected_id]) @@ -8133,6 +8465,7 @@ Examples: help="Run Hermes Agent as an ACP (Agent Client Protocol) server", description="Start Hermes Agent in ACP mode for editor integration (VS Code, Zed, JetBrains)", ) + _add_accept_hooks_flag(acp_parser) def cmd_acp(args): """Launch Hermes Agent as an ACP server.""" @@ -8406,6 +8739,42 @@ Examples: cmd_version(args) return + # Discover Python plugins and register shell hooks
once, before any + # command that can fire lifecycle hooks. Both are idempotent; gated + # so introspection/management commands (hermes hooks list, cron + # list, gateway status, mcp add, ...) don't pay discovery cost or + # trigger consent prompts for hooks the user is still inspecting. + # Groups with mixed admin/CRUD vs. agent-running entries narrow via + # the nested subcommand (dest varies by parser). + _AGENT_COMMANDS = {None, "chat", "acp", "rl"} + _AGENT_SUBCOMMANDS = { + "cron": ("cron_command", {"run", "tick"}), + "gateway": ("gateway_command", {"run"}), + "mcp": ("mcp_action", {"serve"}), + } + _sub_attr, _sub_set = _AGENT_SUBCOMMANDS.get(args.command, (None, None)) + if ( + args.command in _AGENT_COMMANDS + or (_sub_attr and getattr(args, _sub_attr, None) in _sub_set) + ): + _accept_hooks = bool(getattr(args, "accept_hooks", False)) + try: + from hermes_cli.plugins import discover_plugins + discover_plugins() + except Exception: + logger.debug( + "plugin discovery failed at CLI startup", exc_info=True, + ) + try: + from hermes_cli.config import load_config + from agent.shell_hooks import register_from_config + register_from_config(load_config(), accept_hooks=_accept_hooks) + except Exception: + logger.debug( + "shell-hook registration failed at CLI startup", + exc_info=True, + ) + # Handle top-level --resume / --continue as shortcut to chat if (args.resume or args.continue_last) and args.command is None: args.command = "chat" diff --git a/hermes_cli/model_switch.py b/hermes_cli/model_switch.py index 004582a574..e5feaa8654 100644 --- a/hermes_cli/model_switch.py +++ b/hermes_cli/model_switch.py @@ -143,7 +143,7 @@ MODEL_ALIASES: dict[str, ModelIdentity] = { # Z.AI / GLM "glm": ModelIdentity("z-ai", "glm"), - # StepFun + # Step Plan (StepFun) "step": ModelIdentity("stepfun", "step"), # Xiaomi @@ -678,6 +678,7 @@ def switch_model( _da = DIRECT_ALIASES.get(resolved_alias) if _da is not None and _da.base_url: base_url = _da.base_url + api_mode = "" # clear so determine_api_mode re-detects from URL if not api_key: api_key = "no-key-required" @@ -1035,21 +1036,49 @@ def list_authenticated_providers( seen_slugs.add(_cp.slug.lower()) # --- 3. User-defined endpoints from config --- + # Track (name, base_url) of what section 3 emits so section 4 can skip + # any overlapping ``custom_providers:`` entries. Callers typically pass + # both (gateway/CLI invoke ``get_compatible_custom_providers()`` which + # merges ``providers:`` into the list) — without this, the same endpoint + # produces two picker rows: one bare-slug ("openrouter") from section 3 + # and one "custom:openrouter" from section 4, both labelled identically. + _section3_emitted_pairs: set = set() if user_providers and isinstance(user_providers, dict): for ep_name, ep_cfg in user_providers.items(): if not isinstance(ep_cfg, dict): continue + # Skip if this slug was already emitted (e.g. canonical provider + # with the same name) or will be picked up by section 4. + if ep_name.lower() in seen_slugs: + continue display_name = ep_cfg.get("name", "") or ep_name - api_url = ep_cfg.get("api", "") or ep_cfg.get("url", "") or "" - default_model = ep_cfg.get("default_model", "") + # ``base_url`` is Hermes's canonical write key (matches + # custom_providers and _save_custom_provider); ``api`` / ``url`` + # remain as fallbacks for hand-edited / legacy configs. 
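# --- illustrative aside (not part of the patch) ---------------------------
# The fallback chain below, exercised on toy configs. The helper name
# `_resolve_endpoint_url` is hypothetical — the real code inlines the
# chained `or` expression.
def _resolve_endpoint_url(ep_cfg: dict) -> str:
    # First non-empty key wins: canonical write key, then legacy fallbacks.
    return (
        ep_cfg.get("base_url", "")
        or ep_cfg.get("api", "")
        or ep_cfg.get("url", "")
        or ""
    )

assert _resolve_endpoint_url({"url": "https://legacy.example/v1"}) == "https://legacy.example/v1"
assert _resolve_endpoint_url({"base_url": "https://canon.example", "url": "shadowed"}) == "https://canon.example"
# ---------------------------------------------------------------------------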
+ api_url = ( + ep_cfg.get("base_url", "") + or ep_cfg.get("api", "") + or ep_cfg.get("url", "") + or "" + ) + # ``default_model`` is the legacy key; ``model`` matches what + # custom_providers entries use, so accept either. + default_model = ep_cfg.get("default_model", "") or ep_cfg.get("model", "") # Build models list from both default_model and full models array models_list = [] if default_model: models_list.append(default_model) - # Also include the full models list from config + # Also include the full models list from config. + # Hermes writes ``models:`` as a dict keyed by model id + # (see hermes_cli/main.py::_save_custom_provider); older + # configs or hand-edited files may still use a list. cfg_models = ep_cfg.get("models", []) - if isinstance(cfg_models, list): + if isinstance(cfg_models, dict): + for m in cfg_models: + if m and m not in models_list: + models_list.append(m) + elif isinstance(cfg_models, list): for m in cfg_models: if m and m not in models_list: models_list.append(m) @@ -1066,6 +1095,14 @@ def list_authenticated_providers( "source": "user-config", "api_url": api_url, }) + seen_slugs.add(ep_name.lower()) + seen_slugs.add(custom_provider_slug(display_name).lower()) + _pair = ( + str(display_name).strip().lower(), + str(api_url).strip().rstrip("/").lower(), + ) + if _pair[0] and _pair[1]: + _section3_emitted_pairs.add(_pair) # --- 4. Saved custom providers from config --- # Each ``custom_providers`` entry represents one model under a named @@ -1100,13 +1137,41 @@ def list_authenticated_providers( "api_url": api_url, "models": [], } + # The singular ``model:`` field only holds the currently + # active model. Hermes's own writer (main.py::_save_custom_provider) + # stores every configured model as a dict under ``models:``; + # downstream readers (agent/models_dev.py, gateway/run.py, + # run_agent.py, hermes_cli/config.py) already consume that dict. + # The /model picker previously ignored it, so multi-model + # custom providers appeared to have only the active model. default_model = (entry.get("model") or "").strip() if default_model and default_model not in groups[slug]["models"]: groups[slug]["models"].append(default_model) + cfg_models = entry.get("models", {}) + if isinstance(cfg_models, dict): + for m in cfg_models: + if m and m not in groups[slug]["models"]: + groups[slug]["models"].append(m) + elif isinstance(cfg_models, list): + for m in cfg_models: + if m and m not in groups[slug]["models"]: + groups[slug]["models"].append(m) + for slug, grp in groups.items(): if slug.lower() in seen_slugs: continue + # Skip if section 3 already emitted this endpoint under its + # ``providers:`` dict key — matches on (display_name, base_url), + # the tuple section 4 groups by. Prevents two picker rows + # labelled identically when callers pass both ``user_providers`` + # and a compatibility-merged ``custom_providers`` list. 
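# --- illustrative aside (not part of the patch) ---------------------------
# The dedup key shared by sections 3 and 4 is a normalized
# (display_name, base_url) tuple; `_endpoint_pair` is a hypothetical
# helper showing just the normalization, on made-up values.
def _endpoint_pair(name: str, api_url: str) -> tuple[str, str]:
    # Lowercase and strip a trailing slash so "https://X.test/" and
    # "https://x.test" compare equal.
    return (name.strip().lower(), api_url.strip().rstrip("/").lower())

seen = {_endpoint_pair("OpenRouter", "https://openrouter.ai/api/")}
# A custom_providers entry for the same endpoint maps to the same pair,
# so the picker shows one row instead of two.
assert _endpoint_pair("openrouter", "https://openrouter.ai/api") in seen
# ---------------------------------------------------------------------------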
+ _pair_key = ( + str(grp["name"]).strip().lower(), + str(grp["api_url"]).strip().rstrip("/").lower(), + ) + if _pair_key[0] and _pair_key[1] and _pair_key in _section3_emitted_pairs: + continue results.append({ "slug": slug, "name": grp["name"], diff --git a/hermes_cli/models.py b/hermes_cli/models.py index a0d7c2220c..4b3493506d 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -16,6 +16,12 @@ from difflib import get_close_matches from pathlib import Path from typing import Any, NamedTuple, Optional +from hermes_cli import __version__ as _HERMES_VERSION + +# Identify ourselves so endpoints fronted by Cloudflare's Browser Integrity +# Check (error 1010) don't reject the default ``Python-urllib/*`` signature. +_HERMES_USER_AGENT = f"hermes-cli/{_HERMES_VERSION}" + COPILOT_BASE_URL = "https://api.githubcopilot.com" COPILOT_MODELS_URL = f"{COPILOT_BASE_URL}/models" COPILOT_EDITOR_VERSION = "vscode/1.104.1" @@ -26,7 +32,7 @@ COPILOT_REASONING_EFFORTS_O_SERIES = ["low", "medium", "high"] # Fallback OpenRouter snapshot used when the live catalog is unavailable. # (model_id, display description shown in menus) OPENROUTER_MODELS: list[tuple[str, str]] = [ - ("moonshotai/kimi-k2.5", "recommended"), + ("moonshotai/kimi-k2.6", "recommended"), ("anthropic/claude-opus-4.7", ""), ("anthropic/claude-opus-4.6", ""), ("anthropic/claude-sonnet-4.6", ""), @@ -47,6 +53,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [ ("stepfun/step-3.5-flash", ""), ("minimax/minimax-m2.7", ""), ("minimax/minimax-m2.5", ""), + ("minimax/minimax-m2.5:free", "free"), ("z-ai/glm-5.1", ""), ("z-ai/glm-5v-turbo", ""), ("z-ai/glm-5-turbo", ""), @@ -62,6 +69,31 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [ _openrouter_catalog_cache: list[tuple[str, str]] | None = None +# Fallback Vercel AI Gateway snapshot used when the live catalog is unavailable. +# OSS / open-weight models prioritized first, then closed-source by family. +# Slugs match Vercel's actual /v1/models catalog (e.g. alibaba/ for Qwen, +# zai/ and xai/ without hyphens). +VERCEL_AI_GATEWAY_MODELS: list[tuple[str, str]] = [ + ("moonshotai/kimi-k2.6", "recommended"), + ("alibaba/qwen3.6-plus", ""), + ("zai/glm-5.1", ""), + ("minimax/minimax-m2.7", ""), + ("anthropic/claude-sonnet-4.6", ""), + ("anthropic/claude-opus-4.7", ""), + ("anthropic/claude-opus-4.6", ""), + ("anthropic/claude-haiku-4.5", ""), + ("openai/gpt-5.4", ""), + ("openai/gpt-5.4-mini", ""), + ("openai/gpt-5.3-codex", ""), + ("google/gemini-3.1-pro-preview", ""), + ("google/gemini-3-flash", ""), + ("google/gemini-3.1-flash-lite-preview", ""), + ("xai/grok-4.20-reasoning", ""), +] + +_ai_gateway_catalog_cache: list[tuple[str, str]] | None = None + + def _codex_curated_models() -> list[str]: """Derive the openai-codex curated list from codex_models.py. 
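# --- illustrative aside (not part of the patch) ---------------------------
# Why the User-Agent matters: urllib sends "Python-urllib/3.x" unless a
# UA is supplied, and Cloudflare's Browser Integrity Check can reject
# that signature with error 1010. A sketch of the header being applied
# (placeholder URL and version string; no request is actually sent):
import urllib.request

req = urllib.request.Request(
    "https://example.com/v1/models",
    headers={
        "Accept": "application/json",
        "User-Agent": "hermes-cli/0.0.0",  # real code interpolates __version__
    },
)
# urllib stores header names capitalized and only substitutes its
# default UA when none was provided.
assert req.get_header("User-agent") == "hermes-cli/0.0.0"
# ---------------------------------------------------------------------------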
@@ -75,7 +107,7 @@ def _codex_curated_models() -> list[str]: _PROVIDER_MODELS: dict[str, list[str]] = { "nous": [ - "moonshotai/kimi-k2.5", + "moonshotai/kimi-k2.6", "xiaomi/mimo-v2-pro", "anthropic/claude-opus-4.7", "anthropic/claude-opus-4.6", @@ -94,17 +126,15 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "stepfun/step-3.5-flash", "minimax/minimax-m2.7", "minimax/minimax-m2.5", + "minimax/minimax-m2.5:free", "z-ai/glm-5.1", "z-ai/glm-5v-turbo", "z-ai/glm-5-turbo", "x-ai/grok-4.20-beta", "nvidia/nemotron-3-super-120b-a12b", - "nvidia/nemotron-3-super-120b-a12b:free", - "arcee-ai/trinity-large-preview:free", "arcee-ai/trinity-large-thinking", "openai/gpt-5.4-pro", "openai/gpt-5.4-nano", - "openrouter/elephant-alpha", ], "openai-codex": _codex_curated_models(), "copilot-acp": [ @@ -128,16 +158,14 @@ _PROVIDER_MODELS: dict[str, list[str]] = { ], "gemini": [ "gemini-3.1-pro-preview", + "gemini-3-pro-preview", "gemini-3-flash-preview", "gemini-3.1-flash-lite-preview", - "gemini-2.5-pro", - "gemini-2.5-flash", - "gemini-2.5-flash-lite", ], "google-gemini-cli": [ - "gemini-2.5-pro", - "gemini-2.5-flash", - "gemini-2.5-flash-lite", + "gemini-3.1-pro-preview", + "gemini-3-pro-preview", + "gemini-3-flash-preview", ], "zai": [ "glm-5.1", @@ -161,12 +189,13 @@ _PROVIDER_MODELS: dict[str, list[str]] = { # (map to OpenRouter defaults — users get familiar picks on NIM) "qwen/qwen3.5-397b-a17b", "deepseek-ai/deepseek-v3.2", - "moonshotai/kimi-k2.5", + "moonshotai/kimi-k2.6", "minimaxai/minimax-m2.5", "z-ai/glm5", "openai/gpt-oss-120b", ], "kimi-coding": [ + "kimi-k2.6", "kimi-k2.5", "kimi-for-coding", "kimi-k2-thinking", @@ -175,12 +204,18 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "kimi-k2-0905-preview", ], "kimi-coding-cn": [ + "kimi-k2.6", "kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview", "kimi-k2-0905-preview", ], + "stepfun": [ + "step-3.5-flash", + "step-3.5-flash-2603", + ], "moonshot": [ + "kimi-k2.6", "kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview", @@ -227,7 +262,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "gpt-5.4-pro", "gpt-5.4", "gpt-5.3-codex", - "gpt-5.3-codex-spark", "gpt-5.2", "gpt-5.2-codex", "gpt-5.1", @@ -261,6 +295,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "big-pickle", ], "opencode-go": [ + "kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", @@ -268,20 +303,8 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "mimo-v2-omni", "minimax-m2.7", "minimax-m2.5", - ], - "ai-gateway": [ - "anthropic/claude-opus-4.6", - "anthropic/claude-sonnet-4.6", - "anthropic/claude-sonnet-4.5", - "anthropic/claude-haiku-4.5", - "openai/gpt-5", - "openai/gpt-4.1", - "openai/gpt-4.1-mini", - "google/gemini-3-pro-preview", - "google/gemini-3-flash", - "google/gemini-2.5-pro", - "google/gemini-2.5-flash", - "deepseek/deepseek-v3.2", + "qwen3.6-plus", + "qwen3.5-plus", ], "kilocode": [ "anthropic/claude-opus-4.6", @@ -315,6 +338,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "zai-org/GLM-5", "XiaomiMiMo/MiMo-V2-Flash", "moonshotai/Kimi-K2-Thinking", + "moonshotai/Kimi-K2.6", ], # AWS Bedrock — static fallback list used when dynamic discovery is # unavailable (no boto3, no credentials, or API error). The agent @@ -334,18 +358,18 @@ _PROVIDER_MODELS: dict[str, list[str]] = { ], } +# Vercel AI Gateway: derive the bare-model-id catalog from the curated +# ``VERCEL_AI_GATEWAY_MODELS`` snapshot so both the picker (tuples with descriptions) +# and the static fallback catalog (bare ids) stay in sync from a single +# source of truth. 
+_PROVIDER_MODELS["ai-gateway"] = [mid for mid, _ in VERCEL_AI_GATEWAY_MODELS] + # --------------------------------------------------------------------------- -# Nous Portal free-model filtering +# Nous Portal free-model helper # --------------------------------------------------------------------------- -# Models that are ALLOWED to appear when priced as free on Nous Portal. -# Any other free model is hidden — prevents promotional/temporary free models -# from cluttering the selection when users are paying subscribers. -# Models in this list are ALSO filtered out if they are NOT free (i.e. they -# should only appear in the menu when they are genuinely free). -_NOUS_ALLOWED_FREE_MODELS: frozenset[str] = frozenset({ - "xiaomi/mimo-v2-pro", - "xiaomi/mimo-v2-omni", -}) +# The Nous Portal models endpoint is the source of truth for which models +# are currently offered (free or paid). We trust whatever it returns and +# surface it to users as-is — no local allowlist filtering. def _is_model_free(model_id: str, pricing: dict[str, dict[str, str]]) -> bool: @@ -359,35 +383,6 @@ def _is_model_free(model_id: str, pricing: dict[str, dict[str, str]]) -> bool: return False -def filter_nous_free_models( - model_ids: list[str], - pricing: dict[str, dict[str, str]], -) -> list[str]: - """Filter the Nous Portal model list according to free-model policy. - - Rules: - • Paid models that are NOT in the allowlist → keep (normal case). - • Free models that are NOT in the allowlist → drop. - • Allowlist models that ARE free → keep. - • Allowlist models that are NOT free → drop. - """ - if not pricing: - return model_ids # no pricing data — can't filter, show everything - - result: list[str] = [] - for mid in model_ids: - free = _is_model_free(mid, pricing) - if mid in _NOUS_ALLOWED_FREE_MODELS: - # Allowlist model: only show when it's actually free - if free: - result.append(mid) - else: - # Regular model: keep only when it's NOT free - if not free: - result.append(mid) - return result - - # --------------------------------------------------------------------------- # Nous Portal account tier detection # --------------------------------------------------------------------------- @@ -451,8 +446,7 @@ def partition_nous_models_by_tier( ) -> tuple[list[str], list[str]]: """Split Nous models into (selectable, unavailable) based on user tier. - For paid-tier users: all models are selectable, none unavailable - (free-model filtering is handled separately by ``filter_nous_free_models``). + For paid-tier users: all models are selectable, none unavailable. For free-tier users: only free models are selectable; paid models are returned as unavailable (shown grayed out in the menu). @@ -491,8 +485,6 @@ def check_nous_free_tier() -> bool: Returns False (assume paid) on any error — never blocks paying users. """ global _free_tier_cache - import time - now = time.monotonic() if _free_tier_cache is not None: cached_result, cached_at = _free_tier_cache @@ -524,6 +516,157 @@ def check_nous_free_tier() -> bool: return False # default to paid on error — don't block users +# --------------------------------------------------------------------------- +# Nous Portal recommended models +# +# The Portal publishes a curated list of suggested models (separated into +# paid and free tiers) plus dedicated recommendations for compaction (text +# summarisation / auxiliary) and vision tasks. We fetch it once per process +# with a TTL cache so callers can ask "what's the best aux model right now?" +# without hitting the network on every lookup. 
+# +# Shape of the response (fields we care about): +# { +# "paidRecommendedModels": [ {modelName, ...}, ... ], +# "freeRecommendedModels": [ {modelName, ...}, ... ], +# "paidRecommendedCompactionModel": {modelName, ...} | null, +# "paidRecommendedVisionModel": {modelName, ...} | null, +# "freeRecommendedCompactionModel": {modelName, ...} | null, +# "freeRecommendedVisionModel": {modelName, ...} | null, +# } +# --------------------------------------------------------------------------- + +NOUS_RECOMMENDED_MODELS_PATH = "/api/nous/recommended-models" +_NOUS_RECOMMENDED_CACHE_TTL: int = 600 # seconds (10 minutes) +# (result_dict, timestamp) keyed by portal_base_url so staging vs prod don't collide. +_nous_recommended_cache: dict[str, tuple[dict[str, Any], float]] = {} + + +def fetch_nous_recommended_models( + portal_base_url: str = "", + timeout: float = 5.0, + *, + force_refresh: bool = False, +) -> dict[str, Any]: + """Fetch the Nous Portal's curated recommended-models payload. + + Hits ``/api/nous/recommended-models``. The endpoint is public — + no auth is required. Results are cached per portal URL for + ``_NOUS_RECOMMENDED_CACHE_TTL`` seconds; pass ``force_refresh=True`` to + bypass the cache. + + Returns the parsed JSON dict on success, or ``{}`` on any failure + (network, parse, non-2xx). Callers must treat missing/null fields as + "no recommendation" and fall back to their own default. + """ + base = (portal_base_url or "https://portal.nousresearch.com").rstrip("/") + now = time.monotonic() + cached = _nous_recommended_cache.get(base) + if not force_refresh and cached is not None: + payload, cached_at = cached + if now - cached_at < _NOUS_RECOMMENDED_CACHE_TTL: + return payload + + url = f"{base}{NOUS_RECOMMENDED_MODELS_PATH}" + try: + req = urllib.request.Request( + url, + headers={"Accept": "application/json"}, + ) + with urllib.request.urlopen(req, timeout=timeout) as resp: + data = json.loads(resp.read().decode()) + if not isinstance(data, dict): + data = {} + except Exception: + data = {} + + _nous_recommended_cache[base] = (data, now) + return data + + +def _resolve_nous_portal_url() -> str: + """Best-effort lookup of the Portal base URL the user is authed against.""" + try: + from hermes_cli.auth import ( + DEFAULT_NOUS_PORTAL_URL, + get_provider_auth_state, + ) + state = get_provider_auth_state("nous") or {} + portal = str(state.get("portal_base_url") or "").strip() + if portal: + return portal.rstrip("/") + return str(DEFAULT_NOUS_PORTAL_URL).rstrip("/") + except Exception: + return "https://portal.nousresearch.com" + + +def _extract_model_name(entry: Any) -> Optional[str]: + """Pull the ``modelName`` field from a recommended-model entry, else None.""" + if not isinstance(entry, dict): + return None + model_name = entry.get("modelName") + if isinstance(model_name, str) and model_name.strip(): + return model_name.strip() + return None + + +def get_nous_recommended_aux_model( + *, + vision: bool = False, + free_tier: Optional[bool] = None, + portal_base_url: str = "", + force_refresh: bool = False, +) -> Optional[str]: + """Return the Portal's recommended model name for an auxiliary task. 
+ + Picks the best field from the Portal's recommended-models payload: + + * ``vision=True`` → ``paidRecommendedVisionModel`` (paid tier) or + ``freeRecommendedVisionModel`` (free tier) + * ``vision=False`` → ``paidRecommendedCompactionModel`` or + ``freeRecommendedCompactionModel`` + + When ``free_tier`` is ``None`` (default) the user's tier is auto-detected + via :func:`check_nous_free_tier`. Pass an explicit bool to bypass the + detection — useful for tests or when the caller already knows the tier. + + For paid-tier users we prefer the paid recommendation but gracefully fall + back to the free recommendation if the Portal returned ``null`` for the + paid field (common during the staged rollout of new paid models). + + Returns ``None`` when every candidate is missing, null, or the fetch + fails — callers should fall back to their own default (currently + ``google/gemini-3-flash-preview``). + """ + base = portal_base_url or _resolve_nous_portal_url() + payload = fetch_nous_recommended_models(base, force_refresh=force_refresh) + if not payload: + return None + + if free_tier is None: + try: + free_tier = check_nous_free_tier() + except Exception: + # On any detection error, assume paid — paid users see both fields + # anyway so this is a safe default that maximises model quality. + free_tier = False + + if vision: + paid_key, free_key = "paidRecommendedVisionModel", "freeRecommendedVisionModel" + else: + paid_key, free_key = "paidRecommendedCompactionModel", "freeRecommendedCompactionModel" + + # Preference order: + # free tier → free only + # paid tier → paid, then free (if paid field is null) + candidates = [free_key] if free_tier else [paid_key, free_key] + for key in candidates: + name = _extract_model_name(payload.get(key)) + if name: + return name + return None + + # --------------------------------------------------------------------------- # Canonical provider list — single source of truth for provider identity. 
# Every code path that lists, displays, or iterates providers derives from @@ -544,6 +687,7 @@ class ProviderEntry(NamedTuple): CANONICAL_PROVIDERS: list[ProviderEntry] = [ ProviderEntry("nous", "Nous Portal", "Nous Portal (Nous Research subscription)"), ProviderEntry("openrouter", "OpenRouter", "OpenRouter (100+ models, pay-per-use)"), + ProviderEntry("ai-gateway", "Vercel AI Gateway", "Vercel AI Gateway (200+ models, $5 free credit, no markup)"), ProviderEntry("anthropic", "Anthropic", "Anthropic (Claude models — API key or Claude Code)"), ProviderEntry("openai-codex", "OpenAI Codex", "OpenAI Codex"), ProviderEntry("xiaomi", "Xiaomi MiMo", "Xiaomi MiMo (MiMo-V2 models — pro, omni, flash)"), @@ -552,13 +696,14 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [ ProviderEntry("copilot", "GitHub Copilot", "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)"), ProviderEntry("copilot-acp", "GitHub Copilot ACP", "GitHub Copilot ACP (spawns `copilot --acp --stdio`)"), ProviderEntry("huggingface", "Hugging Face", "Hugging Face Inference Providers (20+ open models)"), - ProviderEntry("gemini", "Google AI Studio", "Google AI Studio (Gemini models — OpenAI-compatible endpoint)"), + ProviderEntry("gemini", "Google AI Studio", "Google AI Studio (Gemini models — native Gemini API)"), ProviderEntry("google-gemini-cli", "Google Gemini (OAuth)", "Google Gemini via OAuth + Code Assist (free tier supported; no API key needed)"), ProviderEntry("deepseek", "DeepSeek", "DeepSeek (DeepSeek-V3, R1, coder — direct API)"), ProviderEntry("xai", "xAI", "xAI (Grok models — direct API)"), ProviderEntry("zai", "Z.AI / GLM", "Z.AI / GLM (Zhipu AI direct API)"), ProviderEntry("kimi-coding", "Kimi / Kimi Coding Plan", "Kimi Coding Plan (api.kimi.com) & Moonshot API"), ProviderEntry("kimi-coding-cn", "Kimi / Moonshot (China)", "Kimi / Moonshot China (Moonshot CN direct API)"), + ProviderEntry("stepfun", "StepFun Step Plan", "StepFun Step Plan (agent/coding models via Step Plan API)"), ProviderEntry("minimax", "MiniMax", "MiniMax (global direct API)"), ProviderEntry("minimax-cn", "MiniMax (China)", "MiniMax China (domestic direct API)"), ProviderEntry("alibaba", "Alibaba Cloud (DashScope)","Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"), @@ -567,7 +712,6 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [ ProviderEntry("kilocode", "Kilo Code", "Kilo Code (Kilo Gateway API)"), ProviderEntry("opencode-zen", "OpenCode Zen", "OpenCode Zen (35+ curated models, pay-as-you-go)"), ProviderEntry("opencode-go", "OpenCode Go", "OpenCode Go (open models, $10/month subscription)"), - ProviderEntry("ai-gateway", "Vercel AI Gateway", "Vercel AI Gateway (200+ models, pay-per-use)"), ProviderEntry("bedrock", "AWS Bedrock", "AWS Bedrock (Claude, Nova, Llama, DeepSeek — IAM or API key)"), ] @@ -594,6 +738,8 @@ _PROVIDER_ALIASES = { "moonshot": "kimi-coding", "kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn", + "step": "stepfun", + "stepfun-coding-plan": "stepfun", "arcee-ai": "arcee", "arceeai": "arcee", "minimax-china": "minimax-cn", @@ -663,6 +809,31 @@ def _openrouter_model_is_free(pricing: Any) -> bool: return False +def _openrouter_model_supports_tools(item: Any) -> bool: + """Return True when the model's ``supported_parameters`` advertise tool calling. + + hermes-agent is tool-calling-first — every provider path assumes the model + can invoke tools. Models that don't advertise ``tools`` in their + ``supported_parameters`` (e.g. 
image-only or completion-only models) cannot + be driven by the agent loop and would fail at the first tool call. + + **Permissive when the field is missing.** Some OpenRouter-compatible gateways + (Nous Portal, private mirrors, older catalog snapshots) don't populate + ``supported_parameters`` at all. Treat that as "unknown capability → allow" + so the picker doesn't silently empty for those users. Only hide models + whose ``supported_parameters`` is an explicit list that omits ``tools``. + + Ported from Kilo-Org/kilocode#9068. + """ + if not isinstance(item, dict): + return True + params = item.get("supported_parameters") + if not isinstance(params, list): + # Field absent / malformed / None — be permissive. + return True + return "tools" in params + + def fetch_openrouter_models( timeout: float = 8.0, *, @@ -705,6 +876,11 @@ def fetch_openrouter_models( live_item = live_by_id.get(preferred_id) if live_item is None: continue + # Hide models that don't advertise tool-calling support — hermes-agent + # requires it and surfacing them leads to immediate runtime failures + # when the user selects them. Ported from Kilo-Org/kilocode#9068. + if not _openrouter_model_supports_tools(live_item): + continue desc = "free" if _openrouter_model_is_free(live_item.get("pricing")) else "" curated.append((preferred_id, desc)) @@ -722,6 +898,93 @@ def model_ids(*, force_refresh: bool = False) -> list[str]: return [mid for mid, _ in fetch_openrouter_models(force_refresh=force_refresh)] +def _ai_gateway_model_is_free(pricing: Any) -> bool: + """Return True if an AI Gateway model has $0 input AND output pricing.""" + if not isinstance(pricing, dict): + return False + try: + return float(pricing.get("input", "0")) == 0 and float(pricing.get("output", "0")) == 0 + except (TypeError, ValueError): + return False + + +def fetch_ai_gateway_models( + timeout: float = 8.0, + *, + force_refresh: bool = False, +) -> list[tuple[str, str]]: + """Return the curated AI Gateway picker list, refreshed from the live catalog when possible.""" + global _ai_gateway_catalog_cache + + if _ai_gateway_catalog_cache is not None and not force_refresh: + return list(_ai_gateway_catalog_cache) + + from hermes_constants import AI_GATEWAY_BASE_URL + + fallback = list(VERCEL_AI_GATEWAY_MODELS) + preferred_ids = [mid for mid, _ in fallback] + + try: + req = urllib.request.Request( + f"{AI_GATEWAY_BASE_URL.rstrip('/')}/models", + headers={"Accept": "application/json"}, + ) + with urllib.request.urlopen(req, timeout=timeout) as resp: + payload = json.loads(resp.read().decode()) + except Exception: + return list(_ai_gateway_catalog_cache or fallback) + + live_items = payload.get("data", []) + if not isinstance(live_items, list): + return list(_ai_gateway_catalog_cache or fallback) + + live_by_id: dict[str, dict[str, Any]] = {} + for item in live_items: + if not isinstance(item, dict): + continue + mid = str(item.get("id") or "").strip() + if not mid: + continue + live_by_id[mid] = item + + curated: list[tuple[str, str]] = [] + for preferred_id in preferred_ids: + live_item = live_by_id.get(preferred_id) + if live_item is None: + continue + desc = "free" if _ai_gateway_model_is_free(live_item.get("pricing")) else "" + curated.append((preferred_id, desc)) + + if not curated: + return list(_ai_gateway_catalog_cache or fallback) + + # If the live catalog offers a free Moonshot model, auto-promote it to + # position #1 as "recommended" — dynamic discovery without a PR. 
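# --- illustrative aside (not part of the patch) ---------------------------
# The promotion step below in isolation, on synthetic catalog data: a
# model is "free" when both input and output prices are zero, and the
# first free moonshotai/* hit is moved to the top of the curated list
# and relabelled "recommended".
def _is_free(pricing: dict) -> bool:
    return float(pricing.get("input", "0")) == 0 and float(pricing.get("output", "0")) == 0

live_by_id = {
    "moonshotai/kimi-k2.6": {"pricing": {"input": "0", "output": "0"}},
    "anthropic/claude-sonnet-4.6": {"pricing": {"input": "3", "output": "15"}},
}
curated = [("anthropic/claude-sonnet-4.6", ""), ("moonshotai/kimi-k2.6", "free")]

free_moonshot = next(
    (mid for mid, item in live_by_id.items()
     if mid.startswith("moonshotai/") and _is_free(item.get("pricing", {}))),
    None,
)
if free_moonshot:
    curated = [(mid, desc) for mid, desc in curated if mid != free_moonshot]
    curated.insert(0, (free_moonshot, "recommended"))

assert curated[0] == ("moonshotai/kimi-k2.6", "recommended")
# ---------------------------------------------------------------------------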
+ free_moonshot = next( + ( + mid + for mid, item in live_by_id.items() + if mid.startswith("moonshotai/") + and _ai_gateway_model_is_free(item.get("pricing")) + ), + None, + ) + if free_moonshot: + curated = [(mid, desc) for mid, desc in curated if mid != free_moonshot] + curated.insert(0, (free_moonshot, "recommended")) + else: + first_id, _ = curated[0] + curated[0] = (first_id, "recommended") + + _ai_gateway_catalog_cache = curated + return list(curated) + + +def ai_gateway_model_ids(*, force_refresh: bool = False) -> list[str]: + """Return just the AI Gateway model-id strings.""" + return [mid for mid, _ in fetch_ai_gateway_models(force_refresh=force_refresh)] + + # --------------------------------------------------------------------------- @@ -866,6 +1129,56 @@ def fetch_models_with_pricing( return result +def fetch_ai_gateway_pricing( + timeout: float = 8.0, + *, + force_refresh: bool = False, +) -> dict[str, dict[str, str]]: + """Fetch Vercel AI Gateway /v1/models and return hermes-shaped pricing. + + Vercel uses ``input`` / ``output`` field names; hermes's picker expects + ``prompt`` / ``completion``. This translates. Cache read/write field names + already match. + """ + from hermes_constants import AI_GATEWAY_BASE_URL + + cache_key = AI_GATEWAY_BASE_URL.rstrip("/") + if not force_refresh and cache_key in _pricing_cache: + return _pricing_cache[cache_key] + + try: + req = urllib.request.Request( + f"{cache_key}/models", + headers={"Accept": "application/json"}, + ) + with urllib.request.urlopen(req, timeout=timeout) as resp: + payload = json.loads(resp.read().decode()) + except Exception: + _pricing_cache[cache_key] = {} + return {} + + result: dict[str, dict[str, str]] = {} + for item in payload.get("data", []): + if not isinstance(item, dict): + continue + mid = item.get("id") + pricing = item.get("pricing") + if not (mid and isinstance(pricing, dict)): + continue + entry: dict[str, str] = { + "prompt": str(pricing.get("input", "")), + "completion": str(pricing.get("output", "")), + } + if pricing.get("input_cache_read"): + entry["input_cache_read"] = str(pricing["input_cache_read"]) + if pricing.get("input_cache_write"): + entry["input_cache_write"] = str(pricing["input_cache_write"]) + result[mid] = entry + + _pricing_cache[cache_key] = result + return result + + def _resolve_openrouter_api_key() -> str: """Best-effort OpenRouter API key for pricing fetch.""" return os.getenv("OPENROUTER_API_KEY", "").strip() @@ -884,7 +1197,7 @@ def _resolve_nous_pricing_credentials() -> tuple[str, str]: def get_pricing_for_provider(provider: str, *, force_refresh: bool = False) -> dict[str, dict[str, str]]: - """Return live pricing for providers that support it (openrouter, nous).""" + """Return live pricing for providers that support it (openrouter, nous, ai-gateway).""" normalized = normalize_provider(provider) if normalized == "openrouter": return fetch_models_with_pricing( @@ -892,6 +1205,8 @@ def get_pricing_for_provider(provider: str, *, force_refresh: bool = False) -> d base_url="https://openrouter.ai/api", force_refresh=force_refresh, ) + if normalized == "ai-gateway": + return fetch_ai_gateway_pricing(force_refresh=force_refresh) if normalized == "nous": api_key, base_url = _resolve_nous_pricing_credentials() if base_url: @@ -1096,7 +1411,6 @@ def detect_provider_for_model( from hermes_cli.auth import PROVIDER_REGISTRY pconfig = PROVIDER_REGISTRY.get(direct_match) if pconfig: - import os for env_var in pconfig.api_key_env_vars: if os.getenv(env_var, "").strip(): has_creds = True @@ 
-1306,6 +1620,19 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False) return live except Exception: pass + if normalized == "stepfun": + try: + from hermes_cli.auth import resolve_api_key_provider_credentials + + creds = resolve_api_key_provider_credentials("stepfun") + api_key = str(creds.get("api_key") or "").strip() + base_url = str(creds.get("base_url") or "").strip() + if api_key and base_url: + live = fetch_api_models(api_key, base_url) + if live: + return live + except Exception: + pass if normalized == "anthropic": live = _fetch_anthropic_models() if live: @@ -1771,7 +2098,7 @@ def probe_api_models( candidates.append((alternate_base, True)) tried: list[str] = [] - headers: dict[str, str] = {} + headers: dict[str, str] = {"User-Agent": _HERMES_USER_AGENT} if api_key: headers["Authorization"] = f"Bearer {api_key}" if normalized.startswith(COPILOT_BASE_URL): @@ -2106,6 +2433,51 @@ def validate_requested_model( ), } + # MiniMax providers don't expose a /models endpoint — validate against + # the static catalog instead, similar to openai-codex. + if normalized in ("minimax", "minimax-cn"): + try: + catalog_models = provider_model_ids(normalized) + except Exception: + catalog_models = [] + if catalog_models: + # Case-insensitive lookup (catalog uses mixed case like MiniMax-M2.7) + catalog_lower = {m.lower(): m for m in catalog_models} + if requested_for_lookup.lower() in catalog_lower: + return { + "accepted": True, + "persist": True, + "recognized": True, + "message": None, + } + # Auto-correct close matches (case-insensitive) + catalog_lower_list = list(catalog_lower.keys()) + auto = get_close_matches(requested_for_lookup.lower(), catalog_lower_list, n=1, cutoff=0.9) + if auto: + corrected = catalog_lower[auto[0]] + return { + "accepted": True, + "persist": True, + "recognized": True, + "corrected_model": corrected, + "message": f"Auto-corrected `{requested}` → `{corrected}`", + } + suggestions = get_close_matches(requested_for_lookup.lower(), catalog_lower_list, n=3, cutoff=0.5) + suggestion_text = "" + if suggestions: + suggestion_text = "\n Similar models: " + ", ".join(f"`{catalog_lower[s]}`" for s in suggestions) + return { + "accepted": True, + "persist": True, + "recognized": False, + "message": ( + f"Note: `{requested}` was not found in the MiniMax catalog." + f"{suggestion_text}" + "\n MiniMax does not expose a /models endpoint, so Hermes cannot verify the model name." + "\n The model may still work if it exists on the server." + ), + } + # Probe the live API to check if the model actually exists api_models = fetch_api_models(api_key, base_url) @@ -2188,13 +2560,70 @@ def validate_requested_model( except Exception: pass # Fall through to generic warning + # Static-catalog fallback: when the /models probe was unreachable, + # validate against the curated list from provider_model_ids() — same + # pattern as the openai-codex and minimax branches above. This fixes + # /model switches in the gateway for providers like opencode-go and + # opencode-zen whose /models endpoint returns 404 against the HTML + # marketing site. Without this block, validate_requested_model would + # reject every model on such providers, switch_model() would return + # success=False, and the gateway would never write to + # _session_model_overrides. 
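# --- illustrative aside (not part of the patch) ---------------------------
# The matching ladder used by the catalog branches, on a toy catalog:
# exact case-insensitive hit, then difflib auto-correct at cutoff 0.9,
# then looser suggestions at cutoff 0.5.
from difflib import get_close_matches

catalog = ["MiniMax-M2.7", "MiniMax-M2.5"]
catalog_lower = {m.lower(): m for m in catalog}

assert catalog_lower.get("minimax-m2.7") == "MiniMax-M2.7"  # exact, case-folded
auto = get_close_matches("minimax-m27", list(catalog_lower), n=1, cutoff=0.9)
assert catalog_lower[auto[0]] == "MiniMax-M2.7"             # typo auto-corrected
loose = get_close_matches("minimax", list(catalog_lower), n=3, cutoff=0.5)
assert loose                                                # surfaced as suggestions only
# ---------------------------------------------------------------------------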
provider_label = _PROVIDER_LABELS.get(normalized, normalized) + try: + catalog_models = provider_model_ids(normalized) + except Exception: + catalog_models = [] + + if catalog_models: + catalog_lower = {m.lower(): m for m in catalog_models} + if requested_for_lookup.lower() in catalog_lower: + return { + "accepted": True, + "persist": True, + "recognized": True, + "message": None, + } + catalog_lower_list = list(catalog_lower.keys()) + auto = get_close_matches( + requested_for_lookup.lower(), catalog_lower_list, n=1, cutoff=0.9 + ) + if auto: + corrected = catalog_lower[auto[0]] + return { + "accepted": True, + "persist": True, + "recognized": True, + "corrected_model": corrected, + "message": f"Auto-corrected `{requested}` → `{corrected}`", + } + suggestions = get_close_matches( + requested_for_lookup.lower(), catalog_lower_list, n=3, cutoff=0.5 + ) + suggestion_text = "" + if suggestions: + suggestion_text = "\n Similar models: " + ", ".join( + f"`{catalog_lower[s]}`" for s in suggestions + ) + return { + "accepted": True, + "persist": True, + "recognized": False, + "message": ( + f"Note: `{requested}` was not found in the {provider_label} curated catalog " + f"and the /models endpoint was unreachable.{suggestion_text}" + f"\n The model may still work if it exists on the provider." + ), + } + + # No catalog available — accept with a warning, matching the comment's + # stated intent ("Accept and persist, but warn"). return { - "accepted": False, - "persist": False, + "accepted": True, + "persist": True, "recognized": False, "message": ( - f"Could not reach the {provider_label} API to validate `{requested}`. " + f"Note: could not reach the {provider_label} API to validate `{requested}`. " f"If the service isn't down, this model may not be valid." ), } diff --git a/hermes_cli/nous_subscription.py b/hermes_cli/nous_subscription.py index 691126a4c6..78181aab2b 100644 --- a/hermes_cli/nous_subscription.py +++ b/hermes_cli/nous_subscription.py @@ -10,6 +10,7 @@ from hermes_cli.auth import get_nous_auth_status from hermes_cli.config import get_env_value, load_config from tools.managed_tool_gateway import is_managed_tool_gateway_ready from tools.tool_backend_helpers import ( + fal_key_is_configured, has_direct_modal_credentials, managed_nous_tools_enabled, normalize_browser_cloud_provider, @@ -271,7 +272,7 @@ def get_nous_subscription_features( direct_firecrawl = bool(get_env_value("FIRECRAWL_API_KEY") or get_env_value("FIRECRAWL_API_URL")) direct_parallel = bool(get_env_value("PARALLEL_API_KEY")) direct_tavily = bool(get_env_value("TAVILY_API_KEY")) - direct_fal = bool(get_env_value("FAL_KEY")) + direct_fal = fal_key_is_configured() direct_openai_tts = bool(resolve_openai_audio_api_key()) direct_elevenlabs = bool(get_env_value("ELEVENLABS_API_KEY")) direct_camofox = bool(get_env_value("CAMOFOX_URL")) @@ -520,7 +521,7 @@ def apply_nous_managed_defaults( browser_cfg["cloud_provider"] = "browser-use" changed.add("browser") - if "image_gen" in selected_toolsets and not get_env_value("FAL_KEY"): + if "image_gen" in selected_toolsets and not fal_key_is_configured(): changed.add("image_gen") return changed @@ -548,7 +549,7 @@ def _get_gateway_direct_credentials() -> Dict[str, bool]: or get_env_value("TAVILY_API_KEY") or get_env_value("EXA_API_KEY") ), - "image_gen": bool(get_env_value("FAL_KEY")), + "image_gen": fal_key_is_configured(), "tts": bool( resolve_openai_audio_api_key() or get_env_value("ELEVENLABS_API_KEY") @@ -586,7 +587,6 @@ def get_gateway_eligible_tools( return [], [], [] if config is 
None: - from hermes_cli.config import load_config config = load_config() or {} # Quick provider check without the heavy get_nous_subscription_features call diff --git a/hermes_cli/plugins.py b/hermes_cli/plugins.py index 2385a5c942..3dd7af823d 100644 --- a/hermes_cli/plugins.py +++ b/hermes_cli/plugins.py @@ -2,14 +2,20 @@ Hermes Plugin System ==================== -Discovers, loads, and manages plugins from three sources: +Discovers, loads, and manages plugins from four sources: -1. **User plugins** – ``~/.hermes/plugins/<name>/`` -2. **Project plugins** – ``./.hermes/plugins/<name>/`` (opt-in via +1. **Bundled plugins** – ``<repo>/plugins/<name>/`` (shipped with hermes-agent; + ``memory/`` and ``context_engine/`` subdirs are excluded — they have their + own discovery paths) +2. **User plugins** – ``~/.hermes/plugins/<name>/`` +3. **Project plugins** – ``./.hermes/plugins/<name>/`` (opt-in via ``HERMES_ENABLE_PROJECT_PLUGINS``) -3. **Pip plugins** – packages that expose the ``hermes_agent.plugins`` +4. **Pip plugins** – packages that expose the ``hermes_agent.plugins`` entry-point group. +Later sources override earlier ones on name collision, so a user or project +plugin with the same name as a bundled plugin replaces it. + Each directory plugin must contain a ``plugin.yaml`` manifest **and** an ``__init__.py`` with a ``register(ctx)`` function. @@ -54,6 +60,8 @@ logger = logging.getLogger(__name__) VALID_HOOKS: Set[str] = { "pre_tool_call", "post_tool_call", + "transform_terminal_output", + "transform_tool_result", "pre_llm_call", "post_llm_call", "pre_api_request", @@ -62,6 +70,7 @@ "on_session_end", "on_session_finalize", "on_session_reset", + "subagent_stop", } ENTRY_POINTS_GROUP = "hermes_agent.plugins" @@ -75,7 +84,12 @@ def _env_enabled(name: str) -> bool: def _get_disabled_plugins() -> set: - """Read the disabled plugins list from config.yaml.""" + """Read the disabled plugins list from config.yaml. + + Kept for backward compat and explicit deny-list semantics. A plugin + name in this set will never load, even if it appears in + ``plugins.enabled``. + """ try: from hermes_cli.config import load_config config = load_config() @@ -85,10 +99,43 @@ return set() +def _get_enabled_plugins() -> Optional[set]: + """Read the enabled-plugins allow-list from config.yaml. + + Plugins are opt-in by default — only plugins whose name appears in + this set are loaded. Returns: + + * ``None`` — the key is missing or malformed. Callers should treat + this as "nothing enabled yet" (the opt-in default); the first + ``migrate_config`` run populates the key with a grandfathered set + of currently-installed user plugins so existing setups don't + break on upgrade. + * ``set()`` — an empty list was explicitly set; nothing loads. + * ``set(...)`` — the concrete allow-list.
+ """ + try: + from hermes_cli.config import load_config + config = load_config() + plugins_cfg = config.get("plugins") + if not isinstance(plugins_cfg, dict): + return None + if "enabled" not in plugins_cfg: + return None + enabled = plugins_cfg.get("enabled") + if not isinstance(enabled, list): + return None + return set(enabled) + except Exception: + return None + + # --------------------------------------------------------------------------- # Data classes # --------------------------------------------------------------------------- +_VALID_PLUGIN_KINDS: Set[str] = {"standalone", "backend", "exclusive"} + + @dataclass class PluginManifest: """Parsed representation of a plugin.yaml manifest.""" @@ -102,6 +149,23 @@ class PluginManifest: provides_hooks: List[str] = field(default_factory=list) source: str = "" # "user", "project", or "entrypoint" path: Optional[str] = None + # Plugin kind — see plugins.py module docstring for semantics. + # ``standalone`` (default): hooks/tools of its own; opt-in via + # ``plugins.enabled``. + # ``backend``: pluggable backend for an existing core tool (e.g. + # image_gen). Built-in (bundled) backends auto-load; + # user-installed still gated by ``plugins.enabled``. + # ``exclusive``: category with exactly one active provider (memory). + # Selection via ``.provider`` config key; the + # category's own discovery system handles loading and the + # general scanner skips these. + kind: str = "standalone" + # Registry key — path-derived, used by ``plugins.enabled``/``disabled`` + # lookups and by ``hermes plugins list``. For a flat plugin at + # ``plugins/disk-cleanup/`` the key is ``disk-cleanup``; for a nested + # category plugin at ``plugins/image_gen/openai/`` the key is + # ``image_gen/openai``. When empty, falls back to ``name``. + key: str = "" @dataclass @@ -322,6 +386,33 @@ class PluginContext: self.manifest.name, engine.name, ) + # -- image gen provider registration ------------------------------------ + + def register_image_gen_provider(self, provider) -> None: + """Register an image generation backend. + + ``provider`` must be an instance of + :class:`agent.image_gen_provider.ImageGenProvider`. The + ``provider.name`` attribute is what ``image_gen.provider`` in + ``config.yaml`` matches against when routing ``image_generate`` + tool calls. + """ + from agent.image_gen_provider import ImageGenProvider + from agent.image_gen_registry import register_provider + + if not isinstance(provider, ImageGenProvider): + logger.warning( + "Plugin '%s' tried to register an image_gen provider that does " + "not inherit from ImageGenProvider. Ignoring.", + self.manifest.name, + ) + return + register_provider(provider) + logger.info( + "Plugin '%s' registered image_gen provider: %s", + self.manifest.name, provider.name, + ) + # -- hook registration -------------------------------------------------- def register_hook(self, hook_name: str, callback: Callable) -> None: @@ -420,26 +511,103 @@ class PluginManager: manifests: List[PluginManifest] = [] - # 1. User plugins (~/.hermes/plugins/) + # 1. Bundled plugins (/plugins//) + # + # Repo-shipped plugins live next to hermes_cli/. Two layouts are + # supported (see ``_scan_directory`` for details): + # + # - flat: ``plugins/disk-cleanup/plugin.yaml`` (standalone) + # - category: ``plugins/image_gen/openai/plugin.yaml`` (backend) + # + # ``memory/`` and ``context_engine/`` are skipped at the top level — + # they have their own discovery systems. 
Porting those to the + # category-namespace ``kind: exclusive`` model is a future PR. + repo_plugins = Path(__file__).resolve().parent.parent / "plugins" + manifests.extend( + self._scan_directory( + repo_plugins, + source="bundled", + skip_names={"memory", "context_engine"}, + ) + ) + + # 2. User plugins (~/.hermes/plugins/) user_dir = get_hermes_home() / "plugins" manifests.extend(self._scan_directory(user_dir, source="user")) - # 2. Project plugins (./.hermes/plugins/) + # 3. Project plugins (./.hermes/plugins/) if _env_enabled("HERMES_ENABLE_PROJECT_PLUGINS"): project_dir = Path.cwd() / ".hermes" / "plugins" manifests.extend(self._scan_directory(project_dir, source="project")) - # 3. Pip / entry-point plugins + # 4. Pip / entry-point plugins manifests.extend(self._scan_entry_points()) - # Load each manifest (skip user-disabled plugins) + # Load each manifest (skip user-disabled plugins). + # Later sources override earlier ones on key collision — user + # plugins take precedence over bundled, project plugins take + # precedence over user. Dedup here so we only load the final + # winner. Keys are path-derived (``image_gen/openai``, + # ``disk-cleanup``) so ``tts/openai`` and ``image_gen/openai`` + # don't collide even when both manifests say ``name: openai``. disabled = _get_disabled_plugins() + enabled = _get_enabled_plugins() # None = opt-in default (nothing enabled) + winners: Dict[str, PluginManifest] = {} for manifest in manifests: - if manifest.name in disabled: + winners[manifest.key or manifest.name] = manifest + for manifest in winners.values(): + lookup_key = manifest.key or manifest.name + + # Explicit disable always wins (matches on key or on legacy + # bare name for back-compat with existing user configs). + if lookup_key in disabled or manifest.name in disabled: loaded = LoadedPlugin(manifest=manifest, enabled=False) loaded.error = "disabled via config" - self._plugins[manifest.name] = loaded - logger.debug("Skipping disabled plugin '%s'", manifest.name) + self._plugins[lookup_key] = loaded + logger.debug("Skipping disabled plugin '%s'", lookup_key) + continue + + # Exclusive plugins (memory providers) have their own + # discovery/activation path. The general loader records the + # manifest for introspection but does not load the module. + if manifest.kind == "exclusive": + loaded = LoadedPlugin(manifest=manifest, enabled=False) + loaded.error = ( + "exclusive plugin — activate via <category>.provider config" + ) + self._plugins[lookup_key] = loaded + logger.debug( + "Skipping '%s' (exclusive, handled by category discovery)", + lookup_key, + ) + continue + + # Built-in backends auto-load — they ship with hermes and must + # just work. Selection among them (e.g. which image_gen backend + # services calls) is driven by ``<category>.provider`` config, + # enforced by the tool wrapper. + if manifest.kind == "backend" and manifest.source == "bundled": + self._load_plugin(manifest) + continue + + # Everything else (standalone, user-installed backends, + # entry-point plugins) is opt-in via plugins.enabled. + # Accept both the path-derived key and the legacy bare name + # so existing configs keep working.
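# --- illustrative aside (not part of the patch) ---------------------------
# The opt-in gate that follows, in miniature. `enabled` has three
# states — None (key missing: nothing enabled yet), an empty set
# (explicitly nothing), or a concrete allow-list — and both the
# path-derived key and the legacy bare name are accepted:
def _gate(enabled, lookup_key, name):
    return enabled is not None and (lookup_key in enabled or name in enabled)

assert not _gate(None, "image_gen/openai", "openai")    # opt-in default
assert not _gate(set(), "image_gen/openai", "openai")   # explicit empty list
assert _gate({"openai"}, "image_gen/openai", "openai")  # legacy bare name
assert _gate({"image_gen/openai"}, "image_gen/openai", "openai")  # path key
# ---------------------------------------------------------------------------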
+ is_enabled = ( + enabled is not None + and (lookup_key in enabled or manifest.name in enabled) + ) + if not is_enabled: + loaded = LoadedPlugin(manifest=manifest, enabled=False) + loaded.error = ( + "not enabled in config (run `hermes plugins enable {}` to activate)" + .format(lookup_key) + ) + self._plugins[lookup_key] = loaded + logger.debug( + "Skipping '%s' (not in plugins.enabled)", lookup_key + ) continue self._load_plugin(manifest) @@ -454,8 +622,46 @@ class PluginManager: # Directory scanning # ----------------------------------------------------------------------- - def _scan_directory(self, path: Path, source: str) -> List[PluginManifest]: - """Read ``plugin.yaml`` manifests from subdirectories of *path*.""" + def _scan_directory( + self, + path: Path, + source: str, + skip_names: Optional[Set[str]] = None, + ) -> List[PluginManifest]: + """Read ``plugin.yaml`` manifests from subdirectories of *path*. + + Supports two layouts, mixed freely: + + * **Flat** — ``<root>/<name>/plugin.yaml``. Key is + ``<name>`` (e.g. ``disk-cleanup``). + * **Category** — ``<root>/<category>/<name>/plugin.yaml``, + where the ``<category>`` directory itself has no ``plugin.yaml``. + Key is ``<category>/<name>`` (e.g. ``image_gen/openai``). + Depth is capped at two segments. + + *skip_names* is an optional set of top-level directory names to + skip (the bundled-plugins call site uses it to exclude ``memory`` + and ``context_engine``, which have their own discovery paths). + """ + return self._scan_directory_level( + path, source, skip_names=skip_names, prefix="", depth=0 + ) + + def _scan_directory_level( + self, + path: Path, + source: str, + *, + skip_names: Optional[Set[str]], + prefix: str, + depth: int, + ) -> List[PluginManifest]: + """Recursive implementation of :meth:`_scan_directory`. + + ``prefix`` is the category path already accumulated ("" at root, + "image_gen" one level in). ``depth`` is the recursion depth; we + cap at 2 so ``<root>/a/b/c/`` is ignored. + """ manifests: List[PluginManifest] = [] if not path.is_dir(): return manifests @@ -463,35 +669,112 @@ for child in sorted(path.iterdir()): if not child.is_dir(): continue + if depth == 0 and skip_names and child.name in skip_names: + continue manifest_file = child / "plugin.yaml" if not manifest_file.exists(): manifest_file = child / "plugin.yml" - if not manifest_file.exists(): - logger.debug("Skipping %s (no plugin.yaml)", child) + + if manifest_file.exists(): + manifest = self._parse_manifest( + manifest_file, child, source, prefix + ) + if manifest is not None: + manifests.append(manifest) continue - try: - if yaml is None: - logger.warning("PyYAML not installed – cannot load %s", manifest_file) - continue - data = yaml.safe_load(manifest_file.read_text()) or {} - manifest = PluginManifest( - name=data.get("name", child.name), - version=str(data.get("version", "")), - description=data.get("description", ""), - author=data.get("author", ""), - requires_env=data.get("requires_env", []), - provides_tools=data.get("provides_tools", []), - provides_hooks=data.get("provides_hooks", []), - source=source, - path=str(child), + # No manifest at this level. If we're still within the depth + # cap, treat this directory as a category namespace and recurse + # one level in looking for children with manifests.
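# --- illustrative aside (not part of the patch) ---------------------------
# Key shapes produced by the two layouts the docstring above describes
# (pure path math; `_plugin_key` is a hypothetical helper, and the real
# scanner accumulates the prefix during recursion instead):
from pathlib import PurePosixPath

def _plugin_key(rel: str) -> str:
    # One segment → flat key; two → "category/name". The scanner never
    # recurses past two segments, so deeper paths don't arise.
    parts = PurePosixPath(rel).parts
    assert len(parts) <= 2
    return "/".join(parts)

assert _plugin_key("disk-cleanup") == "disk-cleanup"          # flat layout
assert _plugin_key("image_gen/openai") == "image_gen/openai"  # category layout
# ---------------------------------------------------------------------------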
+ if depth >= 1: + logger.debug("Skipping %s (no plugin.yaml, depth cap reached)", child) + continue + + sub_prefix = f"{prefix}/{child.name}" if prefix else child.name + manifests.extend( + self._scan_directory_level( + child, + source, + skip_names=None, + prefix=sub_prefix, + depth=depth + 1, ) - manifests.append(manifest) - except Exception as exc: - logger.warning("Failed to parse %s: %s", manifest_file, exc) + ) return manifests + def _parse_manifest( + self, + manifest_file: Path, + plugin_dir: Path, + source: str, + prefix: str, + ) -> Optional[PluginManifest]: + """Parse a single ``plugin.yaml`` into a :class:`PluginManifest`. + + Returns ``None`` on parse failure (logs a warning). + """ + try: + if yaml is None: + logger.warning("PyYAML not installed – cannot load %s", manifest_file) + return None + data = yaml.safe_load(manifest_file.read_text()) or {} + + name = data.get("name", plugin_dir.name) + key = f"{prefix}/{plugin_dir.name}" if prefix else name + + raw_kind = data.get("kind", "standalone") + if not isinstance(raw_kind, str): + raw_kind = "standalone" + kind = raw_kind.strip().lower() + if kind not in _VALID_PLUGIN_KINDS: + logger.warning( + "Plugin %s: unknown kind '%s' (valid: %s); treating as 'standalone'", + key, raw_kind, ", ".join(sorted(_VALID_PLUGIN_KINDS)), + ) + kind = "standalone" + + # Auto-coerce user-installed memory providers to kind="exclusive" + # so they're routed to plugins/memory discovery instead of being + # loaded by the general PluginManager (which has no + # register_memory_provider on PluginContext). Mirrors the + # heuristic in plugins/memory/__init__.py:_is_memory_provider_dir. + # Bundled memory providers are already skipped via skip_names. + if kind == "standalone" and "kind" not in data: + init_file = plugin_dir / "__init__.py" + if init_file.exists(): + try: + source_text = init_file.read_text(errors="replace")[:8192] + if ( + "register_memory_provider" in source_text + or "MemoryProvider" in source_text + ): + kind = "exclusive" + logger.debug( + "Plugin %s: detected memory provider, " + "treating as kind='exclusive'", + key, + ) + except Exception: + pass + + return PluginManifest( + name=name, + version=str(data.get("version", "")), + description=data.get("description", ""), + author=data.get("author", ""), + requires_env=data.get("requires_env", []), + provides_tools=data.get("provides_tools", []), + provides_hooks=data.get("provides_hooks", []), + source=source, + path=str(plugin_dir), + kind=kind, + key=key, + ) + except Exception as exc: + logger.warning("Failed to parse %s: %s", manifest_file, exc) + return None + # ----------------------------------------------------------------------- # Entry-point scanning # ----------------------------------------------------------------------- @@ -514,6 +797,7 @@ class PluginManager: name=ep.name, source="entrypoint", path=ep.value, + key=ep.name, ) manifests.append(manifest) except Exception as exc: @@ -530,7 +814,7 @@ class PluginManager: loaded = LoadedPlugin(manifest=manifest) try: - if manifest.source in ("user", "project"): + if manifest.source in ("user", "project", "bundled"): module = self._load_directory_module(manifest) else: module = self._load_entrypoint_module(manifest) @@ -575,10 +859,16 @@ class PluginManager: loaded.error = str(exc) logger.warning("Failed to load plugin '%s': %s", manifest.name, exc) - self._plugins[manifest.name] = loaded + self._plugins[manifest.key or manifest.name] = loaded def _load_directory_module(self, manifest: PluginManifest) -> types.ModuleType: - 
"""Import a directory-based plugin as ``hermes_plugins.``.""" + """Import a directory-based plugin as ``hermes_plugins.``. + + The module slug is derived from ``manifest.key`` so category-namespaced + plugins (``image_gen/openai``) import as + ``hermes_plugins.image_gen__openai`` without colliding with any + future ``tts/openai``. + """ plugin_dir = Path(manifest.path) # type: ignore[arg-type] init_file = plugin_dir / "__init__.py" if not init_file.exists(): @@ -591,7 +881,9 @@ class PluginManager: ns_pkg.__package__ = _NS_PARENT sys.modules[_NS_PARENT] = ns_pkg - module_name = f"{_NS_PARENT}.{manifest.name.replace('-', '_')}" + key = manifest.key or manifest.name + slug = key.replace("/", "__").replace("-", "_") + module_name = f"{_NS_PARENT}.{slug}" spec = importlib.util.spec_from_file_location( module_name, init_file, @@ -672,10 +964,12 @@ class PluginManager: def list_plugins(self) -> List[Dict[str, Any]]: """Return a list of info dicts for all discovered plugins.""" result: List[Dict[str, Any]] = [] - for name, loaded in sorted(self._plugins.items()): + for key, loaded in sorted(self._plugins.items()): result.append( { - "name": name, + "name": loaded.manifest.name, + "key": loaded.manifest.key or loaded.manifest.name, + "kind": loaded.manifest.kind, "version": loaded.manifest.version, "description": loaded.manifest.description, "source": loaded.manifest.source, @@ -779,23 +1073,31 @@ def get_pre_tool_call_block_message( return None +def _ensure_plugins_discovered() -> PluginManager: + """Return the global manager after running idempotent plugin discovery.""" + manager = get_plugin_manager() + manager.discover_and_load() + return manager + + def get_plugin_context_engine(): """Return the plugin-registered context engine, or None.""" - return get_plugin_manager()._context_engine + return _ensure_plugins_discovered()._context_engine def get_plugin_command_handler(name: str) -> Optional[Callable]: """Return the handler for a plugin-registered slash command, or ``None``.""" - entry = get_plugin_manager()._plugin_commands.get(name) + entry = _ensure_plugins_discovered()._plugin_commands.get(name) return entry["handler"] if entry else None def get_plugin_commands() -> Dict[str, dict]: """Return the full plugin commands dict (name → {handler, description, plugin}). - Safe to call before discovery — returns an empty dict if no plugins loaded. + Triggers idempotent plugin discovery so callers can use plugin commands + before any explicit discover_plugins() call. """ - return get_plugin_manager()._plugin_commands + return _ensure_plugins_discovered()._plugin_commands def get_plugin_toolsets() -> List[tuple]: diff --git a/hermes_cli/plugins_cmd.py b/hermes_cli/plugins_cmd.py index c92d8b0dc6..230e134207 100644 --- a/hermes_cli/plugins_cmd.py +++ b/hermes_cli/plugins_cmd.py @@ -15,6 +15,7 @@ import shutil import subprocess import sys from pathlib import Path +from typing import Optional from hermes_constants import get_hermes_home @@ -281,8 +282,16 @@ def _require_installed_plugin(name: str, plugins_dir: Path, console) -> Path: # --------------------------------------------------------------------------- -def cmd_install(identifier: str, force: bool = False) -> None: - """Install a plugin from a Git URL or owner/repo shorthand.""" +def cmd_install( + identifier: str, + force: bool = False, + enable: Optional[bool] = None, +) -> None: + """Install a plugin from a Git URL or owner/repo shorthand. + + After install, prompt "Enable now? 
[y/N]" unless *enable* is provided + (True = auto-enable without prompting, False = install disabled). + """ import tempfile from rich.console import Console @@ -391,6 +400,40 @@ def cmd_install(identifier: str, force: bool = False) -> None: _display_after_install(target, identifier) + # Determine the canonical plugin name for enable-list bookkeeping. + installed_name = installed_manifest.get("name") or target.name + + # Decide whether to enable: explicit flag > interactive prompt > default off + should_enable = enable + if should_enable is None: + # Interactive prompt unless stdin isn't a TTY (scripted install). + if sys.stdin.isatty() and sys.stdout.isatty(): + try: + answer = input( + f" Enable '{installed_name}' now? [y/N]: " + ).strip().lower() + should_enable = answer in ("y", "yes") + except (EOFError, KeyboardInterrupt): + should_enable = False + else: + should_enable = False + + if should_enable: + enabled = _get_enabled_set() + disabled = _get_disabled_set() + enabled.add(installed_name) + disabled.discard(installed_name) + _save_enabled_set(enabled) + _save_disabled_set(disabled) + console.print( + f"[green]✓[/green] Plugin [bold]{installed_name}[/bold] enabled." + ) + else: + console.print( + f"[dim]Plugin installed but not enabled. " + f"Run `hermes plugins enable {installed_name}` to activate.[/dim]" + ) + console.print("[dim]Restart the gateway for the plugin to take effect:[/dim]") console.print("[dim] hermes gateway restart[/dim]") console.print() @@ -468,7 +511,11 @@ def cmd_remove(name: str) -> None: def _get_disabled_set() -> set: - """Read the disabled plugins set from config.yaml.""" + """Read the disabled plugins set from config.yaml. + + An explicit deny-list. A plugin name here never loads, even if also + listed in ``plugins.enabled``. + """ try: from hermes_cli.config import load_config config = load_config() @@ -488,103 +535,196 @@ def _save_disabled_set(disabled: set) -> None: save_config(config) +def _get_enabled_set() -> set: + """Read the enabled plugins allow-list from config.yaml. + + Plugins are opt-in: only names here are loaded. Returns ``set()`` if + the key is missing (same behaviour as "nothing enabled yet"). + """ + try: + from hermes_cli.config import load_config + config = load_config() + plugins_cfg = config.get("plugins", {}) + if not isinstance(plugins_cfg, dict): + return set() + enabled = plugins_cfg.get("enabled", []) + return set(enabled) if isinstance(enabled, list) else set() + except Exception: + return set() + + +def _save_enabled_set(enabled: set) -> None: + """Write the enabled plugins list to config.yaml.""" + from hermes_cli.config import load_config, save_config + config = load_config() + if "plugins" not in config: + config["plugins"] = {} + config["plugins"]["enabled"] = sorted(enabled) + save_config(config) + + def cmd_enable(name: str) -> None: - """Enable a previously disabled plugin.""" + """Add a plugin to the enabled allow-list (and remove it from disabled).""" from rich.console import Console console = Console() - plugins_dir = _plugins_dir() - - # Verify the plugin exists - target = plugins_dir / name - if not target.is_dir(): - console.print(f"[red]Plugin '{name}' is not installed.[/red]") + # Discover the plugin — check installed (user) AND bundled. 
+    if not _plugin_exists(name):
+        console.print(f"[red]Plugin '{name}' is not installed or bundled.[/red]")
         sys.exit(1)
 
+    enabled = _get_enabled_set()
     disabled = _get_disabled_set()
-    if name not in disabled:
+
+    if name in enabled and name not in disabled:
         console.print(f"[dim]Plugin '{name}' is already enabled.[/dim]")
         return
 
+    enabled.add(name)
     disabled.discard(name)
+    _save_enabled_set(enabled)
     _save_disabled_set(disabled)
-    console.print(f"[green]✓[/green] Plugin [bold]{name}[/bold] enabled. Takes effect on next session.")
+    console.print(
+        f"[green]✓[/green] Plugin [bold]{name}[/bold] enabled. "
+        "Takes effect on next session."
+    )
 
 
 def cmd_disable(name: str) -> None:
-    """Disable a plugin without removing it."""
+    """Remove a plugin from the enabled allow-list (and add to disabled)."""
     from rich.console import Console
     console = Console()
-    plugins_dir = _plugins_dir()
-
-    # Verify the plugin exists
-    target = plugins_dir / name
-    if not target.is_dir():
-        console.print(f"[red]Plugin '{name}' is not installed.[/red]")
+    if not _plugin_exists(name):
+        console.print(f"[red]Plugin '{name}' is not installed or bundled.[/red]")
         sys.exit(1)
 
+    enabled = _get_enabled_set()
     disabled = _get_disabled_set()
-    if name in disabled:
+
+    if name not in enabled and name in disabled:
         console.print(f"[dim]Plugin '{name}' is already disabled.[/dim]")
         return
 
+    enabled.discard(name)
     disabled.add(name)
+    _save_enabled_set(enabled)
     _save_disabled_set(disabled)
-    console.print(f"[yellow]\u2298[/yellow] Plugin [bold]{name}[/bold] disabled. Takes effect on next session.")
+    console.print(
+        f"[yellow]\u2298[/yellow] Plugin [bold]{name}[/bold] disabled. "
+        "Takes effect on next session."
+    )
 
 
-def cmd_list() -> None:
-    """List installed plugins."""
-    from rich.console import Console
-    from rich.table import Table
+def _plugin_exists(name: str) -> bool:
+    """Return True if a plugin with *name* is installed (user) or bundled."""
+    # Installed: directory name or manifest name match in user plugins dir
+    user_dir = _plugins_dir()
+    if user_dir.is_dir():
+        if (user_dir / name).is_dir():
+            return True
+        for child in user_dir.iterdir():
+            if not child.is_dir():
+                continue
+            manifest = _read_manifest(child)
+            if manifest.get("name") == name:
+                return True
+    # Bundled: <repo>/plugins/<name>/
+    from pathlib import Path as _P
+    import hermes_cli
+    repo_plugins = _P(hermes_cli.__file__).resolve().parent.parent / "plugins"
+    if repo_plugins.is_dir():
+        candidate = repo_plugins / name
+        if candidate.is_dir() and (
+            (candidate / "plugin.yaml").exists()
+            or (candidate / "plugin.yml").exists()
+        ):
+            return True
+    return False
+
+
+def _discover_all_plugins() -> list:
+    """Return a list of (name, version, description, source, dir_path) for
+    every plugin this command can see — bundled + user.
+
+    Matches the ordering/dedup of ``PluginManager.discover_and_load`` for
+    these sources: bundled first, then user; user overrides bundled on
+    name collision. (Project-scoped plugins are not scanned here.)
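+
+    Example entry (illustrative values only)::
+
+        ("disk-cleanup", "1.2.0", "Clean temp files", "git", Path("..."))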
+ """ try: import yaml except ImportError: yaml = None - console = Console() - plugins_dir = _plugins_dir() + seen: dict = {} # name -> (name, version, description, source, path) - dirs = sorted(d for d in plugins_dir.iterdir() if d.is_dir()) - if not dirs: + # Bundled (/plugins//), excluding memory/ and context_engine/ + import hermes_cli + repo_plugins = Path(hermes_cli.__file__).resolve().parent.parent / "plugins" + for base, source in ((repo_plugins, "bundled"), (_plugins_dir(), "user")): + if not base.is_dir(): + continue + for d in sorted(base.iterdir()): + if not d.is_dir(): + continue + if source == "bundled" and d.name in ("memory", "context_engine"): + continue + manifest_file = d / "plugin.yaml" + if not manifest_file.exists(): + manifest_file = d / "plugin.yml" + if not manifest_file.exists(): + continue + name = d.name + version = "" + description = "" + if yaml: + try: + with open(manifest_file) as f: + manifest = yaml.safe_load(f) or {} + name = manifest.get("name", d.name) + version = manifest.get("version", "") + description = manifest.get("description", "") + except Exception: + pass + # User plugins override bundled on name collision. + if name in seen and source == "bundled": + continue + src_label = source + if source == "user" and (d / ".git").exists(): + src_label = "git" + seen[name] = (name, version, description, src_label, d) + return list(seen.values()) + + +def cmd_list() -> None: + """List all plugins (bundled + user) with enabled/disabled state.""" + from rich.console import Console + from rich.table import Table + + console = Console() + entries = _discover_all_plugins() + if not entries: console.print("[dim]No plugins installed.[/dim]") console.print("[dim]Install with:[/dim] hermes plugins install owner/repo") return + enabled = _get_enabled_set() disabled = _get_disabled_set() - table = Table(title="Installed Plugins", show_lines=False) + table = Table(title="Plugins", show_lines=False) table.add_column("Name", style="bold") table.add_column("Status") table.add_column("Version", style="dim") table.add_column("Description") table.add_column("Source", style="dim") - for d in dirs: - manifest_file = d / "plugin.yaml" - name = d.name - version = "" - description = "" - source = "local" - - if manifest_file.exists() and yaml: - try: - with open(manifest_file) as f: - manifest = yaml.safe_load(f) or {} - name = manifest.get("name", d.name) - version = manifest.get("version", "") - description = manifest.get("description", "") - except Exception: - pass - - # Check if it's a git repo (installed via hermes plugins install) - if (d / ".git").exists(): - source = "git" - - is_disabled = name in disabled or d.name in disabled - status = "[red]disabled[/red]" if is_disabled else "[green]enabled[/green]" + for name, version, description, source, _dir in entries: + if name in disabled: + status = "[red]disabled[/red]" + elif name in enabled: + status = "[green]enabled[/green]" + else: + status = "[yellow]not enabled[/yellow]" table.add_row(name, status, str(version), description, source) console.print() @@ -592,6 +732,7 @@ def cmd_list() -> None: console.print() console.print("[dim]Interactive toggle:[/dim] hermes plugins") console.print("[dim]Enable/disable:[/dim] hermes plugins enable/disable ") + console.print("[dim]Plugins are opt-in by default — only 'enabled' plugins load.[/dim]") # --------------------------------------------------------------------------- @@ -742,41 +883,25 @@ def cmd_toggle() -> None: """Interactive composite UI — general plugins + provider 
plugin categories.""" from rich.console import Console - try: - import yaml - except ImportError: - yaml = None - console = Console() - plugins_dir = _plugins_dir() - # -- General plugins discovery -- - dirs = sorted(d for d in plugins_dir.iterdir() if d.is_dir()) - disabled = _get_disabled_set() + # -- General plugins discovery (bundled + user) -- + entries = _discover_all_plugins() + enabled_set = _get_enabled_set() + disabled_set = _get_disabled_set() plugin_names = [] plugin_labels = [] plugin_selected = set() - for i, d in enumerate(dirs): - manifest_file = d / "plugin.yaml" - name = d.name - description = "" - - if manifest_file.exists() and yaml: - try: - with open(manifest_file) as f: - manifest = yaml.safe_load(f) or {} - name = manifest.get("name", d.name) - description = manifest.get("description", "") - except Exception: - pass - - plugin_names.append(name) + for i, (name, _version, description, source, _d) in enumerate(entries): label = f"{name} \u2014 {description}" if description else name + if source == "bundled": + label = f"{label} [bundled]" + plugin_names.append(name) plugin_labels.append(label) - - if name not in disabled and d.name not in disabled: + # Selected (enabled) when in enabled-set AND not in disabled-set + if name in enabled_set and name not in disabled_set: plugin_selected.add(i) # -- Provider categories -- @@ -804,10 +929,10 @@ def cmd_toggle() -> None: try: import curses _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected, - disabled, categories, console) + disabled_set, categories, console) except ImportError: _run_composite_fallback(plugin_names, plugin_labels, plugin_selected, - disabled, categories, console) + disabled_set, categories, console) def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected, @@ -1020,18 +1145,29 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected, curses.wrapper(_draw) flush_stdin() - # Persist general plugin changes - new_disabled = set() + # Persist general plugin changes. The new allow-list is the set of + # plugin names that were checked; anything not checked is explicitly + # disabled (written to disabled-list) so it remains off even if the + # plugin code does something clever like auto-enable in the future. + new_enabled: set = set() + new_disabled: set = set(disabled) # preserve existing disabled state for unseen plugins for i, name in enumerate(plugin_names): - if i not in chosen: + if i in chosen: + new_enabled.add(name) + new_disabled.discard(name) + else: new_disabled.add(name) - if new_disabled != disabled: + prev_enabled = _get_enabled_set() + enabled_changed = new_enabled != prev_enabled + disabled_changed = new_disabled != disabled + + if enabled_changed or disabled_changed: + _save_enabled_set(new_enabled) _save_disabled_set(new_disabled) - enabled_count = len(plugin_names) - len(new_disabled) console.print( - f"\n[green]\u2713[/green] General plugins: {enabled_count} enabled, " - f"{len(new_disabled)} disabled." + f"\n[green]\u2713[/green] General plugins: {len(new_enabled)} enabled, " + f"{len(plugin_names) - len(new_enabled)} disabled." 
) elif n_plugins > 0: console.print("\n[dim]General plugins unchanged.[/dim]") @@ -1078,11 +1214,17 @@ def _run_composite_fallback(plugin_names, plugin_labels, plugin_selected, return print() - new_disabled = set() + new_enabled: set = set() + new_disabled: set = set(disabled) for i, name in enumerate(plugin_names): - if i not in chosen: + if i in chosen: + new_enabled.add(name) + new_disabled.discard(name) + else: new_disabled.add(name) - if new_disabled != disabled: + prev_enabled = _get_enabled_set() + if new_enabled != prev_enabled or new_disabled != disabled: + _save_enabled_set(new_enabled) _save_disabled_set(new_disabled) # Provider categories @@ -1108,7 +1250,17 @@ def plugins_command(args) -> None: action = getattr(args, "plugins_action", None) if action == "install": - cmd_install(args.identifier, force=getattr(args, "force", False)) + # Map argparse tri-state: --enable=True, --no-enable=False, neither=None (prompt) + enable_arg = None + if getattr(args, "enable", False): + enable_arg = True + elif getattr(args, "no_enable", False): + enable_arg = False + cmd_install( + args.identifier, + force=getattr(args, "force", False), + enable=enable_arg, + ) elif action == "update": cmd_update(args.name) elif action in ("remove", "rm", "uninstall"): diff --git a/hermes_cli/providers.py b/hermes_cli/providers.py index a71055cfe4..e842086a41 100644 --- a/hermes_cli/providers.py +++ b/hermes_cli/providers.py @@ -23,6 +23,8 @@ import logging from dataclasses import dataclass from typing import Any, Dict, List, Optional, Tuple +from utils import base_url_host_matches, base_url_hostname + logger = logging.getLogger(__name__) @@ -92,6 +94,12 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = { transport="openai_chat", base_url_env_var="KIMI_BASE_URL", ), + "stepfun": HermesOverlay( + transport="openai_chat", + extra_env_vars=("STEPFUN_API_KEY",), + base_url_override="https://api.stepfun.ai/step_plan/v1", + base_url_env_var="STEPFUN_BASE_URL", + ), "minimax": HermesOverlay( transport="anthropic_messages", base_url_env_var="MINIMAX_BASE_URL", @@ -208,6 +216,10 @@ ALIASES: Dict[str, str] = { "kimi-coding-cn": "kimi-for-coding", "moonshot": "kimi-for-coding", + # stepfun + "step": "stepfun", + "stepfun-coding-plan": "stepfun", + # minimax-cn "minimax-china": "minimax-cn", "minimax_cn": "minimax-cn", @@ -292,6 +304,7 @@ _LABEL_OVERRIDES: Dict[str, str] = { "nous": "Nous Portal", "openai-codex": "OpenAI Codex", "copilot-acp": "GitHub Copilot ACP", + "stepfun": "StepFun Step Plan", "xiaomi": "Xiaomi MiMo", "local": "Local endpoint", "bedrock": "AWS Bedrock", @@ -322,12 +335,16 @@ def normalize_provider(name: str) -> str: def get_provider(name: str) -> Optional[ProviderDef]: - """Look up a provider by id or alias, merging all data sources. + """Look up a built-in provider by id or alias. Resolution order: 1. Hermes overlays (for providers not in models.dev: nous, openai-codex, etc.) 2. models.dev catalog + Hermes overlay - 3. User-defined providers from config (TODO: Phase 4) + + User-defined providers from config.yaml (``providers:`` / ``custom_providers:``) + are resolved by :func:`resolve_provider_full`, which layers ``resolve_user_provider`` + and ``resolve_custom_provider`` on top of this function. Callers that need + user-config support should use ``resolve_provider_full`` instead. Returns a fully-resolved ProviderDef or None. 
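+
+    Example (illustrative; shown resolving through an alias)::
+
+        pdef = get_provider("moonshot")   # alias for "kimi-for-coding"
+        if pdef is not None:
+            pdef.transport                # e.g. "openai_chat"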
""" @@ -421,6 +438,16 @@ def determine_api_mode(provider: str, base_url: str = "") -> str: """ pdef = get_provider(provider) if pdef is not None: + # Even for known providers, check URL heuristics for special endpoints + # (e.g. kimi /coding endpoint needs anthropic_messages even on 'custom') + if base_url: + url_lower = base_url.rstrip("/").lower() + if "api.kimi.com/coding" in url_lower: + return "anthropic_messages" + if url_lower.endswith("/anthropic") or "api.anthropic.com" in url_lower: + return "anthropic_messages" + if "api.openai.com" in url_lower: + return "codex_responses" return TRANSPORT_TO_API_MODE.get(pdef.transport, "chat_completions") # Direct provider checks for providers not in HERMES_OVERLAYS @@ -430,11 +457,14 @@ def determine_api_mode(provider: str, base_url: str = "") -> str: # URL-based heuristics for custom / unknown providers if base_url: url_lower = base_url.rstrip("/").lower() - if url_lower.endswith("/anthropic") or "api.anthropic.com" in url_lower: + hostname = base_url_hostname(base_url) + if url_lower.endswith("/anthropic") or hostname == "api.anthropic.com": return "anthropic_messages" - if "api.openai.com" in url_lower: + if hostname == "api.kimi.com" and "/coding" in url_lower: + return "anthropic_messages" + if hostname == "api.openai.com": return "codex_responses" - if "bedrock-runtime" in url_lower and "amazonaws.com" in url_lower: + if hostname.startswith("bedrock-runtime.") and base_url_host_matches(base_url, "amazonaws.com"): return "bedrock_converse" return "chat_completions" diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index a5c286fe01..922946e2ad 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -29,6 +29,7 @@ from hermes_cli.auth import ( ) from hermes_cli.config import get_compatible_custom_providers, load_config from hermes_constants import OPENROUTER_BASE_URL +from utils import base_url_host_matches, base_url_hostname def _normalize_custom_provider_name(value: str) -> str: @@ -38,14 +39,27 @@ def _normalize_custom_provider_name(value: str) -> str: def _detect_api_mode_for_url(base_url: str) -> Optional[str]: """Auto-detect api_mode from the resolved base URL. - Direct api.openai.com endpoints need the Responses API for GPT-5.x - tool calls with reasoning (chat/completions returns 400). + - Direct api.openai.com endpoints need the Responses API for GPT-5.x + tool calls with reasoning (chat/completions returns 400). + - Third-party Anthropic-compatible gateways (MiniMax, Zhipu GLM, + LiteLLM proxies, etc.) conventionally expose the native Anthropic + protocol under a ``/anthropic`` suffix — treat those as + ``anthropic_messages`` transport instead of the default + ``chat_completions``. + - Kimi Code's ``api.kimi.com/coding`` endpoint also speaks the + Anthropic Messages protocol (the /coding route accepts Claude + Code's native request shape). 
""" normalized = (base_url or "").strip().lower().rstrip("/") - if "api.x.ai" in normalized: + hostname = base_url_hostname(base_url) + if hostname == "api.x.ai": return "codex_responses" - if "api.openai.com" in normalized and "openrouter" not in normalized: + if hostname == "api.openai.com": return "codex_responses" + if normalized.endswith("/anthropic"): + return "anthropic_messages" + if hostname == "api.kimi.com" and "/coding" in normalized: + return "anthropic_messages" return None @@ -194,8 +208,13 @@ def _resolve_runtime_from_pool_entry( elif provider in ("opencode-zen", "opencode-go"): from hermes_cli.models import opencode_model_api_mode api_mode = opencode_model_api_mode(provider, model_cfg.get("default", "")) - elif base_url.rstrip("/").endswith("/anthropic"): - api_mode = "anthropic_messages" + else: + # Auto-detect Anthropic-compatible endpoints (/anthropic suffix, + # Kimi /coding, api.openai.com → codex_responses, api.x.ai → + # codex_responses). + detected = _detect_api_mode_for_url(base_url) + if detected: + api_mode = detected # OpenCode base URLs end with /v1 for OpenAI-compatible models, but the # Anthropic SDK prepends its own /v1/messages to the base_url. Strip the @@ -469,7 +488,7 @@ def _resolve_openrouter_runtime( # When hitting a custom endpoint (e.g. Z.ai, local LLM), prefer # OPENAI_API_KEY so the OpenRouter key doesn't leak to an unrelated # provider (issues #420, #560). - _is_openrouter_url = "openrouter.ai" in base_url + _is_openrouter_url = base_url_host_matches(base_url, "openrouter.ai") if _is_openrouter_url: api_key_candidates = [ explicit_api_key, @@ -479,8 +498,12 @@ def _resolve_openrouter_runtime( else: # Custom endpoint: use api_key from config when using config base_url (#1760). # When the endpoint is Ollama Cloud, check OLLAMA_API_KEY — it's - # the canonical env var for ollama.com authentication. - _is_ollama_url = "ollama.com" in base_url.lower() + # the canonical env var for ollama.com authentication. Match on + # HOST, not substring — a custom base_url whose path contains + # "ollama.com" (e.g. http://127.0.0.1/ollama.com/v1) or whose + # hostname is a look-alike (ollama.com.attacker.test) must not + # receive the Ollama credential. See GHSA-76xc-57q6-vm5m. + _is_ollama_url = base_url_host_matches(base_url, "ollama.com") api_key_candidates = [ explicit_api_key, (cfg_api_key if use_config_base_url else ""), @@ -642,8 +665,12 @@ def _resolve_explicit_runtime( configured_mode = _parse_api_mode(model_cfg.get("api_mode")) if configured_mode: api_mode = configured_mode - elif base_url.rstrip("/").endswith("/anthropic"): - api_mode = "anthropic_messages" + else: + # Auto-detect from URL (Anthropic /anthropic suffix, + # api.openai.com → Responses, Kimi /coding, etc.). 
+ detected = _detect_api_mode_for_url(base_url) + if detected: + api_mode = detected return { "provider": provider, @@ -890,8 +917,7 @@ def resolve_runtime_provider( code="no_aws_credentials", ) # Read bedrock-specific config from config.yaml - from hermes_cli.config import load_config as _load_bedrock_config - _bedrock_cfg = _load_bedrock_config().get("bedrock", {}) + _bedrock_cfg = load_config().get("bedrock", {}) # Region priority: config.yaml bedrock.region → env var → us-east-1 region = (_bedrock_cfg.get("region") or "").strip() or resolve_bedrock_region() auth_source = resolve_aws_auth_env_var() or "aws-sdk-default-chain" @@ -965,10 +991,13 @@ def resolve_runtime_provider( elif provider in ("opencode-zen", "opencode-go"): from hermes_cli.models import opencode_model_api_mode api_mode = opencode_model_api_mode(provider, model_cfg.get("default", "")) - # Auto-detect Anthropic-compatible endpoints by URL convention - # (e.g. https://api.minimax.io/anthropic, https://dashscope.../anthropic) - elif base_url.rstrip("/").endswith("/anthropic"): - api_mode = "anthropic_messages" + else: + # Auto-detect Anthropic-compatible endpoints by URL convention + # (e.g. https://api.minimax.io/anthropic, https://dashscope.../anthropic) + # plus api.openai.com → codex_responses and api.x.ai → codex_responses. + detected = _detect_api_mode_for_url(base_url) + if detected: + api_mode = detected # Strip trailing /v1 for OpenCode Anthropic models (see comment above). if api_mode == "anthropic_messages" and provider in ("opencode-zen", "opencode-go"): base_url = re.sub(r"/v1/?$", "", base_url) diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 8f6b633c6a..1fe5ae0580 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -22,6 +22,7 @@ from typing import Optional, Dict, Any from hermes_cli.nous_subscription import get_nous_subscription_features from tools.tool_backend_helpers import managed_nous_tools_enabled +from utils import base_url_hostname from hermes_constants import get_optional_skills_dir logger = logging.getLogger(__name__) @@ -89,19 +90,20 @@ _DEFAULT_PROVIDER_MODELS = { "grok-code-fast-1", ], "gemini": [ - "gemini-3.1-pro-preview", "gemini-3-flash-preview", "gemini-3.1-flash-lite-preview", - "gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite", + "gemini-3.1-pro-preview", "gemini-3-pro-preview", + "gemini-3-flash-preview", "gemini-3.1-flash-lite-preview", ], "zai": ["glm-5.1", "glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"], - "kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"], - "kimi-coding-cn": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"], + "kimi-coding": ["kimi-k2.6", "kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"], + "kimi-coding-cn": ["kimi-k2.6", "kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"], + "stepfun": ["step-3.5-flash", "step-3.5-flash-2603"], "arcee": ["trinity-large-thinking", "trinity-large-preview", "trinity-mini"], "minimax": ["MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1", "MiniMax-M2"], "minimax-cn": ["MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1", "MiniMax-M2"], "ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"], "kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"], "opencode-zen": ["gpt-5.4", "gpt-5.3-codex", "claude-sonnet-4-6", "gemini-3-flash", "glm-5", "kimi-k2.5", "minimax-m2.7"], - "opencode-go": ["glm-5.1", "glm-5", 
"kimi-k2.5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7"], + "opencode-go": ["kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7", "qwen3.6-plus", "qwen3.5-plus"], "huggingface": [ "Qwen/Qwen3.5-397B-A17B", "Qwen/Qwen3-235B-A22B-Thinking-2507", "Qwen/Qwen3-Coder-480B-A35B-Instruct", "deepseek-ai/DeepSeek-R1-0528", @@ -407,13 +409,36 @@ def _print_setup_summary(config: dict, hermes_home): ("Browser Automation", False, missing_browser_hint) ) - # FAL (image generation) + # Image generation — FAL (direct or via Nous), or any plugin-registered + # provider (OpenAI, etc.) if subscription_features.image_gen.managed_by_nous: tool_status.append(("Image Generation (Nous subscription)", True, None)) elif subscription_features.image_gen.available: tool_status.append(("Image Generation", True, None)) else: - tool_status.append(("Image Generation", False, "FAL_KEY")) + # Fall back to probing plugin-registered providers so OpenAI-only + # setups don't show as "missing FAL_KEY". + _img_backend = None + try: + from agent.image_gen_registry import list_providers + from hermes_cli.plugins import _ensure_plugins_discovered + + _ensure_plugins_discovered() + for _p in list_providers(): + if _p.name == "fal": + continue + try: + if _p.is_available(): + _img_backend = _p.display_name + break + except Exception: + continue + except Exception: + pass + if _img_backend: + tool_status.append((f"Image Generation ({_img_backend})", True, None)) + else: + tool_status.append(("Image Generation", False, "FAL_KEY or OPENAI_API_KEY")) # TTS — show configured provider tts_provider = config.get("tts", {}).get("provider", "edge") @@ -433,7 +458,6 @@ def _print_setup_summary(config: dict, hermes_home): tool_status.append(("Text-to-Speech (Google Gemini)", True, None)) elif tts_provider == "neutts": try: - import importlib.util neutts_ok = importlib.util.find_spec("neutts") is not None except Exception: neutts_ok = False @@ -441,6 +465,16 @@ def _print_setup_summary(config: dict, hermes_home): tool_status.append(("Text-to-Speech (NeuTTS local)", True, None)) else: tool_status.append(("Text-to-Speech (NeuTTS — not installed)", False, "run 'hermes setup tts'")) + elif tts_provider == "kittentts": + try: + import importlib.util + kittentts_ok = importlib.util.find_spec("kittentts") is not None + except Exception: + kittentts_ok = False + if kittentts_ok: + tool_status.append(("Text-to-Speech (KittenTTS local)", True, None)) + else: + tool_status.append(("Text-to-Speech (KittenTTS — not installed)", False, "run 'hermes setup tts'")) else: tool_status.append(("Text-to-Speech (Edge TTS)", True, None)) @@ -771,6 +805,7 @@ def setup_model_provider(config: dict, *, quick: bool = False): "zai": "Z.AI / GLM", "kimi-coding": "Kimi / Moonshot", "kimi-coding-cn": "Kimi / Moonshot (China)", + "stepfun": "StepFun Step Plan", "minimax": "MiniMax", "minimax-cn": "MiniMax CN", "anthropic": "Anthropic", @@ -803,7 +838,8 @@ def setup_model_provider(config: dict, *, quick: bool = False): elif _vision_idx == 1: # OpenAI-compatible endpoint _base_url = prompt(" Base URL (blank for OpenAI)").strip() or "https://api.openai.com/v1" _api_key_label = " API key" - if "api.openai.com" in _base_url.lower(): + _is_native_openai = base_url_hostname(_base_url) == "api.openai.com" + if _is_native_openai: _api_key_label = " OpenAI API key" _oai_key = prompt(_api_key_label, password=True).strip() if _oai_key: @@ -811,7 +847,7 @@ def setup_model_provider(config: dict, *, quick: bool = False): 
# Save vision base URL to config (not .env — only secrets go there) _vaux = config.setdefault("auxiliary", {}).setdefault("vision", {}) _vaux["base_url"] = _base_url - if "api.openai.com" in _base_url.lower(): + if _is_native_openai: _oai_vision_models = ["gpt-4o", "gpt-4o-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano"] _vm_choices = _oai_vision_models + ["Use default (gpt-4o-mini)"] _vm_idx = prompt_choice("Select vision model:", _vm_choices, 0) @@ -847,7 +883,6 @@ def setup_model_provider(config: dict, *, quick: bool = False): def _check_espeak_ng() -> bool: """Check if espeak-ng is installed.""" - import shutil return shutil.which("espeak-ng") is not None or shutil.which("espeak") is not None @@ -901,6 +936,31 @@ def _install_neutts_deps() -> bool: return False +def _install_kittentts_deps() -> bool: + """Install KittenTTS dependencies with user approval. Returns True on success.""" + import subprocess + import sys + + wheel_url = ( + "https://github.com/KittenML/KittenTTS/releases/download/" + "0.8.1/kittentts-0.8.1-py3-none-any.whl" + ) + print() + print_info("Installing kittentts Python package (~25-80MB model downloaded on first use)...") + print() + try: + subprocess.run( + [sys.executable, "-m", "pip", "install", "-U", wheel_url, "soundfile", "--quiet"], + check=True, timeout=300, + ) + print_success("kittentts installed successfully") + return True + except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e: + print_error(f"Failed to install kittentts: {e}") + print_info(f"Try manually: python -m pip install -U '{wheel_url}' soundfile") + return False + + def _setup_tts_provider(config: dict): """Interactive TTS provider selection with install flow for NeuTTS.""" tts_config = config.get("tts", {}) @@ -916,6 +976,7 @@ def _setup_tts_provider(config: dict): "mistral": "Mistral Voxtral TTS", "gemini": "Google Gemini TTS", "neutts": "NeuTTS", + "kittentts": "KittenTTS", } current_label = provider_labels.get(current_provider, current_provider) @@ -939,9 +1000,10 @@ def _setup_tts_provider(config: dict): "Mistral Voxtral TTS (multilingual, native Opus, needs API key)", "Google Gemini TTS (30 prebuilt voices, prompt-controllable, needs API key)", "NeuTTS (local on-device, free, ~300MB model download)", + "KittenTTS (local on-device, free, lightweight ~25-80MB ONNX)", ] ) - providers.extend(["edge", "elevenlabs", "openai", "xai", "minimax", "mistral", "gemini", "neutts"]) + providers.extend(["edge", "elevenlabs", "openai", "xai", "minimax", "mistral", "gemini", "neutts", "kittentts"]) choices.append(f"Keep current ({current_label})") keep_current_idx = len(choices) - 1 idx = prompt_choice("Select TTS provider:", choices, keep_current_idx) @@ -962,7 +1024,6 @@ def _setup_tts_provider(config: dict): if selected == "neutts": # Check if already installed try: - import importlib.util already_installed = importlib.util.find_spec("neutts") is not None except Exception: already_installed = False @@ -1061,6 +1122,29 @@ def _setup_tts_provider(config: dict): print_warning("No API key provided. 
Falling back to Edge TTS.") selected = "edge" + elif selected == "kittentts": + # Check if already installed + try: + import importlib.util + already_installed = importlib.util.find_spec("kittentts") is not None + except Exception: + already_installed = False + + if already_installed: + print_success("KittenTTS is already installed") + else: + print() + print_info("KittenTTS is lightweight (~25-80MB, CPU-only, no API key required).") + print_info("Voices: Jasper, Bella, Luna, Bruno, Rosie, Hugo, Kiki, Leo") + print() + if prompt_yes_no("Install KittenTTS now?", True): + if not _install_kittentts_deps(): + print_warning("KittenTTS installation incomplete. Falling back to Edge TTS.") + selected = "edge" + else: + print_info("Skipping install. Set tts.provider to 'kittentts' after installing manually.") + selected = "edge" + # Save the selection if "tts" not in config: config["tts"] = {} @@ -1082,8 +1166,6 @@ def setup_tts(config: dict): def setup_terminal_backend(config: dict): """Configure the terminal execution backend.""" import platform as _platform - import shutil - print_header("Terminal Backend") print_info("Choose where Hermes runs shell commands and code.") print_info("This affects tool execution, file access, and isolation.") @@ -1460,7 +1542,9 @@ def setup_agent_settings(config: dict): ) print_info("Maximum tool-calling iterations per conversation.") print_info("Higher = more complex tasks, but costs more tokens.") - print_info("Default is 90, which works for most tasks. Use 150+ for open exploration.") + print_info( + f"Press Enter to keep {current_max}. Use 90 for most tasks or 150+ for open exploration." + ) max_iter_str = prompt("Max iterations", current_max) try: @@ -2356,6 +2440,74 @@ def setup_tools(config: dict, first_install: bool = False): # ============================================================================= +def _model_section_has_credentials(config: dict) -> bool: + """Return True when any known inference provider has usable credentials. + + Sources of truth: + * ``PROVIDER_REGISTRY`` in ``hermes_cli.auth`` — lists every supported + provider along with its ``api_key_env_vars``. + * ``active_provider`` in the auth store — covers OAuth device-code / + external-OAuth providers (Nous, Codex, Qwen, Gemini CLI, ...). + * The legacy OpenRouter aggregator env vars, which route generic + ``OPENAI_API_KEY`` / ``OPENROUTER_API_KEY`` values through OpenRouter. + """ + try: + from hermes_cli.auth import get_active_provider + if get_active_provider(): + return True + except Exception: + pass + + try: + from hermes_cli.auth import PROVIDER_REGISTRY + except Exception: + PROVIDER_REGISTRY = {} # type: ignore[assignment] + + def _has_key(pconfig) -> bool: + for env_var in pconfig.api_key_env_vars: + # CLAUDE_CODE_OAUTH_TOKEN is set by Claude Code itself, not by + # the user — mirrors is_provider_explicitly_configured in auth.py. + if env_var == "CLAUDE_CODE_OAUTH_TOKEN": + continue + if get_env_value(env_var): + return True + return False + + # Prefer the provider declared in config.yaml, avoids false positives + # from stray env vars (GH_TOKEN, etc.) when the user has already picked + # a different provider. 
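+    # e.g. (illustrative) config {"model": {"provider": "kimi-coding"}} plus a
+    # KIMI_API_KEY in the env returns True here without consulting GH_TOKEN.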
+ model_cfg = config.get("model") if isinstance(config, dict) else None + if isinstance(model_cfg, dict): + provider_id = (model_cfg.get("provider") or "").strip().lower() + if provider_id in PROVIDER_REGISTRY: + if _has_key(PROVIDER_REGISTRY[provider_id]): + return True + if provider_id == "openrouter": + for env_var in ("OPENROUTER_API_KEY", "OPENAI_API_KEY"): + if get_env_value(env_var): + return True + + # OpenRouter aggregator fallback (no provider declared in config). + for env_var in ("OPENROUTER_API_KEY", "OPENAI_API_KEY"): + if get_env_value(env_var): + return True + + for pid, pconfig in PROVIDER_REGISTRY.items(): + # Skip copilot in auto-detect: GH_TOKEN / GITHUB_TOKEN are + # commonly set for git tooling. Mirrors resolve_provider in auth.py. + if pid == "copilot": + continue + if _has_key(pconfig): + return True + return False + + +def _gateway_platform_short_label(label: str) -> str: + """Strip trailing parenthetical qualifiers from a gateway platform label.""" + base = label.split("(", 1)[0].strip() + return base or label + + def _get_section_config_summary(config: dict, section_key: str) -> Optional[str]: """Return a short summary if a setup section is already configured, else None. @@ -2364,20 +2516,7 @@ def _get_section_config_summary(config: dict, section_key: str) -> Optional[str] so that test patches on ``setup_mod.get_env_value`` take effect. """ if section_key == "model": - has_key = bool( - get_env_value("OPENROUTER_API_KEY") - or get_env_value("OPENAI_API_KEY") - or get_env_value("ANTHROPIC_API_KEY") - ) - if not has_key: - # Check for OAuth providers - try: - from hermes_cli.auth import get_active_provider - if get_active_provider(): - has_key = True - except Exception: - pass - if not has_key: + if not _model_section_has_credentials(config): return None model = config.get("model") if isinstance(model, str) and model.strip(): @@ -2395,37 +2534,11 @@ def _get_section_config_summary(config: dict, section_key: str) -> Optional[str] return f"max turns: {max_turns}" elif section_key == "gateway": - platforms = [] - if get_env_value("TELEGRAM_BOT_TOKEN"): - platforms.append("Telegram") - if get_env_value("DISCORD_BOT_TOKEN"): - platforms.append("Discord") - if get_env_value("SLACK_BOT_TOKEN"): - platforms.append("Slack") - if get_env_value("SIGNAL_ACCOUNT"): - platforms.append("Signal") - if get_env_value("EMAIL_ADDRESS"): - platforms.append("Email") - if get_env_value("TWILIO_ACCOUNT_SID"): - platforms.append("SMS") - if get_env_value("MATRIX_ACCESS_TOKEN") or get_env_value("MATRIX_PASSWORD"): - platforms.append("Matrix") - if get_env_value("MATTERMOST_TOKEN"): - platforms.append("Mattermost") - if get_env_value("WHATSAPP_PHONE_NUMBER_ID"): - platforms.append("WhatsApp") - if get_env_value("DINGTALK_CLIENT_ID"): - platforms.append("DingTalk") - if get_env_value("FEISHU_APP_ID"): - platforms.append("Feishu") - if get_env_value("WECOM_BOT_ID"): - platforms.append("WeCom") - if get_env_value("WEIXIN_ACCOUNT_ID"): - platforms.append("Weixin") - if get_env_value("BLUEBUBBLES_SERVER_URL"): - platforms.append("BlueBubbles") - if get_env_value("WEBHOOK_ENABLED"): - platforms.append("Webhooks") + platforms = [ + _gateway_platform_short_label(label) + for label, env_var, _ in _GATEWAY_PLATFORMS + if get_env_value(env_var) + ] if platforms: return ", ".join(platforms) return None # No platforms configured — section must run diff --git a/hermes_cli/status.py b/hermes_cli/status.py index 540afc3037..8541f0a05f 100644 --- a/hermes_cli/status.py +++ b/hermes_cli/status.py @@ -122,6 
+122,7 @@ def show_status(args): "OpenAI": "OPENAI_API_KEY", "Z.AI/GLM": "GLM_API_KEY", "Kimi": "KIMI_API_KEY", + "StepFun Step Plan": "STEPFUN_API_KEY", "MiniMax": "MINIMAX_API_KEY", "MiniMax-CN": "MINIMAX_CN_API_KEY", "Firecrawl": "FIRECRAWL_API_KEY", @@ -252,6 +253,7 @@ def show_status(args): apikey_providers = { "Z.AI / GLM": ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"), "Kimi / Moonshot": ("KIMI_API_KEY",), + "StepFun Step Plan": ("STEPFUN_API_KEY",), "MiniMax": ("MINIMAX_API_KEY",), "MiniMax (China)": ("MINIMAX_CN_API_KEY",), } diff --git a/hermes_cli/timeouts.py b/hermes_cli/timeouts.py new file mode 100644 index 0000000000..59db4012be --- /dev/null +++ b/hermes_cli/timeouts.py @@ -0,0 +1,82 @@ +from __future__ import annotations + + +def _coerce_timeout(raw: object) -> float | None: + try: + timeout = float(raw) + except (TypeError, ValueError): + return None + if timeout <= 0: + return None + return timeout + + +def get_provider_request_timeout( + provider_id: str, model: str | None = None +) -> float | None: + """Return a configured provider request timeout in seconds, if any.""" + if not provider_id: + return None + + try: + from hermes_cli.config import load_config + except ImportError: + return None + + config = load_config() + providers = config.get("providers", {}) if isinstance(config, dict) else {} + provider_config = ( + providers.get(provider_id, {}) if isinstance(providers, dict) else {} + ) + if not isinstance(provider_config, dict): + return None + + model_config = _get_model_config(provider_config, model) + if model_config is not None: + timeout = _coerce_timeout(model_config.get("timeout_seconds")) + if timeout is not None: + return timeout + + return _coerce_timeout(provider_config.get("request_timeout_seconds")) + + +def get_provider_stale_timeout( + provider_id: str, model: str | None = None +) -> float | None: + """Return a configured non-stream stale timeout in seconds, if any.""" + if not provider_id: + return None + + try: + from hermes_cli.config import load_config + except ImportError: + return None + + config = load_config() + providers = config.get("providers", {}) if isinstance(config, dict) else {} + provider_config = ( + providers.get(provider_id, {}) if isinstance(providers, dict) else {} + ) + if not isinstance(provider_config, dict): + return None + + model_config = _get_model_config(provider_config, model) + if model_config is not None: + timeout = _coerce_timeout(model_config.get("stale_timeout_seconds")) + if timeout is not None: + return timeout + + return _coerce_timeout(provider_config.get("stale_timeout_seconds")) + + +def _get_model_config( + provider_config: dict[str, object], model: str | None +) -> dict[str, object] | None: + if not model: + return None + + models = provider_config.get("models", {}) + model_config = models.get(model, {}) if isinstance(models, dict) else {} + if isinstance(model_config, dict): + return model_config + return None diff --git a/hermes_cli/tips.py b/hermes_cli/tips.py index aa6cb9729f..24acc15f53 100644 --- a/hermes_cli/tips.py +++ b/hermes_cli/tips.py @@ -127,7 +127,7 @@ TIPS = [ # --- Tools & Capabilities --- "execute_code runs Python scripts that call Hermes tools programmatically — results stay out of context.", - "delegate_task spawns up to 3 concurrent sub-agents with isolated contexts for parallel work.", + "delegate_task spawns up to 3 concurrent sub-agents by default (configurable via delegation.max_concurrent_children) with isolated contexts for parallel work.", "web_extract works on PDF URLs — 
pass any PDF link and it converts to markdown.", "search_files is ripgrep-backed and faster than grep — use it instead of terminal grep.", "patch uses 9 fuzzy matching strategies so minor whitespace differences won't break edits.", @@ -245,7 +245,7 @@ TIPS = [ "Three plugin types: general (tools/hooks), memory providers, and context engines.", "hermes plugins install owner/repo installs plugins directly from GitHub.", "8 external memory providers available: Honcho, OpenViking, Mem0, Hindsight, and more.", - "Plugin hooks include pre_tool_call, post_tool_call, pre_llm_call, and post_llm_call.", + "Plugin hooks include pre/post_tool_call, pre/post_llm_call, and transform_terminal_output for output canonicalization.", # --- Miscellaneous --- "Prompt caching (Anthropic) reduces costs by reusing cached system prompt prefixes.", @@ -323,7 +323,6 @@ TIPS = [ "GPT-5 and Codex use 'developer' role instead of 'system' in the message format.", "Per-task auxiliary overrides: auxiliary.vision.provider, auxiliary.compression.model, etc. in config.yaml.", "The auxiliary client treats 'main' as a provider alias — resolves to your actual primary provider + model.", - "Smart routing can auto-route simple queries to a cheaper model — set smart_model_routing.enabled: true.", "hermes claw migrate --dry-run previews OpenClaw migration without writing anything.", "File paths pasted with quotes or escaped spaces are handled automatically — no manual cleanup needed.", "Slash commands never trigger the large-paste collapse — /command with big arguments works correctly.", @@ -346,4 +345,3 @@ def get_random_tip(exclude_recent: int = 0) -> str: return random.choice(TIPS) - diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py index 8e4bde883f..7a9a598f95 100644 --- a/hermes_cli/tools_config.py +++ b/hermes_cli/tools_config.py @@ -24,7 +24,8 @@ from hermes_cli.nous_subscription import ( apply_nous_managed_defaults, get_nous_subscription_features, ) -from tools.tool_backend_helpers import managed_nous_tools_enabled +from tools.tool_backend_helpers import fal_key_is_configured, managed_nous_tools_enabled +from utils import base_url_hostname logger = logging.getLogger(__name__) @@ -181,6 +182,14 @@ TOOL_CATEGORIES = { ], "tts_provider": "gemini", }, + { + "name": "KittenTTS", + "badge": "local · free", + "tag": "Lightweight local ONNX TTS (~25MB), no API key", + "env_vars": [], + "tts_provider": "kittentts", + "post_setup": "kittentts", + }, ], }, "web": { @@ -422,6 +431,36 @@ def _run_post_setup(post_setup_key: str): _print_warning(" Node.js not found. 
Install Camofox via Docker:") _print_info(" docker run -p 9377:9377 -e CAMOFOX_PORT=9377 jo-inc/camofox-browser") + elif post_setup_key == "kittentts": + try: + __import__("kittentts") + _print_success(" kittentts is already installed") + return + except ImportError: + pass + import subprocess + _print_info(" Installing kittentts (~25-80MB model, CPU-only)...") + wheel_url = ( + "https://github.com/KittenML/KittenTTS/releases/download/" + "0.8.1/kittentts-0.8.1-py3-none-any.whl" + ) + try: + result = subprocess.run( + [sys.executable, "-m", "pip", "install", "-U", wheel_url, "soundfile", "--quiet"], + capture_output=True, text=True, timeout=300, + ) + if result.returncode == 0: + _print_success(" kittentts installed") + _print_info(" Voices: Jasper, Bella, Luna, Bruno, Rosie, Hugo, Kiki, Leo") + _print_info(" Models: KittenML/kitten-tts-nano-0.8-int8 (25MB), micro (41MB), mini (80MB)") + else: + _print_warning(" kittentts install failed:") + _print_info(f" {result.stderr.strip()[:300]}") + _print_info(f" Run manually: python -m pip install -U '{wheel_url}' soundfile") + except subprocess.TimeoutExpired: + _print_warning(" kittentts install timed out (>5min)") + _print_info(f" Run manually: python -m pip install -U '{wheel_url}' soundfile") + elif post_setup_key == "rl_training": try: __import__("tinker_atropos") @@ -546,6 +585,10 @@ def _get_platform_tools( ts_tools = set(resolve_toolset(ts_key)) if ts_tools and ts_tools.issubset(all_tool_names): enabled_toolsets.add(ts_key) + default_off = set(_DEFAULT_OFF_TOOLSETS) + if platform in default_off: + default_off.remove(platform) + enabled_toolsets -= default_off # Plugin toolsets: enabled by default unless explicitly disabled. # A plugin toolset is "known" for a platform once `hermes tools` @@ -804,6 +847,51 @@ def _configure_toolset(ts_key: str, config: dict): _configure_simple_requirements(ts_key) +def _plugin_image_gen_providers() -> list[dict]: + """Build picker-row dicts from plugin-registered image gen providers. + + Each returned dict looks like a regular ``TOOL_CATEGORIES`` provider + row but carries an ``image_gen_plugin_name`` marker so downstream + code (config writing, model picker) knows to route through the + plugin registry instead of the in-tree FAL backend. + + FAL is skipped — it's already exposed by the hardcoded + ``TOOL_CATEGORIES["image_gen"]`` entries. When FAL gets ported to + a plugin in a follow-up PR, the hardcoded entries go away and this + function surfaces it alongside OpenAI automatically. + """ + try: + from agent.image_gen_registry import list_providers + from hermes_cli.plugins import _ensure_plugins_discovered + + _ensure_plugins_discovered() + providers = list_providers() + except Exception: + return [] + + rows: list[dict] = [] + for provider in providers: + if getattr(provider, "name", None) == "fal": + # FAL has its own hardcoded rows today. 
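+            # (Non-FAL providers that survive the checks below become picker
+            #  rows shaped like, illustratively:
+            #    {"name": "OpenAI Images", "badge": "api key",
+            #     "env_vars": ["OPENAI_API_KEY"],
+            #     "image_gen_plugin_name": "openai"}.)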
+ continue + try: + schema = provider.get_setup_schema() + except Exception: + continue + if not isinstance(schema, dict): + continue + rows.append( + { + "name": schema.get("name", provider.display_name), + "badge": schema.get("badge", ""), + "tag": schema.get("tag", ""), + "env_vars": schema.get("env_vars", []), + "image_gen_plugin_name": provider.name, + } + ) + return rows + + def _visible_providers(cat: dict, config: dict) -> list[dict]: """Return provider entries visible for the current auth/config state.""" features = get_nous_subscription_features(config) @@ -814,6 +902,12 @@ def _visible_providers(cat: dict, config: dict) -> list[dict]: if provider.get("requires_nous_auth") and not features.nous_auth_present: continue visible.append(provider) + + # Inject plugin-registered image_gen backends (OpenAI today, more + # later) so the picker lists them alongside FAL / Nous Subscription. + if cat.get("name") == "Image Generation": + visible.extend(_plugin_image_gen_providers()) + return visible @@ -833,7 +927,24 @@ def _toolset_needs_configuration_prompt(ts_key: str, config: dict) -> bool: browser_cfg = config.get("browser", {}) return not isinstance(browser_cfg, dict) or "cloud_provider" not in browser_cfg if ts_key == "image_gen": - return not get_env_value("FAL_KEY") + # Satisfied when the in-tree FAL backend is configured OR any + # plugin-registered image gen provider is available. + if fal_key_is_configured(): + return False + try: + from agent.image_gen_registry import list_providers + from hermes_cli.plugins import _ensure_plugins_discovered + + _ensure_plugins_discovered() + for provider in list_providers(): + try: + if provider.is_available(): + return False + except Exception: + continue + except Exception: + pass + return True return not _toolset_has_keys(ts_key, config) @@ -1052,6 +1163,88 @@ def _configure_imagegen_model(backend_name: str, config: dict) -> None: _print_success(f" Model set to: {chosen}") +def _plugin_image_gen_catalog(plugin_name: str): + """Return ``(catalog_dict, default_model_id)`` for a plugin provider. + + ``catalog_dict`` is shaped like the legacy ``FAL_MODELS`` table — + ``{model_id: {"display", "speed", "strengths", "price", ...}}`` — + so the existing picker code paths work without change. Returns + ``({}, None)`` if the provider isn't registered or has no models. + """ + try: + from agent.image_gen_registry import get_provider + from hermes_cli.plugins import _ensure_plugins_discovered + + _ensure_plugins_discovered() + provider = get_provider(plugin_name) + except Exception: + return {}, None + if provider is None: + return {}, None + try: + models = provider.list_models() or [] + default = provider.default_model() + except Exception: + return {}, None + catalog = {m["id"]: m for m in models if isinstance(m, dict) and "id" in m} + return catalog, default + + +def _configure_imagegen_model_for_plugin(plugin_name: str, config: dict) -> None: + """Prompt the user to pick a model for a plugin-registered backend. + + Writes selection to ``image_gen.model``. Mirrors + :func:`_configure_imagegen_model` but sources its catalog from the + plugin registry instead of :data:`IMAGEGEN_BACKENDS`. 
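+
+    Resulting config shape (illustrative provider and model ids)::
+
+        image_gen:
+          provider: openai
+          model: gpt-image-1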
+ """ + catalog, default_model = _plugin_image_gen_catalog(plugin_name) + if not catalog: + return + + cur_cfg = config.setdefault("image_gen", {}) + if not isinstance(cur_cfg, dict): + cur_cfg = {} + config["image_gen"] = cur_cfg + current_model = cur_cfg.get("model") or default_model + if current_model not in catalog: + current_model = default_model + + model_ids = list(catalog.keys()) + ordered = [current_model] + [m for m in model_ids if m != current_model] + + widths = { + "model": max(len(m) for m in model_ids), + "speed": max((len(catalog[m].get("speed", "")) for m in model_ids), default=6), + "strengths": max((len(catalog[m].get("strengths", "")) for m in model_ids), default=0), + } + + print() + header = ( + f" {'Model':<{widths['model']}} " + f"{'Speed':<{widths['speed']}} " + f"{'Strengths':<{widths['strengths']}} " + f"Price" + ) + print(color(header, Colors.CYAN)) + + rows = [] + for mid in ordered: + row = _format_imagegen_model_row(mid, catalog[mid], widths) + if mid == current_model: + row += " ← currently in use" + rows.append(row) + + idx = _prompt_choice( + f" Choose {plugin_name} model:", + rows, + default=0, + ) + + chosen = ordered[idx] + cur_cfg["model"] = chosen + _print_success(f" Model set to: {chosen}") + + def _configure_provider(provider: dict, config: dict): """Configure a single provider - prompt for API keys and set config.""" env_vars = provider.get("env_vars", []) @@ -1108,10 +1301,28 @@ def _configure_provider(provider: dict, config: dict): _print_success(f" {provider['name']} - no configuration needed!") if managed_feature: _print_info(" Requests for this tool will be billed to your Nous subscription.") + # Plugin-registered image_gen provider: write image_gen.provider + # and route model selection to the plugin's own catalog. + plugin_name = provider.get("image_gen_plugin_name") + if plugin_name: + img_cfg = config.setdefault("image_gen", {}) + if not isinstance(img_cfg, dict): + img_cfg = {} + config["image_gen"] = img_cfg + img_cfg["provider"] = plugin_name + _print_success(f" image_gen.provider set to: {plugin_name}") + _configure_imagegen_model_for_plugin(plugin_name, config) + return # Imagegen backends prompt for model selection after backend pick. backend = provider.get("imagegen_backend") if backend: _configure_imagegen_model(backend, config) + # In-tree FAL is the only non-plugin backend today. Keep + # image_gen.provider clear so the dispatch shim falls through + # to the legacy FAL path. + img_cfg = config.setdefault("image_gen", {}) + if isinstance(img_cfg, dict) and img_cfg.get("provider") not in (None, "", "fal"): + img_cfg["provider"] = "fal" return # Prompt for each required env var @@ -1146,10 +1357,23 @@ def _configure_provider(provider: dict, config: dict): if all_configured: _print_success(f" {provider['name']} configured!") + plugin_name = provider.get("image_gen_plugin_name") + if plugin_name: + img_cfg = config.setdefault("image_gen", {}) + if not isinstance(img_cfg, dict): + img_cfg = {} + config["image_gen"] = img_cfg + img_cfg["provider"] = plugin_name + _print_success(f" image_gen.provider set to: {plugin_name}") + _configure_imagegen_model_for_plugin(plugin_name, config) + return # Imagegen backends prompt for model selection after env vars are in. 
backend = provider.get("imagegen_backend") if backend: _configure_imagegen_model(backend, config) + img_cfg = config.setdefault("image_gen", {}) + if isinstance(img_cfg, dict) and img_cfg.get("provider") not in (None, "", "fal"): + img_cfg["provider"] = "fal" def _configure_simple_requirements(ts_key: str): @@ -1175,17 +1399,17 @@ def _configure_simple_requirements(ts_key: str): _print_warning(" Skipped") elif idx == 1: base_url = _prompt(" OPENAI_BASE_URL (blank for OpenAI)").strip() or "https://api.openai.com/v1" - key_label = " OPENAI_API_KEY" if "api.openai.com" in base_url.lower() else " API key" + is_native_openai = base_url_hostname(base_url) == "api.openai.com" + key_label = " OPENAI_API_KEY" if is_native_openai else " API key" api_key = _prompt(key_label, password=True) if api_key and api_key.strip(): save_env_value("OPENAI_API_KEY", api_key.strip()) # Save vision base URL to config (not .env — only secrets go there) - from hermes_cli.config import load_config, save_config _cfg = load_config() _aux = _cfg.setdefault("auxiliary", {}).setdefault("vision", {}) _aux["base_url"] = base_url save_config(_cfg) - if "api.openai.com" in base_url.lower(): + if is_native_openai: save_env_value("AUXILIARY_VISION_MODEL", "gpt-4o-mini") _print_success(" Saved") else: diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py index 110b81e4b5..9cdfdb37df 100644 --- a/hermes_cli/web_server.py +++ b/hermes_cli/web_server.py @@ -16,6 +16,7 @@ import json import logging import os import secrets +import subprocess import sys import threading import time @@ -114,6 +115,91 @@ def _require_token(request: Request) -> None: raise HTTPException(status_code=401, detail="Unauthorized") +# Accepted Host header values for loopback binds. DNS rebinding attacks +# point a victim browser at an attacker-controlled hostname (evil.test) +# which resolves to 127.0.0.1 after a TTL flip — bypassing same-origin +# checks because the browser now considers evil.test and our dashboard +# "same origin". Validating the Host header at the app layer rejects any +# request whose Host isn't one we bound for. See GHSA-ppp5-vxwm-4cf7. +_LOOPBACK_HOST_VALUES: frozenset = frozenset({ + "localhost", "127.0.0.1", "::1", +}) + + +def _is_accepted_host(host_header: str, bound_host: str) -> bool: + """True if the Host header targets the interface we bound to. + + Accepts: + - Exact bound host (with or without port suffix) + - Loopback aliases when bound to loopback + - Any host when bound to 0.0.0.0 (explicit opt-in to non-loopback, + no protection possible at this layer) + """ + if not host_header: + return False + # Strip port suffix. IPv6 addresses use bracket notation: + # [::1] — no port + # [::1]:9119 — with port + # Plain hosts/v4: + # localhost:9119 + # 127.0.0.1:9119 + h = host_header.strip() + if h.startswith("["): + # IPv6 bracketed — port (if any) follows "]:" + close = h.find("]") + if close != -1: + host_only = h[1:close] # strip brackets + else: + host_only = h.strip("[]") + else: + host_only = h.rsplit(":", 1)[0] if ":" in h else h + host_only = host_only.lower() + + # 0.0.0.0 bind means operator explicitly opted into all-interfaces + # (requires --insecure per web_server.start_server). No Host-layer + # defence can protect that mode; rely on operator network controls. 
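+    #
+    # Worked examples (hypothetical hostnames/ports):
+    #   _is_accepted_host("anything.example:9119", "0.0.0.0")  -> True   (all-interfaces opt-in)
+    #   _is_accepted_host("[::1]:9119", "127.0.0.1")           -> True   (loopback alias)
+    #   _is_accepted_host("evil.test:9119", "127.0.0.1")       -> False  (rebinding rejected)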
+    if bound_host in ("0.0.0.0", "::"):
+        return True
+
+    # Loopback bind: accept the loopback names
+    bound_lc = bound_host.lower()
+    if bound_lc in _LOOPBACK_HOST_VALUES:
+        return host_only in _LOOPBACK_HOST_VALUES
+
+    # Explicit non-loopback bind: require exact host match
+    return host_only == bound_lc
+
+
+@app.middleware("http")
+async def host_header_middleware(request: Request, call_next):
+    """Reject requests whose Host header doesn't match the bound interface.
+
+    Defends against DNS rebinding: a victim browser on a localhost
+    dashboard is tricked into fetching from an attacker hostname that
+    TTL-flips to 127.0.0.1. CORS and same-origin checks don't help —
+    the browser now treats the attacker origin as same-origin with the
+    dashboard. Host-header validation at the app layer catches it.
+
+    See GHSA-ppp5-vxwm-4cf7.
+    """
+    # The bound host lives on app.state so this middleware can read it —
+    # set by start_server() at listen time.
+    bound_host = getattr(app.state, "bound_host", None)
+    if bound_host:
+        host_header = request.headers.get("host", "")
+        if not _is_accepted_host(host_header, bound_host):
+            return JSONResponse(
+                status_code=400,
+                content={
+                    "detail": (
+                        "Invalid Host header. Dashboard requests must use "
+                        "the hostname the server was bound to."
+                    ),
+                },
+            )
+    return await call_next(request)
+
+
 @app.middleware("http")
 async def auth_middleware(request: Request, call_next):
     """Require the session token on all /api/ routes except the public list."""
@@ -232,8 +318,8 @@ _CATEGORY_MERGE: Dict[str, str] = {
     "checkpoints": "agent",
     "approvals": "security",
     "human_delay": "display",
-    "smart_model_routing": "agent",
     "dashboard": "display",
+    "code_execution": "agent",
 }
 
 # Display order for tabs — unlisted categories sort alphabetically after these.
@@ -476,6 +562,138 @@ async def get_status():
     }
+
+# ---------------------------------------------------------------------------
+# Gateway + update actions (invoked from the Status page).
+#
+# Both commands are spawned as detached subprocesses so the HTTP request
+# returns immediately. stdin is closed (``DEVNULL``) so any stray ``input()``
+# calls fail fast with EOF rather than hanging forever. stdout/stderr are
+# streamed to a per-action log file under ``~/.hermes/logs/<name>.log`` so
+# the dashboard can tail them back to the user.
+# ---------------------------------------------------------------------------
+
+_ACTION_LOG_DIR: Path = get_hermes_home() / "logs"
+
+# Short ``name`` (from the URL) → log file name under ``_ACTION_LOG_DIR``.
+_ACTION_LOG_FILES: Dict[str, str] = {
+    "gateway-restart": "gateway-restart.log",
+    "hermes-update": "hermes-update.log",
+}
+
+# ``name`` → most recently spawned Popen handle. Used so ``status`` can
+# report liveness and exit code without shelling out to ``ps``.
+_ACTION_PROCS: Dict[str, subprocess.Popen] = {}
+
+
+def _spawn_hermes_action(subcommand: List[str], name: str) -> subprocess.Popen:
+    """Spawn ``hermes <subcommand>`` detached and record the Popen handle.
+
+    Uses the running interpreter's ``hermes_cli.main`` module so the action
+    inherits the same venv/PYTHONPATH the web server is using.
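+
+    Note: ``_ACTION_PROCS`` is in-memory state. If the web server itself
+    restarts while an action is still running, the status endpoint loses
+    the handle and reports ``running: false`` — the log file under
+    ``_ACTION_LOG_DIR`` remains readable either way.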
+ """ + log_file_name = _ACTION_LOG_FILES[name] + _ACTION_LOG_DIR.mkdir(parents=True, exist_ok=True) + log_path = _ACTION_LOG_DIR / log_file_name + log_file = open(log_path, "ab", buffering=0) + log_file.write( + f"\n=== {name} started {time.strftime('%Y-%m-%d %H:%M:%S')} ===\n".encode() + ) + + cmd = [sys.executable, "-m", "hermes_cli.main", *subcommand] + + popen_kwargs: Dict[str, Any] = { + "cwd": str(PROJECT_ROOT), + "stdin": subprocess.DEVNULL, + "stdout": log_file, + "stderr": subprocess.STDOUT, + "env": {**os.environ, "HERMES_NONINTERACTIVE": "1"}, + } + if sys.platform == "win32": + popen_kwargs["creationflags"] = ( + subprocess.CREATE_NEW_PROCESS_GROUP # type: ignore[attr-defined] + | getattr(subprocess, "DETACHED_PROCESS", 0) + ) + else: + popen_kwargs["start_new_session"] = True + + proc = subprocess.Popen(cmd, **popen_kwargs) + _ACTION_PROCS[name] = proc + return proc + + +def _tail_lines(path: Path, n: int) -> List[str]: + """Return the last ``n`` lines of ``path``. Reads the whole file — fine + for our small per-action logs. Binary-decoded with ``errors='replace'`` + so log corruption doesn't 500 the endpoint.""" + if not path.exists(): + return [] + try: + text = path.read_text(errors="replace") + except OSError: + return [] + lines = text.splitlines() + return lines[-n:] if n > 0 else lines + + +@app.post("/api/gateway/restart") +async def restart_gateway(): + """Kick off a ``hermes gateway restart`` in the background.""" + try: + proc = _spawn_hermes_action(["gateway", "restart"], "gateway-restart") + except Exception as exc: + _log.exception("Failed to spawn gateway restart") + raise HTTPException(status_code=500, detail=f"Failed to restart gateway: {exc}") + return { + "ok": True, + "pid": proc.pid, + "name": "gateway-restart", + } + + +@app.post("/api/hermes/update") +async def update_hermes(): + """Kick off ``hermes update`` in the background.""" + try: + proc = _spawn_hermes_action(["update"], "hermes-update") + except Exception as exc: + _log.exception("Failed to spawn hermes update") + raise HTTPException(status_code=500, detail=f"Failed to start update: {exc}") + return { + "ok": True, + "pid": proc.pid, + "name": "hermes-update", + } + + +@app.get("/api/actions/{name}/status") +async def get_action_status(name: str, lines: int = 200): + """Tail an action log and report whether the process is still running.""" + log_file_name = _ACTION_LOG_FILES.get(name) + if log_file_name is None: + raise HTTPException(status_code=404, detail=f"Unknown action: {name}") + + log_path = _ACTION_LOG_DIR / log_file_name + tail = _tail_lines(log_path, min(max(lines, 1), 2000)) + + proc = _ACTION_PROCS.get(name) + if proc is None: + running = False + exit_code: Optional[int] = None + pid: Optional[int] = None + else: + exit_code = proc.poll() + running = exit_code is None + pid = proc.pid + + return { + "name": name, + "running": running, + "exit_code": exit_code, + "pid": pid, + "lines": tail, + } + + @app.get("/api/sessions") async def get_sessions(limit: int = 20, offset: int = 0): try: @@ -1958,6 +2176,8 @@ async def update_config_raw(body: RawConfigUpdate): @app.get("/api/analytics/usage") async def get_usage_analytics(days: int = 30): from hermes_state import SessionDB + from agent.insights import InsightsEngine + db = SessionDB() try: cutoff = time.time() - (days * 86400) @@ -1969,7 +2189,8 @@ async def get_usage_analytics(days: int = 30): SUM(reasoning_tokens) as reasoning_tokens, COALESCE(SUM(estimated_cost_usd), 0) as estimated_cost, COALESCE(SUM(actual_cost_usd), 0) as 
actual_cost, - COUNT(*) as sessions + COUNT(*) as sessions, + SUM(COALESCE(api_call_count, 0)) as api_calls FROM sessions WHERE started_at > ? GROUP BY day ORDER BY day """, (cutoff,)) @@ -1980,7 +2201,8 @@ async def get_usage_analytics(days: int = 30): SUM(input_tokens) as input_tokens, SUM(output_tokens) as output_tokens, COALESCE(SUM(estimated_cost_usd), 0) as estimated_cost, - COUNT(*) as sessions + COUNT(*) as sessions, + SUM(COALESCE(api_call_count, 0)) as api_calls FROM sessions WHERE started_at > ? AND model IS NOT NULL GROUP BY model ORDER BY SUM(input_tokens) + SUM(output_tokens) DESC """, (cutoff,)) @@ -1993,12 +2215,29 @@ async def get_usage_analytics(days: int = 30): SUM(reasoning_tokens) as total_reasoning, COALESCE(SUM(estimated_cost_usd), 0) as total_estimated_cost, COALESCE(SUM(actual_cost_usd), 0) as total_actual_cost, - COUNT(*) as total_sessions + COUNT(*) as total_sessions, + SUM(COALESCE(api_call_count, 0)) as total_api_calls FROM sessions WHERE started_at > ? """, (cutoff,)) totals = dict(cur3.fetchone()) + insights_report = InsightsEngine(db).generate(days=days) + skills = insights_report.get("skills", { + "summary": { + "total_skill_loads": 0, + "total_skill_edits": 0, + "total_skill_actions": 0, + "distinct_skills_used": 0, + }, + "top_skills": [], + }) - return {"daily": daily, "by_model": by_model, "totals": totals, "period_days": days} + return { + "daily": daily, + "by_model": by_model, + "totals": totals, + "period_days": days, + "skills": skills, + } finally: db.close() @@ -2305,13 +2544,15 @@ def start_server( "authentication. Only use on trusted networks.", host, ) + # Record the bound host so host_header_middleware can validate incoming + # Host headers against it. Defends against DNS rebinding (GHSA-ppp5-vxwm-4cf7). + app.state.bound_host = host + if open_browser: - import threading import webbrowser def _open(): - import time as _t - _t.sleep(1.0) + time.sleep(1.0) webbrowser.open(f"http://{host}:{port}") threading.Thread(target=_open, daemon=True).start() diff --git a/hermes_cli/webhook.py b/hermes_cli/webhook.py index 8ff135e29e..378f11b4a7 100644 --- a/hermes_cli/webhook.py +++ b/hermes_cli/webhook.py @@ -155,6 +155,15 @@ def _cmd_subscribe(args): "created_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()), } + if getattr(args, "deliver_only", False): + if route["deliver"] == "log": + print( + "Error: --deliver-only requires --deliver to be a real target " + "(telegram, discord, slack, github_comment, etc.) — not 'log'." + ) + return + route["deliver_only"] = True + if args.deliver_chat_id: route["deliver_extra"] = {"chat_id": args.deliver_chat_id} @@ -172,9 +181,12 @@ def _cmd_subscribe(args): else: print(" Events: (all)") print(f" Deliver: {route['deliver']}") + if route.get("deliver_only"): + print(" Mode: direct delivery (no agent, zero LLM cost)") if route.get("prompt"): prompt_preview = route["prompt"][:80] + ("..." 
if len(route["prompt"]) > 80 else "") - print(f" Prompt: {prompt_preview}") + label = "Message" if route.get("deliver_only") else "Prompt" + print(f" {label}: {prompt_preview}") print(f"\n Configure your service to POST to the URL above.") print(f" Use the secret for HMAC-SHA256 signature validation.") print(f" The gateway must be running to receive events (hermes gateway run).\n") @@ -192,6 +204,8 @@ def _cmd_list(args): for name, route in subs.items(): events = ", ".join(route.get("events", [])) or "(all)" deliver = route.get("deliver", "log") + if route.get("deliver_only"): + deliver = f"{deliver} (direct — no agent)" desc = route.get("description", "") print(f" ◆ {name}") if desc: diff --git a/hermes_state.py b/hermes_state.py index af97f7fbd8..0ea9815b5a 100644 --- a/hermes_state.py +++ b/hermes_state.py @@ -31,7 +31,7 @@ T = TypeVar("T") DEFAULT_DB_PATH = get_hermes_home() / "state.db" -SCHEMA_VERSION = 6 +SCHEMA_VERSION = 8 SCHEMA_SQL = """ CREATE TABLE IF NOT EXISTS schema_version ( @@ -65,6 +65,7 @@ CREATE TABLE IF NOT EXISTS sessions ( cost_source TEXT, pricing_version TEXT, title TEXT, + api_call_count INTEGER DEFAULT 0, FOREIGN KEY (parent_session_id) REFERENCES sessions(id) ); @@ -80,10 +81,16 @@ CREATE TABLE IF NOT EXISTS messages ( token_count INTEGER, finish_reason TEXT, reasoning TEXT, + reasoning_content TEXT, reasoning_details TEXT, codex_reasoning_items TEXT ); +CREATE TABLE IF NOT EXISTS state_meta ( + key TEXT PRIMARY KEY, + value TEXT +); + CREATE INDEX IF NOT EXISTS idx_sessions_source ON sessions(source); CREATE INDEX IF NOT EXISTS idx_sessions_parent ON sessions(parent_session_id); CREATE INDEX IF NOT EXISTS idx_sessions_started ON sessions(started_at DESC); @@ -329,6 +336,26 @@ class SessionDB: except sqlite3.OperationalError: pass # Column already exists cursor.execute("UPDATE schema_version SET version = 6") + if current_version < 7: + # v7: preserve provider-native reasoning_content separately from + # normalized reasoning text. Kimi/Moonshot replay can require + # this field on assistant tool-call messages when thinking is on. + try: + cursor.execute('ALTER TABLE messages ADD COLUMN "reasoning_content" TEXT') + except sqlite3.OperationalError: + pass # Column already exists + cursor.execute("UPDATE schema_version SET version = 7") + if current_version < 8: + # v8: add api_call_count column to sessions — tracks the number + # of individual LLM API calls made within a session (as opposed + # to the session count itself). + try: + cursor.execute( + 'ALTER TABLE sessions ADD COLUMN "api_call_count" INTEGER DEFAULT 0' + ) + except sqlite3.OperationalError: + pass # Column already exists + cursor.execute("UPDATE schema_version SET version = 8") # Unique title index — always ensure it exists (safe to run after migrations # since the title column is guaranteed to exist at this point) @@ -383,10 +410,19 @@ class SessionDB: return session_id def end_session(self, session_id: str, end_reason: str) -> None: - """Mark a session as ended.""" + """Mark a session as ended. + + No-ops when the session is already ended. The first end_reason wins: + compression-split sessions must keep their ``end_reason = 'compression'`` + record even if a later stale ``end_session()`` call (e.g. from a + desynced CLI session_id after ``/resume`` or ``/branch``) targets them + with a different reason. Use ``reopen_session()`` first if you + intentionally need to re-end a closed session with a new reason. + """ def _do(conn): conn.execute( - "UPDATE sessions SET ended_at = ?, end_reason = ? 
WHERE id = ?", + "UPDATE sessions SET ended_at = ?, end_reason = ? " + "WHERE id = ? AND ended_at IS NULL", (time.time(), end_reason, session_id), ) self._execute_write(_do) @@ -426,6 +462,7 @@ class SessionDB: billing_provider: Optional[str] = None, billing_base_url: Optional[str] = None, billing_mode: Optional[str] = None, + api_call_count: int = 0, absolute: bool = False, ) -> None: """Update token counters and backfill model if not already set. @@ -455,7 +492,8 @@ class SessionDB: billing_provider = COALESCE(billing_provider, ?), billing_base_url = COALESCE(billing_base_url, ?), billing_mode = COALESCE(billing_mode, ?), - model = COALESCE(model, ?) + model = COALESCE(model, ?), + api_call_count = ? WHERE id = ?""" else: sql = """UPDATE sessions SET @@ -475,7 +513,8 @@ class SessionDB: billing_provider = COALESCE(billing_provider, ?), billing_base_url = COALESCE(billing_base_url, ?), billing_mode = COALESCE(billing_mode, ?), - model = COALESCE(model, ?) + model = COALESCE(model, ?), + api_call_count = COALESCE(api_call_count, 0) + ? WHERE id = ?""" params = ( input_tokens, @@ -493,6 +532,7 @@ class SessionDB: billing_base_url, billing_mode, model, + api_call_count, session_id, ) def _do(conn): @@ -714,6 +754,42 @@ class SessionDB: return f"{base} #{max_num + 1}" + def get_compression_tip(self, session_id: str) -> Optional[str]: + """Walk the compression-continuation chain forward and return the tip. + + A compression continuation is a child session where: + 1. The parent's ``end_reason = 'compression'`` + 2. The child was created AFTER the parent was ended (started_at >= ended_at) + + The second condition distinguishes compression continuations from + delegate subagents or branch children, which can also have a + ``parent_session_id`` but were created while the parent was still live. + + Returns the session_id of the latest continuation in the chain, or the + input ``session_id`` if it isn't part of a compression chain (or if the + input itself doesn't exist). + """ + current = session_id + # Bound the walk defensively — compression chains this deep are + # pathological and shouldn't happen in practice. 100 = plenty. + for _ in range(100): + with self._lock: + cursor = self._conn.execute( + "SELECT id FROM sessions " + "WHERE parent_session_id = ? " + " AND started_at >= (" + " SELECT ended_at FROM sessions " + " WHERE id = ? AND end_reason = 'compression'" + " ) " + "ORDER BY started_at DESC LIMIT 1", + (current, current), + ) + row = cursor.fetchone() + if row is None: + return current + current = row["id"] + return current + def list_sessions_rich( self, source: str = None, @@ -721,6 +797,7 @@ class SessionDB: limit: int = 20, offset: int = 0, include_children: bool = False, + project_compression_tips: bool = True, ) -> List[Dict[str, Any]]: """List sessions with preview (first user message) and last active timestamp. @@ -732,6 +809,14 @@ class SessionDB: By default, child sessions (subagent runs, compression continuations) are excluded. Pass ``include_children=True`` to include them. + + With ``project_compression_tips=True`` (default), sessions that are + roots of compression chains are projected forward to their latest + continuation — one logical conversation = one list entry, showing the + live continuation's id/message_count/title/last_active. This prevents + compressed continuations from being invisible to users while keeping + delegate subagents and branches hidden. Pass ``False`` to return the + raw root rows (useful for admin/debug UIs). 
""" where_clauses = [] params = [] @@ -782,8 +867,77 @@ class SessionDB: s["preview"] = "" sessions.append(s) + # Project compression roots forward to their tips. Each row whose + # end_reason is 'compression' has a continuation child; replace the + # surfaced fields (id, message_count, title, last_active, ended_at, + # end_reason, preview) with the tip's values so the list entry acts + # as the live conversation. Keep the root's started_at to preserve + # chronological ordering by original conversation start. + if project_compression_tips and not include_children: + projected = [] + for s in sessions: + if s.get("end_reason") != "compression": + projected.append(s) + continue + tip_id = self.get_compression_tip(s["id"]) + if tip_id == s["id"]: + projected.append(s) + continue + tip_row = self._get_session_rich_row(tip_id) + if not tip_row: + projected.append(s) + continue + # Preserve the root's started_at for stable sort order, but + # surface the tip's identity and activity data. + merged = dict(s) + for key in ( + "id", "ended_at", "end_reason", "message_count", + "tool_call_count", "title", "last_active", "preview", + "model", "system_prompt", + ): + if key in tip_row: + merged[key] = tip_row[key] + merged["_lineage_root_id"] = s["id"] + projected.append(merged) + sessions = projected + return sessions + def _get_session_rich_row(self, session_id: str) -> Optional[Dict[str, Any]]: + """Fetch a single session with the same enriched columns as + ``list_sessions_rich`` (preview + last_active). Returns None if the + session doesn't exist. + """ + query = """ + SELECT s.*, + COALESCE( + (SELECT SUBSTR(REPLACE(REPLACE(m.content, X'0A', ' '), X'0D', ' '), 1, 63) + FROM messages m + WHERE m.session_id = s.id AND m.role = 'user' AND m.content IS NOT NULL + ORDER BY m.timestamp, m.id LIMIT 1), + '' + ) AS _preview_raw, + COALESCE( + (SELECT MAX(m2.timestamp) FROM messages m2 WHERE m2.session_id = s.id), + s.started_at + ) AS last_active + FROM sessions s + WHERE s.id = ? + """ + with self._lock: + cursor = self._conn.execute(query, (session_id,)) + row = cursor.fetchone() + if not row: + return None + s = dict(row) + raw = s.pop("_preview_raw", "").strip() + if raw: + text = raw[:60] + s["preview"] = text + ("..." 
if len(raw) > 60 else "") + else: + s["preview"] = "" + return s + # ========================================================================= # Message storage # ========================================================================= @@ -799,6 +953,7 @@ class SessionDB: token_count: int = None, finish_reason: str = None, reasoning: str = None, + reasoning_content: str = None, reasoning_details: Any = None, codex_reasoning_items: Any = None, ) -> int: @@ -828,8 +983,8 @@ class SessionDB: cursor = conn.execute( """INSERT INTO messages (session_id, role, content, tool_call_id, tool_calls, tool_name, timestamp, token_count, finish_reason, - reasoning, reasoning_details, codex_reasoning_items) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""", + reasoning, reasoning_content, reasoning_details, codex_reasoning_items) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""", ( session_id, role, @@ -841,6 +996,7 @@ class SessionDB: token_count, finish_reason, reasoning, + reasoning_content, reasoning_details_json, codex_items_json, ), @@ -891,7 +1047,7 @@ class SessionDB: with self._lock: cursor = self._conn.execute( "SELECT role, content, tool_call_id, tool_calls, tool_name, " - "reasoning, reasoning_details, codex_reasoning_items " + "reasoning, reasoning_content, reasoning_details, codex_reasoning_items " "FROM messages WHERE session_id = ? ORDER BY timestamp, id", (session_id,), ) @@ -915,6 +1071,8 @@ class SessionDB: if row["role"] == "assistant": if row["reasoning"]: msg["reasoning"] = row["reasoning"] + if row["reasoning_content"] is not None: + msg["reasoning_content"] = row["reasoning_content"] if row["reasoning_details"]: try: msg["reasoning_details"] = json.loads(row["reasoning_details"]) @@ -1126,10 +1284,37 @@ class SessionDB: try: with self._lock: ctx_cursor = self._conn.execute( - """SELECT role, content FROM messages - WHERE session_id = ? AND id >= ? - 1 AND id <= ? + 1 - ORDER BY id""", - (match["session_id"], match["id"], match["id"]), + """WITH target AS ( + SELECT session_id, timestamp, id + FROM messages + WHERE id = ? + ) + SELECT role, content + FROM ( + SELECT m.id, m.timestamp, m.role, m.content + FROM messages m + JOIN target t ON t.session_id = m.session_id + WHERE (m.timestamp < t.timestamp) + OR (m.timestamp = t.timestamp AND m.id < t.id) + ORDER BY m.timestamp DESC, m.id DESC + LIMIT 1 + ) + UNION ALL + SELECT role, content + FROM messages + WHERE id = ? + UNION ALL + SELECT role, content + FROM ( + SELECT m.id, m.timestamp, m.role, m.content + FROM messages m + JOIN target t ON t.session_id = m.session_id + WHERE (m.timestamp > t.timestamp) + OR (m.timestamp = t.timestamp AND m.id > t.id) + ORDER BY m.timestamp ASC, m.id ASC + LIMIT 1 + )""", + (match["id"], match["id"]), ) context_msgs = [ {"role": r["role"], "content": (r["content"] or "")[:200]} @@ -1291,3 +1476,116 @@ class SessionDB: return len(session_ids) return self._execute_write(_do) + + # ── Meta key/value (for scheduler bookkeeping) ── + + def get_meta(self, key: str) -> Optional[str]: + """Read a value from the state_meta key/value store.""" + with self._lock: + row = self._conn.execute( + "SELECT value FROM state_meta WHERE key = ?", (key,) + ).fetchone() + if row is None: + return None + return row["value"] if isinstance(row, sqlite3.Row) else row[0] + + def set_meta(self, key: str, value: str) -> None: + """Write a value to the state_meta key/value store.""" + def _do(conn): + conn.execute( + "INSERT INTO state_meta (key, value) VALUES (?, ?) 
" + "ON CONFLICT(key) DO UPDATE SET value = excluded.value", + (key, value), + ) + self._execute_write(_do) + + # ── Space reclamation ── + + def vacuum(self) -> None: + """Run VACUUM to reclaim disk space after large deletes. + + SQLite does not shrink the database file when rows are deleted — + freed pages just get reused on the next insert. After a prune that + removed hundreds of sessions, the file stays bloated unless we + explicitly VACUUM. + + VACUUM rewrites the entire DB, so it's expensive (seconds per + 100MB) and cannot run inside a transaction. It also acquires an + exclusive lock, so callers must ensure no other writers are + active. Safe to call at startup before the gateway/CLI starts + serving traffic. + """ + # VACUUM cannot be executed inside a transaction. + with self._lock: + # Best-effort WAL checkpoint first, then VACUUM. + try: + self._conn.execute("PRAGMA wal_checkpoint(TRUNCATE)") + except Exception: + pass + self._conn.execute("VACUUM") + + def maybe_auto_prune_and_vacuum( + self, + retention_days: int = 90, + min_interval_hours: int = 24, + vacuum: bool = True, + ) -> Dict[str, Any]: + """Idempotent auto-maintenance: prune old sessions + optional VACUUM. + + Records the last run timestamp in state_meta so subsequent calls + within ``min_interval_hours`` no-op. Designed to be called once at + startup from long-lived entrypoints (CLI, gateway, cron scheduler). + + Never raises. On any failure, logs a warning and returns a dict + with ``"error"`` set. + + Returns a dict with keys: + - ``"skipped"`` (bool) — true if within min_interval_hours of last run + - ``"pruned"`` (int) — number of sessions deleted + - ``"vacuumed"`` (bool) — true if VACUUM ran + - ``"error"`` (str, optional) — present only on failure + """ + result: Dict[str, Any] = {"skipped": False, "pruned": 0, "vacuumed": False} + try: + # Skip if another process/call did maintenance recently. + last_raw = self.get_meta("last_auto_prune") + now = time.time() + if last_raw: + try: + last_ts = float(last_raw) + if now - last_ts < min_interval_hours * 3600: + result["skipped"] = True + return result + except (TypeError, ValueError): + pass # corrupt meta; treat as no prior run + + pruned = self.prune_sessions(older_than_days=retention_days) + result["pruned"] = pruned + + # Only VACUUM if we actually freed rows — VACUUM on a tight DB + # is wasted I/O. Threshold keeps small DBs from paying the cost. + if vacuum and pruned > 0: + try: + self.vacuum() + result["vacuumed"] = True + except Exception as exc: + logger.warning("state.db VACUUM failed: %s", exc) + + # Record the attempt even if pruned == 0, so we don't retry + # every startup within the min_interval_hours window. + self.set_meta("last_auto_prune", str(now)) + + if pruned > 0: + logger.info( + "state.db auto-maintenance: pruned %d session(s) older than %d days%s", + pruned, + retention_days, + " + VACUUM" if result["vacuumed"] else "", + ) + except Exception as exc: + # Maintenance must never block startup. Log and return error marker. 
+ logger.warning("state.db auto-maintenance failed: %s", exc) + result["error"] = str(exc) + + return result + diff --git a/mini_swe_runner.py b/mini_swe_runner.py index 739074402d..c434515045 100644 --- a/mini_swe_runner.py +++ b/mini_swe_runner.py @@ -43,13 +43,23 @@ from dotenv import load_dotenv load_dotenv() -def _effective_temperature_for_model(model: str) -> Optional[float]: - """Return a fixed temperature for models with strict sampling contracts.""" +def _effective_temperature_for_model( + model: str, + base_url: Optional[str] = None, +) -> Optional[float]: + """Return a fixed temperature for models with strict sampling contracts. + + Returns ``None`` when the model manages temperature server-side (Kimi); + callers must omit the ``temperature`` kwarg entirely in that case. + """ try: - from agent.auxiliary_client import _fixed_temperature_for_model + from agent.auxiliary_client import _fixed_temperature_for_model, OMIT_TEMPERATURE except Exception: return None - return _fixed_temperature_for_model(model) + result = _fixed_temperature_for_model(model, base_url) + if result is OMIT_TEMPERATURE: + return None # caller must omit temperature + return result @@ -457,7 +467,10 @@ Complete the user's task step by step.""" "tools": self.tools, "timeout": 300.0, } - fixed_temperature = _effective_temperature_for_model(self.model) + fixed_temperature = _effective_temperature_for_model( + self.model, + str(getattr(self.client, "base_url", "") or ""), + ) if fixed_temperature is not None: api_kwargs["temperature"] = fixed_temperature diff --git a/model_tools.py b/model_tools.py index 5ec806e78b..db4b46326b 100644 --- a/model_tools.py +++ b/model_tools.py @@ -282,6 +282,31 @@ def get_tool_definitions( filtered_tools[i] = {"type": "function", "function": dynamic_schema} break + # Rebuild discord_server schema based on the bot's privileged intents + # (detected from GET /applications/@me) and the user's action allowlist + # in config. Hides actions the bot's intents don't support so the + # model never attempts them, and annotates fetch_messages when the + # MESSAGE_CONTENT intent is missing. + if "discord_server" in available_tool_names: + try: + from tools.discord_tool import get_dynamic_schema + dynamic = get_dynamic_schema() + except Exception: # pragma: no cover — defensive, fall back to static + dynamic = None + if dynamic is None: + # Tool filtered out entirely (empty allowlist or detection disabled + # the only remaining actions). Drop it from the schema list. + filtered_tools = [ + t for t in filtered_tools + if t.get("function", {}).get("name") != "discord_server" + ] + available_tool_names.discard("discord_server") + else: + for i, td in enumerate(filtered_tools): + if td.get("function", {}).get("name") == "discord_server": + filtered_tools[i] = {"type": "function", "function": dynamic} + break + # Strip web tool cross-references from browser_navigate description when # web_search / web_extract are not available. The static schema says # "prefer web_search or web_extract" which causes the model to hallucinate @@ -525,6 +550,30 @@ def handle_function_call( except Exception: pass + # Generic tool-result canonicalization seam: plugins receive the + # final result string (JSON, usually) and may replace it by + # returning a string from transform_tool_result. Runs after + # post_tool_call (which stays observational) and before the result + # is appended back into conversation context. Fail-open; the first + # valid string return wins; non-string returns are ignored. 
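+        #
+        # A plugin-side hook might look like (hypothetical sketch, not an
+        # in-tree plugin):
+        #
+        #     def transform_tool_result(tool_name, args, result, **kwargs):
+        #         if tool_name == "web_search":
+        #             return result + "\n[annotated]"  # str return replaces result
+        #         return None  # leave every other tool's result untouched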
+        try:
+            from hermes_cli.plugins import invoke_hook
+            hook_results = invoke_hook(
+                "transform_tool_result",
+                tool_name=function_name,
+                args=function_args,
+                result=result,
+                task_id=task_id or "",
+                session_id=session_id or "",
+                tool_call_id=tool_call_id or "",
+            )
+            for hook_result in hook_results:
+                if isinstance(hook_result, str):
+                    result = hook_result
+                    break
+        except Exception:
+            pass
+
         return result
 
     except Exception as e:
diff --git a/nix/devShell.nix b/nix/devShell.nix
index 63edc59cf1..d0d56e40b0 100644
--- a/nix/devShell.nix
+++ b/nix/devShell.nix
@@ -7,7 +7,8 @@ let
   hermes-agent = inputs.self.packages.${system}.default;
   hermes-tui = inputs.self.packages.${system}.tui;
-  packages = [ hermes-agent hermes-tui ];
+  hermes-web = inputs.self.packages.${system}.web;
+  packages = [ hermes-agent hermes-tui hermes-web ];
 in {
   devShells.default = pkgs.mkShell {
     inputsFrom = packages;
diff --git a/nix/lib.nix b/nix/lib.nix
new file mode 100644
index 0000000000..ee28537a66
--- /dev/null
+++ b/nix/lib.nix
@@ -0,0 +1,217 @@
+# nix/lib.nix — Shared helpers for nix stuff
+{ pkgs, npm-lockfile-fix }:
+{
+  # Returns a buildNpmPackage-compatible attrs set that provides:
+  #   patchPhase — ensures lockfile has exactly one trailing newline
+  #   nativeBuildInputs — [ updateLockfileScript ] (list, prepend with ++ for more)
+  #   passthru.devShellHook — stamp-checked npm install + hash auto-update
+  #   passthru.npmLockfile — metadata for mkFixLockfiles
+  #
+  # NOTE: npmConfigHook runs `diff` between the source lockfile and the
+  # npm-deps cache lockfile. fetchNpmDeps preserves whatever trailing
+  # newlines the lockfile has. The patchPhase normalizes to exactly one
+  # trailing newline so both sides always match.
+  #
+  # Usage:
+  #   npm = hermesNpmLib.mkNpmPassthru { folder = "ui-tui"; attr = "tui"; pname = "hermes-tui"; };
+  #   pkgs.buildNpmPackage (npm // { ... })   # or:
+  #   pkgs.buildNpmPackage ({ ... } // npm)
+  mkNpmPassthru =
+    {
+      folder, # repo-relative folder with package.json, e.g. "ui-tui"
+      attr, # flake package attr, e.g. "tui"
+      pname, # e.g. "hermes-tui"
+      nixFile ? "nix/${attr}.nix", # defaults to nix/<attr>.nix
+    }:
+    {
+      patchPhase = ''
+        runHook prePatch
+        # Normalize trailing newlines so source and npm-deps always match,
+        # regardless of what fetchNpmDeps preserves.
+        sed -i -z 's/\n*$/\n/' package-lock.json
+
+        # Make npmConfigHook's byte-for-byte diff newline-agnostic by
+        # replacing its hardcoded /nix/store/.../diff with a wrapper that
+        # normalizes trailing newlines on both sides before comparing.
+        mkdir -p "$TMPDIR/bin"
+        cat > "$TMPDIR/bin/diff" << DIFFWRAP
+        #!/bin/sh
+        f1=\$(mktemp) && sed -z 's/\n*$/\n/' "\$1" > "\$f1"
+        f2=\$(mktemp) && sed -z 's/\n*$/\n/' "\$2" > "\$f2"
+        ${pkgs.diffutils}/bin/diff "\$f1" "\$f2" && rc=0 || rc=\$?
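+        # \$1/\$2 are the two lockfiles npmConfigHook compares; the wrapper
+        # preserves the real diff exit status so genuine mismatches still fail.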
+ rm -f "\$f1" "\$f2" + exit \$rc + DIFFWRAP + chmod +x "$TMPDIR/bin/diff" + export PATH="$TMPDIR/bin:$PATH" + + runHook postPatch + ''; + + nativeBuildInputs = [ + (pkgs.writeShellScriptBin "update_${attr}_lockfile" '' + set -euox pipefail + + REPO_ROOT=$(git rev-parse --show-toplevel) + + cd "$REPO_ROOT/${folder}" + rm -rf node_modules/ + npm cache clean --force + CI=true npm install + ${pkgs.lib.getExe npm-lockfile-fix} ./package-lock.json + + NIX_FILE="$REPO_ROOT/${nixFile}" + sed -i "s/hash = \"[^\"]*\";/hash = \"\";/" $NIX_FILE + NIX_OUTPUT=$(nix build .#${attr} 2>&1 || true) + NEW_HASH=$(echo "$NIX_OUTPUT" | grep 'got:' | awk '{print $2}') + echo got new hash $NEW_HASH + sed -i "s|hash = \"[^\"]*\";|hash = \"$NEW_HASH\";|" $NIX_FILE + nix build .#${attr} + echo "Updated npm hash in $NIX_FILE to $NEW_HASH" + '') + ]; + + passthru = { + devShellHook = pkgs.writeShellScript "npm-dev-hook-${pname}" '' + REPO_ROOT=$(git rev-parse --show-toplevel) + + _hermes_npm_stamp() { + sha256sum "${folder}/package.json" "${folder}/package-lock.json" \ + 2>/dev/null | sha256sum | awk '{print $1}' + } + STAMP=".nix-stamps/${pname}" + STAMP_VALUE="$(_hermes_npm_stamp)" + if [ ! -f "$STAMP" ] || [ "$(cat "$STAMP")" != "$STAMP_VALUE" ]; then + echo "${pname}: installing npm dependencies..." + ( cd ${folder} && CI=true npm install --silent --no-fund --no-audit 2>/dev/null ) + + # Auto-update the nix hash so it stays in sync with the lockfile + echo "${pname}: prefetching npm deps..." + NIX_FILE="$REPO_ROOT/${nixFile}" + if NEW_HASH=$(${pkgs.lib.getExe pkgs.prefetch-npm-deps} "${folder}/package-lock.json" 2>/dev/null); then + sed -i "s|hash = \"sha256-[A-Za-z0-9+/=]+\"|hash = \"$NEW_HASH\";|" "$NIX_FILE" + echo "${pname}: updated hash to $NEW_HASH" + else + echo "${pname}: warning: prefetch failed, run 'nix run .#fix-lockfiles -- --apply' manually" >&2 + fi + + mkdir -p .nix-stamps + _hermes_npm_stamp > "$STAMP" + fi + unset -f _hermes_npm_stamp + ''; + + npmLockfile = { + inherit attr folder nixFile; + }; + }; + }; + + # Aggregate `fix-lockfiles` bin from a list of packages carrying + # passthru.npmLockfile = { attr; folder; nixFile; }; + # Invocations: + # fix-lockfiles --check # exit 1 if any hash is stale + # fix-lockfiles --apply # rewrite stale hashes in place + # Writes machine-readable fields (stale, changed, report) to $GITHUB_OUTPUT + # when set, so CI workflows can post a sticky PR comment directly. + mkFixLockfiles = + { + packages, # list of packages with passthru.npmLockfile + }: + let + entries = map (p: p.passthru.npmLockfile) packages; + entryArgs = pkgs.lib.concatMapStringsSep " " (e: "\"${e.attr}:${e.folder}:${e.nixFile}\"") entries; + in + pkgs.writeShellScriptBin "fix-lockfiles" '' + set -uox pipefail + MODE="''${1:---check}" + case "$MODE" in + --check|--apply) ;; + -h|--help) + echo "usage: fix-lockfiles [--check|--apply]" + exit 0 ;; + *) + echo "usage: fix-lockfiles [--check|--apply]" >&2 + exit 2 ;; + esac + + ENTRIES=(${entryArgs}) + + REPO_ROOT="$(git rev-parse --show-toplevel)" + cd "$REPO_ROOT" + + # When running in GH Actions, emit Markdown links in the report pointing + # at the offending line of the nix file (and the lockfile) at the exact + # commit that was checked. LINK_SHA should be set by the workflow to the + # PR head SHA; falls back to GITHUB_SHA (which on pull_request is the + # test-merge commit, still browseable). 
+      LINK_SERVER="''${GITHUB_SERVER_URL:-https://github.com}"
+      LINK_REPO="''${GITHUB_REPOSITORY:-}"
+      LINK_SHA="''${LINK_SHA:-''${GITHUB_SHA:-}}"
+
+      STALE=0
+      FIXED=0
+      REPORT=""
+
+      for entry in "''${ENTRIES[@]}"; do
+        IFS=":" read -r ATTR FOLDER NIX_FILE <<< "$entry"
+        echo "==> .#$ATTR ($FOLDER -> $NIX_FILE)"
+        OUTPUT=$(nix build ".#$ATTR.npmDeps" --no-link --print-build-logs 2>&1)
+        STATUS=$?
+        if [ "$STATUS" -eq 0 ]; then
+          echo " ok"
+          continue
+        fi
+
+        NEW_HASH=$(echo "$OUTPUT" | awk '/got:/ {print $2; exit}')
+        if [ -z "$NEW_HASH" ]; then
+          echo " build failed with no hash mismatch:" >&2
+          echo "$OUTPUT" | tail -40 >&2
+          exit 1
+        fi
+
+        HASH_LINE=$(grep -n 'hash = "sha256-' "$NIX_FILE" | head -1 | cut -d: -f1)
+        OLD_HASH=$(grep -oE 'hash = "sha256-[^"]+"' "$NIX_FILE" | head -1 \
+          | sed -E 's/hash = "(.*)"/\1/')
+        LOCK_FILE="$FOLDER/package-lock.json"
+        echo " stale: $NIX_FILE:$HASH_LINE $OLD_HASH -> $NEW_HASH"
+        STALE=1
+
+        if [ -n "$LINK_REPO" ] && [ -n "$LINK_SHA" ]; then
+          NIX_URL="$LINK_SERVER/$LINK_REPO/blob/$LINK_SHA/$NIX_FILE#L$HASH_LINE"
+          LOCK_URL="$LINK_SERVER/$LINK_REPO/blob/$LINK_SHA/$LOCK_FILE"
+          REPORT+="- [\`$NIX_FILE:$HASH_LINE\`]($NIX_URL) (\`.#$ATTR\`): \`$OLD_HASH\` → \`$NEW_HASH\` — lockfile: [\`$LOCK_FILE\`]($LOCK_URL)"$'\n'
+        else
+          REPORT+="- \`$NIX_FILE:$HASH_LINE\` (\`.#$ATTR\`): \`$OLD_HASH\` → \`$NEW_HASH\`"$'\n'
+        fi
+
+        if [ "$MODE" = "--apply" ]; then
+          sed -i "s|hash = \"sha256-[^\"]*\";|hash = \"$NEW_HASH\";|" "$NIX_FILE"
+          nix build ".#$ATTR.npmDeps" --no-link --print-build-logs
+          FIXED=1
+          echo " fixed"
+        fi
+      done
+
+      if [ -n "''${GITHUB_OUTPUT:-}" ]; then
+        {
+          [ "$STALE" -eq 1 ] && echo "stale=true" || echo "stale=false"
+          [ "$FIXED" -eq 1 ] && echo "changed=true" || echo "changed=false"
+          if [ -n "$REPORT" ]; then
+            echo "report<<EOF"
+            echo "$REPORT"
+            echo "EOF"
+          fi
+        } >> "$GITHUB_OUTPUT"
+      fi
+
+      if [ "$STALE" -eq 1 ] && [ "$MODE" = "--check" ]; then
+        echo
+        echo "Stale lockfile hashes detected. Run:"
+        echo " nix run .#fix-lockfiles -- --apply"
+        exit 1
+      fi
+
+      exit 0
+    '';
+}
diff --git a/nix/packages.nix b/nix/packages.nix
index 912be7843b..721546851d 100644
--- a/nix/packages.nix
+++ b/nix/packages.nix
@@ -8,10 +8,14 @@
     inherit (inputs) uv2nix pyproject-nix pyproject-build-systems;
   };
 
-  hermesTui = pkgs.callPackage ./tui.nix {
+  hermesNpmLib = pkgs.callPackage ./lib.nix {
     npm-lockfile-fix = inputs'.npm-lockfile-fix.packages.default;
   };
 
+  hermesTui = pkgs.callPackage ./tui.nix {
+    inherit hermesNpmLib;
+  };
+
   # Import bundled skills, excluding runtime caches
   bundledSkills = pkgs.lib.cleanSourceWith {
     src = ../skills;
@@ -19,7 +23,7 @@
   };
 
   hermesWeb = pkgs.callPackage ./web.nix {
-    npm-lockfile-fix = inputs'.npm-lockfile-fix.packages.default;
+    inherit hermesNpmLib;
   };
 
   runtimeDeps = with pkgs; [
@@ -111,6 +115,10 @@
 
     tui = hermesTui;
     web = hermesWeb;
+
+    fix-lockfiles = hermesNpmLib.mkFixLockfiles {
+      packages = [ hermesTui hermesWeb ];
+    };
   };
 };
}
diff --git a/nix/tui.nix b/nix/tui.nix
index 7303edecb9..04bbfa034e 100644
--- a/nix/tui.nix
+++ b/nix/tui.nix
@@ -1,18 +1,18 @@
 # nix/tui.nix — Hermes TUI (Ink/React) compiled with tsc and bundled
-{ pkgs, npm-lockfile-fix, ... }:
+{ pkgs, hermesNpmLib, ...
}: let src = ../ui-tui; npmDeps = pkgs.fetchNpmDeps { inherit src; - hash = "sha256-mG3vpgGi4ljt4X3XIf3I/5mIcm+rVTUAmx2DQ6YVA90="; + hash = "sha256-RU4qSHgJPMyfRSEJDzkG4+MReDZDc6QbTD2wisa5QE0="; }; + npm = hermesNpmLib.mkNpmPassthru { folder = "ui-tui"; attr = "tui"; pname = "hermes-tui"; }; + packageJson = builtins.fromJSON (builtins.readFile (src + "/package.json")); version = packageJson.version; - - npmLockHash = builtins.hashString "sha256" (builtins.readFile ../ui-tui/package-lock.json); in -pkgs.buildNpmPackage { +pkgs.buildNpmPackage (npm // { pname = "hermes-tui"; inherit src npmDeps version; @@ -37,41 +37,4 @@ pkgs.buildNpmPackage { runHook postInstall ''; - - nativeBuildInputs = [ - (pkgs.writeShellScriptBin "update_tui_lockfile" '' - set -euox pipefail - - # get root of repo - REPO_ROOT=$(git rev-parse --show-toplevel) - - # cd into ui-tui and reinstall - cd "$REPO_ROOT/ui-tui" - rm -rf node_modules/ - npm cache clean --force - CI=true npm install # ci env var to suppress annoying unicode install banner lag - ${pkgs.lib.getExe npm-lockfile-fix} ./package-lock.json - - NIX_FILE="$REPO_ROOT/nix/tui.nix" - # compute the new hash - sed -i "s/hash = \"[^\"]*\";/hash = \"\";/" $NIX_FILE - NIX_OUTPUT=$(nix build .#tui 2>&1 || true) - NEW_HASH=$(echo "$NIX_OUTPUT" | grep 'got:' | awk '{print $2}') - echo got new hash $NEW_HASH - sed -i "s|hash = \"[^\"]*\";|hash = \"$NEW_HASH\";|" $NIX_FILE - nix build .#tui - echo "Updated npm hash in $NIX_FILE to $NEW_HASH" - '') - ]; - - passthru.devShellHook = '' - STAMP=".nix-stamps/hermes-tui" - STAMP_VALUE="${npmLockHash}" - if [ ! -f "$STAMP" ] || [ "$(cat "$STAMP")" != "$STAMP_VALUE" ]; then - echo "hermes-tui: installing npm dependencies..." - cd ui-tui && CI=true npm install --silent --no-fund --no-audit 2>/dev/null && cd .. - mkdir -p .nix-stamps - echo "$STAMP_VALUE" > "$STAMP" - fi - ''; -} +}) diff --git a/nix/web.nix b/nix/web.nix index 247889753f..fc77728966 100644 --- a/nix/web.nix +++ b/nix/web.nix @@ -1,15 +1,15 @@ # nix/web.nix — Hermes Web Dashboard (Vite/React) frontend build -{ pkgs, npm-lockfile-fix, ... }: +{ pkgs, hermesNpmLib, ... }: let src = ../web; npmDeps = pkgs.fetchNpmDeps { inherit src; - hash = "sha256-Y0pOzdFG8BLjfvCLmsvqYpjxFjAQabXp1i7X9W/cCU4="; + hash = "sha256-TS/vrCHbdvXkPcAPxImKzAd2pdDCrKlgYZkXBMQ+TEg="; }; - npmLockHash = builtins.hashString "sha256" (builtins.readFile ../web/package-lock.json); + npm = hermesNpmLib.mkNpmPassthru { folder = "web"; attr = "web"; pname = "hermes-web"; }; in -pkgs.buildNpmPackage { +pkgs.buildNpmPackage (npm // { pname = "hermes-web"; version = "0.0.0"; inherit src npmDeps; @@ -26,38 +26,4 @@ pkgs.buildNpmPackage { cp -r dist $out runHook postInstall ''; - - nativeBuildInputs = [ - (pkgs.writeShellScriptBin "update_web_lockfile" '' - set -euox pipefail - - REPO_ROOT=$(git rev-parse --show-toplevel) - - cd "$REPO_ROOT/web" - rm -rf node_modules/ - npm cache clean --force - CI=true npm install - ${pkgs.lib.getExe npm-lockfile-fix} ./package-lock.json - - NIX_FILE="$REPO_ROOT/nix/web.nix" - sed -i "s/hash = \"[^\"]*\";/hash = \"\";/" $NIX_FILE - NIX_OUTPUT=$(nix build .#web 2>&1 || true) - NEW_HASH=$(echo "$NIX_OUTPUT" | grep 'got:' | awk '{print $2}') - echo got new hash $NEW_HASH - sed -i "s|hash = \"[^\"]*\";|hash = \"$NEW_HASH\";|" $NIX_FILE - nix build .#web - echo "Updated npm hash in $NIX_FILE to $NEW_HASH" - '') - ]; - - passthru.devShellHook = '' - STAMP=".nix-stamps/hermes-web" - STAMP_VALUE="${npmLockHash}" - if [ ! 
-f "$STAMP" ] || [ "$(cat "$STAMP")" != "$STAMP_VALUE" ]; then - echo "hermes-web: installing npm dependencies..." - cd web && CI=true npm install --silent --no-fund --no-audit 2>/dev/null && cd .. - mkdir -p .nix-stamps - echo "$STAMP_VALUE" > "$STAMP" - fi - ''; -} +}) diff --git a/optional-skills/autonomous-ai-agents/honcho/SKILL.md b/optional-skills/autonomous-ai-agents/honcho/SKILL.md index c60d2c6356..1c099ca605 100644 --- a/optional-skills/autonomous-ai-agents/honcho/SKILL.md +++ b/optional-skills/autonomous-ai-agents/honcho/SKILL.md @@ -145,10 +145,10 @@ Controls **how often** dialectic and context calls happen. | Key | Default | Description | |-----|---------|-------------| | `contextCadence` | `1` | Min turns between context API calls | -| `dialecticCadence` | `3` | Min turns between dialectic API calls | +| `dialecticCadence` | `2` | Min turns between dialectic API calls. Recommended 1–5 | | `injectionFrequency` | `every-turn` | `every-turn` or `first-turn` for base context injection | -Higher cadence values reduce API calls and cost. `dialecticCadence: 3` (default) means the dialectic engine fires at most every 3rd turn. +Higher cadence values fire the dialectic LLM less often. `dialecticCadence: 2` means the engine fires every other turn. Setting it to `1` fires every turn. ### Depth (how many) @@ -180,6 +180,8 @@ If `dialecticDepthLevels` is omitted, rounds use **proportional levels** derived This keeps earlier passes cheap while using full depth on the final synthesis. +**Depth at session start.** The session-start prewarm runs the full configured `dialecticDepth` in the background before turn 1. A single-pass prewarm on a cold peer often returns thin output — multi-pass depth runs the audit/reconcile cycle before the user ever speaks. Turn 1 consumes the prewarm result directly; if prewarm hasn't landed in time, turn 1 falls back to a synchronous call with a bounded timeout. + ### Level (how hard) Controls the **intensity** of each dialectic reasoning round. @@ -368,7 +370,7 @@ Config file: `$HERMES_HOME/honcho.json` (profile-local) or `~/.honcho/config.jso | `contextTokens` | uncapped | Max tokens for the combined base context injection (summary + representation + card). Opt-in cap — omit to leave uncapped, set to an integer to bound injection size. | | `injectionFrequency` | `every-turn` | `every-turn` or `first-turn` | | `contextCadence` | `1` | Min turns between context API calls | -| `dialecticCadence` | `3` | Min turns between dialectic LLM calls | +| `dialecticCadence` | `2` | Min turns between dialectic LLM calls (recommended 1–5) | The `contextTokens` budget is enforced at injection time. If the session summary + representation + card exceed the budget, Honcho trims the summary first, then the representation, preserving the card. This prevents context blowup in long sessions. diff --git a/optional-skills/dogfood/DESCRIPTION.md b/optional-skills/dogfood/DESCRIPTION.md new file mode 100644 index 0000000000..f083fd72bd --- /dev/null +++ b/optional-skills/dogfood/DESCRIPTION.md @@ -0,0 +1,3 @@ +# Dogfood — Advanced QA & Testing Skills + +Specialized QA workflows that go beyond basic bug-finding. These skills use structured methodologies to surface UX friction, accessibility issues, and product-level problems that standard testing misses. 
diff --git a/optional-skills/dogfood/adversarial-ux-test/SKILL.md b/optional-skills/dogfood/adversarial-ux-test/SKILL.md new file mode 100644 index 0000000000..1777e083d1 --- /dev/null +++ b/optional-skills/dogfood/adversarial-ux-test/SKILL.md @@ -0,0 +1,190 @@ +--- +name: adversarial-ux-test +description: Roleplay the most difficult, tech-resistant user for your product. Browse the app as that persona, find every UX pain point, then filter complaints through a pragmatism layer to separate real problems from noise. Creates actionable tickets from genuine issues only. +version: 1.0.0 +author: Omni @ Comelse +license: MIT +metadata: + hermes: + tags: [qa, ux, testing, adversarial, dogfood, personas, user-testing] + related_skills: [dogfood] +--- + +# Adversarial UX Test + +Roleplay the worst-case user for your product — the person who hates technology, doesn't want your software, and will find every reason to complain. Then filter their feedback through a pragmatism layer to separate real UX problems from "I hate computers" noise. + +Think of it as an automated "mom test" — but angry. + +## Why This Works + +Most QA finds bugs. This finds **friction**. A technically correct app can still be unusable for real humans. The adversarial persona catches: +- Confusing terminology that makes sense to developers but not users +- Too many steps to accomplish basic tasks +- Missing onboarding or "aha moments" +- Accessibility issues (font size, contrast, click targets) +- Cold-start problems (empty states, no demo content) +- Paywall/signup friction that kills conversion + +The **pragmatism filter** (Phase 3) is what makes this useful instead of just entertaining. Without it, you'd add a "print this page" button to every screen because Grandpa can't figure out PDFs. + +## How to Use + +Tell the agent: +``` +"Run an adversarial UX test on [URL]" +"Be a grumpy [persona type] and test [app name]" +"Do an asshole user test on my staging site" +``` + +You can provide a persona or let the agent generate one based on your product's target audience. + +## Step 1: Define the Persona + +If no persona is provided, generate one by answering: + +1. **Who is the HARDEST user for this product?** (age 50+, non-technical role, decades of experience doing it "the old way") +2. **What is their tech comfort level?** (the lower the better — WhatsApp-only, paper notebooks, wife set up their email) +3. **What is the ONE thing they need to accomplish?** (their core job, not your feature list) +4. **What would make them give up?** (too many clicks, jargon, slow, confusing) +5. **How do they talk when frustrated?** (blunt, sweary, dismissive, sighing) + +### Good Persona Example +> **"Big Mick" McAllister** — 58-year-old S&C coach. Uses WhatsApp and that's it. His "spreadsheet" is a paper notebook. "If I can't figure it out in 10 seconds I'm going back to my notebook." Needs to log session results for 25 players. Hates small text, jargon, and passwords. + +### Bad Persona Example +> "A user who doesn't like the app" — too vague, no constraints, no voice. + +The persona must be **specific enough to stay in character** for 20 minutes of testing. + +## Step 2: Become the Asshole (Browse as the Persona) + +1. Read any available project docs for app context and URLs +2. **Fully inhabit the persona** — their frustrations, limitations, goals +3. Navigate to the app using browser tools +4. **Attempt the persona's ACTUAL TASKS** (not a feature tour): + - Can they do what they came to do? + - How many clicks/screens to accomplish it? 
+ - What confuses them? + - What makes them angry? + - Where do they get lost? + - What would make them give up and go back to their old way? + +5. Test these friction categories: + - **First impression** — would they even bother past the landing page? + - **Core workflow** — the ONE thing they need to do most often + - **Error recovery** — what happens when they do something wrong? + - **Readability** — text size, contrast, information density + - **Speed** — does it feel faster than their current method? + - **Terminology** — any jargon they wouldn't understand? + - **Navigation** — can they find their way back? do they know where they are? + +6. Take screenshots of every pain point +7. Check browser console for JS errors on every page + +## Step 3: The Rant (Write Feedback in Character) + +Write the feedback AS THE PERSONA — in their voice, with their frustrations. This is not a bug report. This is a real human venting. + +``` +[PERSONA NAME]'s Review of [PRODUCT] + +Overall: [Would they keep using it? Yes/No/Maybe with conditions] + +THE GOOD (grudging admission): +- [things even they have to admit work] + +THE BAD (legitimate UX issues): +- [real problems that would stop them from using the product] + +THE UGLY (showstoppers): +- [things that would make them uninstall/cancel immediately] + +SPECIFIC COMPLAINTS: +1. [Page/feature]: "[quote in persona voice]" — [what happened, expected] +2. ... + +VERDICT: "[one-line persona quote summarizing their experience]" +``` + +## Step 4: The Pragmatism Filter (Critical — Do Not Skip) + +Step OUT of the persona. Evaluate each complaint as a product person: + +- **RED: REAL UX BUG** — Any user would have this problem, not just grumpy ones. Fix it. +- **YELLOW: VALID BUT LOW PRIORITY** — Real issue but only for extreme users. Note it. +- **WHITE: PERSONA NOISE** — "I hate computers" talking, not a product problem. Skip it. +- **GREEN: FEATURE REQUEST** — Good idea hidden in the complaint. Consider it. + +### Filter Criteria +1. Would a 35-year-old competent-but-busy user have the same complaint? → RED +2. Is this a genuine accessibility issue (font size, contrast, click targets)? → RED +3. Is this "I want it to work like paper" resistance to digital? → WHITE +4. Is this a real workflow inefficiency the persona stumbled on? → YELLOW or RED +5. Would fixing this add complexity for the 80% who are fine? → WHITE +6. Does the complaint reveal a missing onboarding moment? → GREEN + +**This filter is MANDATORY.** Never ship raw persona complaints as tickets. + +## Step 5: Create Tickets + +For **RED** and **GREEN** items only: +- Clear, actionable title +- Include the persona's verbatim quote (entertaining + memorable) +- The real UX issue underneath (objective) +- A suggested fix (actionable) +- Tag/label: "ux-review" + +For **YELLOW** items: one catch-all ticket with all notes. + +**WHITE** items appear in the report only. No tickets. + +**Max 10 tickets per session** — focus on the worst issues. + +## Step 6: Report + +Deliver: +1. The persona rant (Step 3) — entertaining and visceral +2. The filtered assessment (Step 4) — pragmatic and actionable +3. Tickets created (Step 5) — with links +4. Screenshots of key issues + +## Tips + +- **One persona per session.** Don't mix perspectives. +- **Stay in character during Steps 2-3.** Break character only at Step 4. +- **Test the CORE WORKFLOW first.** Don't get distracted by settings pages. +- **Empty states are gold.** New user experience reveals the most friction. 
+- **The best findings are RED items the persona found accidentally** while trying to do something else. +- **If the persona has zero complaints, your persona is too tech-savvy.** Make them older, less patient, more set in their ways. +- **Run this before demos, launches, or after shipping a batch of features.** +- **Register as a NEW user when possible.** Don't use pre-seeded admin accounts — the cold start experience is where most friction lives. +- **Zero WHITE items is a signal, not a failure.** If the pragmatism filter finds no noise, your product has real UX problems, not just a grumpy persona. +- **Check known issues in project docs AFTER the test.** If the persona found a bug that's already in the known issues list, that's actually the most damning finding — it means the team knew about it but never felt the user's pain. +- **Subscription/paywall testing is critical.** Test with expired accounts, not just active ones. The "what happens when you can't pay" experience reveals whether the product respects users or holds their data hostage. +- **Count the clicks to accomplish the persona's ONE task.** If it's more than 5, that's almost always a RED finding regardless of persona tech level. + +## Example Personas by Industry + +These are starting points — customize for your specific product: + +| Product Type | Persona | Age | Key Trait | +|-------------|---------|-----|-----------| +| CRM | Retirement home director | 68 | Filing cabinet is the current CRM | +| Photography SaaS | Rural wedding photographer | 62 | Books clients by phone, invoices on paper | +| AI/ML Tool | Department store buyer | 55 | Burned by 3 failed tech startups | +| Fitness App | Old-school gym coach | 58 | Paper notebook, thick fingers, bad eyes | +| Accounting | Family bakery owner | 64 | Shoebox of receipts, hates subscriptions | +| E-commerce | Market stall vendor | 60 | Cash only, smartphone is for calls | +| Healthcare | Senior GP | 63 | Dictates notes, nurse handles the computer | +| Education | Veteran teacher | 57 | Chalk and talk, worksheets in ring binders | + +## Rules + +- Stay in character during Steps 2-3 +- Be genuinely mean but fair — find real problems, not manufactured ones +- The pragmatism filter (Step 4) is **MANDATORY** +- Screenshots required for every complaint +- Max 10 tickets per session +- Test on staging/deployed app, not local dev +- One persona, one session, one report diff --git a/skills/mcp/mcporter/SKILL.md b/optional-skills/mcp/mcporter/SKILL.md similarity index 100% rename from skills/mcp/mcporter/SKILL.md rename to optional-skills/mcp/mcporter/SKILL.md diff --git a/skills/mlops/models/clip/SKILL.md b/optional-skills/mlops/clip/SKILL.md similarity index 100% rename from skills/mlops/models/clip/SKILL.md rename to optional-skills/mlops/clip/SKILL.md diff --git a/skills/mlops/models/clip/references/applications.md b/optional-skills/mlops/clip/references/applications.md similarity index 100% rename from skills/mlops/models/clip/references/applications.md rename to optional-skills/mlops/clip/references/applications.md diff --git a/skills/mlops/cloud/modal/SKILL.md b/optional-skills/mlops/modal/SKILL.md similarity index 100% rename from skills/mlops/cloud/modal/SKILL.md rename to optional-skills/mlops/modal/SKILL.md diff --git a/skills/mlops/cloud/modal/references/advanced-usage.md b/optional-skills/mlops/modal/references/advanced-usage.md similarity index 100% rename from skills/mlops/cloud/modal/references/advanced-usage.md rename to 
optional-skills/mlops/modal/references/advanced-usage.md diff --git a/skills/mlops/cloud/modal/references/troubleshooting.md b/optional-skills/mlops/modal/references/troubleshooting.md similarity index 100% rename from skills/mlops/cloud/modal/references/troubleshooting.md rename to optional-skills/mlops/modal/references/troubleshooting.md diff --git a/skills/mlops/training/peft/SKILL.md b/optional-skills/mlops/peft/SKILL.md similarity index 100% rename from skills/mlops/training/peft/SKILL.md rename to optional-skills/mlops/peft/SKILL.md diff --git a/skills/mlops/training/peft/references/advanced-usage.md b/optional-skills/mlops/peft/references/advanced-usage.md similarity index 100% rename from skills/mlops/training/peft/references/advanced-usage.md rename to optional-skills/mlops/peft/references/advanced-usage.md diff --git a/skills/mlops/training/peft/references/troubleshooting.md b/optional-skills/mlops/peft/references/troubleshooting.md similarity index 100% rename from skills/mlops/training/peft/references/troubleshooting.md rename to optional-skills/mlops/peft/references/troubleshooting.md diff --git a/skills/mlops/training/pytorch-fsdp/SKILL.md b/optional-skills/mlops/pytorch-fsdp/SKILL.md similarity index 100% rename from skills/mlops/training/pytorch-fsdp/SKILL.md rename to optional-skills/mlops/pytorch-fsdp/SKILL.md diff --git a/skills/mlops/training/pytorch-fsdp/references/index.md b/optional-skills/mlops/pytorch-fsdp/references/index.md similarity index 100% rename from skills/mlops/training/pytorch-fsdp/references/index.md rename to optional-skills/mlops/pytorch-fsdp/references/index.md diff --git a/skills/mlops/training/pytorch-fsdp/references/other.md b/optional-skills/mlops/pytorch-fsdp/references/other.md similarity index 100% rename from skills/mlops/training/pytorch-fsdp/references/other.md rename to optional-skills/mlops/pytorch-fsdp/references/other.md diff --git a/skills/mlops/models/stable-diffusion/SKILL.md b/optional-skills/mlops/stable-diffusion/SKILL.md similarity index 100% rename from skills/mlops/models/stable-diffusion/SKILL.md rename to optional-skills/mlops/stable-diffusion/SKILL.md diff --git a/skills/mlops/models/stable-diffusion/references/advanced-usage.md b/optional-skills/mlops/stable-diffusion/references/advanced-usage.md similarity index 100% rename from skills/mlops/models/stable-diffusion/references/advanced-usage.md rename to optional-skills/mlops/stable-diffusion/references/advanced-usage.md diff --git a/skills/mlops/models/stable-diffusion/references/troubleshooting.md b/optional-skills/mlops/stable-diffusion/references/troubleshooting.md similarity index 100% rename from skills/mlops/models/stable-diffusion/references/troubleshooting.md rename to optional-skills/mlops/stable-diffusion/references/troubleshooting.md diff --git a/skills/mlops/models/whisper/SKILL.md b/optional-skills/mlops/whisper/SKILL.md similarity index 100% rename from skills/mlops/models/whisper/SKILL.md rename to optional-skills/mlops/whisper/SKILL.md diff --git a/skills/mlops/models/whisper/references/languages.md b/optional-skills/mlops/whisper/references/languages.md similarity index 100% rename from skills/mlops/models/whisper/references/languages.md rename to optional-skills/mlops/whisper/references/languages.md diff --git a/optional-skills/productivity/telephony/SKILL.md b/optional-skills/productivity/telephony/SKILL.md index c74a369209..6c457592a9 100644 --- a/optional-skills/productivity/telephony/SKILL.md +++ b/optional-skills/productivity/telephony/SKILL.md @@ 
-7,7 +7,7 @@ license: MIT metadata: hermes: tags: [telephony, phone, sms, mms, voice, twilio, bland.ai, vapi, calling, texting] - related_skills: [find-nearby, google-workspace, agentmail] + related_skills: [maps, google-workspace, agentmail] category: productivity --- diff --git a/optional-skills/research/duckduckgo-search/SKILL.md b/optional-skills/research/duckduckgo-search/SKILL.md index ea14e6b30f..c24fc1b956 100644 --- a/optional-skills/research/duckduckgo-search/SKILL.md +++ b/optional-skills/research/duckduckgo-search/SKILL.md @@ -57,32 +57,32 @@ Use the `ddgs` command via `terminal` when it exists. This is the preferred path ```bash # Text search -ddgs text -k "python async programming" -m 5 +ddgs text -q "python async programming" -m 5 # News search -ddgs news -k "artificial intelligence" -m 5 +ddgs news -q "artificial intelligence" -m 5 # Image search -ddgs images -k "landscape photography" -m 10 +ddgs images -q "landscape photography" -m 10 # Video search -ddgs videos -k "python tutorial" -m 5 +ddgs videos -q "python tutorial" -m 5 # With region filter -ddgs text -k "best restaurants" -m 5 -r us-en +ddgs text -q "best restaurants" -m 5 -r us-en # Recent results only (d=day, w=week, m=month, y=year) -ddgs text -k "latest AI news" -m 5 -t w +ddgs text -q "latest AI news" -m 5 -t w # JSON output for parsing -ddgs text -k "fastapi tutorial" -m 5 -o json +ddgs text -q "fastapi tutorial" -m 5 -o json ``` ### CLI Flags | Flag | Description | Example | |------|-------------|---------| -| `-k` | Keywords (query) — **required** | `-k "search terms"` | +| `-q` | Query — **required** | `-q "search terms"` | | `-m` | Max results | `-m 5` | | `-r` | Region | `-r us-en` | | `-t` | Time limit | `-t w` (week) | @@ -189,7 +189,7 @@ DuckDuckGo returns titles, URLs, and snippets — not full page content. To get CLI example: ```bash -ddgs text -k "fastapi deployment guide" -m 3 -o json +ddgs text -q "fastapi deployment guide" -m 3 -o json ``` Python example, only after verifying `ddgs` is installed in that runtime: @@ -229,7 +229,7 @@ Then extract the best URL with `web_extract` or another content-retrieval tool. - **Do not assume the CLI exists**: Check `command -v ddgs` before using it. - **Do not assume `execute_code` can import `ddgs`**: `from ddgs import DDGS` may fail with `ModuleNotFoundError` unless that runtime was prepared separately. - **Package name**: The package is `ddgs` (previously `duckduckgo-search`). Install with `pip install ddgs`. -- **Don't confuse `-k` and `-m`** (CLI): `-k` is for keywords, `-m` is for max results count. +- **Don't confuse `-q` and `-m`** (CLI): `-q` is for the query, `-m` is for max results count. - **Empty results**: If `ddgs` returns nothing, it may be rate-limited. Wait a few seconds and retry. ## Validated With diff --git a/optional-skills/research/duckduckgo-search/scripts/duckduckgo.sh b/optional-skills/research/duckduckgo-search/scripts/duckduckgo.sh index b33ac8a60d..1553d45968 100755 --- a/optional-skills/research/duckduckgo-search/scripts/duckduckgo.sh +++ b/optional-skills/research/duckduckgo-search/scripts/duckduckgo.sh @@ -25,4 +25,4 @@ if ! 
command -v ddgs &> /dev/null; then
     exit 1
 fi
 
-ddgs text -k "$QUERY" -m "$MAX_RESULTS"
+ddgs text -q "$QUERY" -m "$MAX_RESULTS"
diff --git a/optional-skills/web-development/DESCRIPTION.md b/optional-skills/web-development/DESCRIPTION.md
new file mode 100644
index 0000000000..588817bbca
--- /dev/null
+++ b/optional-skills/web-development/DESCRIPTION.md
@@ -0,0 +1,5 @@
+# Web Development
+
+Optional skills for client-side web development workflows — embedding agents, copilots, and AI-native UX patterns into user-facing web apps.
+
+These are distinct from Hermes' own browser automation (Browserbase, Camofox), which operate *on* websites from outside. Web-development skills here help users build *into* their own websites.
diff --git a/optional-skills/web-development/page-agent/SKILL.md b/optional-skills/web-development/page-agent/SKILL.md
new file mode 100644
index 0000000000..caab19901f
--- /dev/null
+++ b/optional-skills/web-development/page-agent/SKILL.md
@@ -0,0 +1,189 @@
+---
+name: page-agent
+description: Embed alibaba/page-agent into your own web application — a pure-JavaScript in-page GUI agent that ships as a single `<script>` tag.
+---
+
+## Path 1 — CDN script tag (quick demo)
+
+Drop the demo bundle into any page:
+
+```html
+<script src="https://cdn.jsdelivr.net/npm/page-agent@1.8.0/dist/iife/page-agent.demo.js"></script>
+```
+
+A panel appears. Type an instruction. Done.
+
+Bookmarklet form (drop into bookmarks bar, click on any page):
+
+```javascript
+javascript:(function(){var s=document.createElement('script');s.src='https://cdn.jsdelivr.net/npm/page-agent@1.8.0/dist/iife/page-agent.demo.js';document.head.appendChild(s);})();
+```
+
+## Path 2 — npm install into your own web app (production use)
+
+Inside an existing web project (React / Vue / Svelte / plain):
+
+```bash
+npm install page-agent
+```
+
+Wire it up with your own LLM endpoint — **never ship the demo CDN to real users**:
+
+```javascript
+import { PageAgent } from 'page-agent'
+
+const agent = new PageAgent({
+  model: 'qwen3.5-plus',
+  baseURL: 'https://dashscope.aliyuncs.com/compatible-mode/v1',
+  apiKey: process.env.LLM_API_KEY, // never hardcode
+  language: 'en-US',
+})
+
+// Show the panel for end users:
+agent.panel.show()
+
+// Or drive it programmatically:
+await agent.execute('Click submit button, then fill username as John')
+```
+
+Provider examples (any OpenAI-compatible endpoint works):
+
+| Provider | `baseURL` | `model` |
+|----------|-----------|---------|
+| Qwen / DashScope | `https://dashscope.aliyuncs.com/compatible-mode/v1` | `qwen3.5-plus` |
+| OpenAI | `https://api.openai.com/v1` | `gpt-4o-mini` |
+| Ollama (local) | `http://localhost:11434/v1` | `qwen3:14b` |
+| OpenRouter | `https://openrouter.ai/api/v1` | `anthropic/claude-sonnet-4.6` |
+
+**Key config fields** (passed to `new PageAgent({...})`):
+
+- `model`, `baseURL`, `apiKey` — LLM connection
+- `language` — UI language (`en-US`, `zh-CN`, etc.)
+- Allowlist and data-masking hooks exist for locking down what the agent can touch — see https://alibaba.github.io/page-agent/ for the full option list
+
+**Security.** Don't put your `apiKey` in client-side code for a real deployment — proxy LLM calls through your backend and point `baseURL` at your proxy. The demo CDN exists because alibaba runs that proxy for evaluation.
+
+## Path 3 — clone the source repo (contributing, or hacking on it)
+
+Use this when the user wants to modify page-agent itself, test it against arbitrary sites via a local IIFE bundle, or develop the browser extension.
+
+```bash
+git clone https://github.com/alibaba/page-agent.git
+cd page-agent
+npm ci    # exact lockfile install (or `npm i` to allow updates)
+```
+
+Create `.env` in the repo root with an LLM endpoint.
Example: + +``` +LLM_MODEL_NAME=gpt-4o-mini +LLM_API_KEY=sk-... +LLM_BASE_URL=https://api.openai.com/v1 +``` + +Ollama flavor: + +``` +LLM_BASE_URL=http://localhost:11434/v1 +LLM_API_KEY=NA +LLM_MODEL_NAME=qwen3:14b +``` + +Common commands: + +```bash +npm start # docs/website dev server +npm run build # build every package +npm run dev:demo # serve IIFE bundle at http://localhost:5174/page-agent.demo.js +npm run dev:ext # develop the browser extension (WXT + React) +npm run build:ext # build the extension +``` + +**Test on any website** using the local IIFE bundle. Add this bookmarklet: + +```javascript +javascript:(function(){var s=document.createElement('script');s.src=`http://localhost:5174/page-agent.demo.js?t=${Math.random()}`;s.onload=()=>console.log('PageAgent ready!');document.head.appendChild(s);})(); +``` + +Then: `npm run dev:demo`, click the bookmarklet on any page, and the local build injects. Auto-rebuilds on save. + +**Warning:** your `.env` `LLM_API_KEY` is inlined into the IIFE bundle during dev builds. Don't share the bundle. Don't commit it. Don't paste the URL into Slack. (Verified: grepping the public dev bundle returns the literal values from `.env`.) + +## Repo layout (Path 3) + +Monorepo with npm workspaces. Key packages: + +| Package | Path | Purpose | +|---------|------|---------| +| `page-agent` | `packages/page-agent/` | Main entry with UI panel | +| `@page-agent/core` | `packages/core/` | Core agent logic, no UI | +| `@page-agent/mcp` | `packages/mcp/` | MCP server (beta) | +| — | `packages/llms/` | LLM client | +| — | `packages/page-controller/` | DOM ops + visual feedback | +| — | `packages/ui/` | Panel + i18n | +| — | `packages/extension/` | Chrome/Firefox extension | +| — | `packages/website/` | Docs + landing site | + +## Verifying it works + +After Path 1 or Path 2: +1. Open the page in a browser with devtools open +2. You should see a floating panel. If not, check the console for errors (most common: CORS on the LLM endpoint, wrong `baseURL`, or a bad API key) +3. Type a simple instruction matching something visible on the page ("click the Login link") +4. Watch the Network tab — you should see a request to your `baseURL` + +After Path 3: +1. `npm run dev:demo` prints `Accepting connections at http://localhost:5174` +2. `curl -I http://localhost:5174/page-agent.demo.js` returns `HTTP/1.1 200 OK` with `Content-Type: application/javascript` +3. Click the bookmarklet on any site; panel appears + +## Pitfalls + +- **Demo CDN in production** — don't. It's rate-limited, uses alibaba's free proxy, and their terms forbid production use. +- **API key exposure** — any key passed to `new PageAgent({apiKey: ...})` ships in your JS bundle. Always proxy through your own backend for real deployments. +- **Non-OpenAI-compatible endpoints** fail silently or with cryptic errors. If your provider needs native Anthropic/Gemini formatting, use an OpenAI-compatibility proxy (LiteLLM, OpenRouter) in front. +- **CSP blocks** — sites with strict Content-Security-Policy may refuse to load the CDN script or disallow inline eval. In that case, self-host from your origin. +- **Restart dev server** after editing `.env` in Path 3 — Vite only reads env at startup. +- **Node version** — the repo declares `^22.13.0 || >=24`. Node 20 will fail `npm ci` with engine errors. +- **npm 10 vs 11** — docs say npm 11+; npm 10.9 actually works fine. 
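+
+For the **API key exposure** pitfall above: a minimal relay sketch, assuming Node 18+ with Express; the `/llm/v1` route and the `LLM_BASE_URL` / `LLM_API_KEY` names are illustrative, not part of page-agent:
+
+```javascript
+// server.js: browser -> your origin -> any OpenAI-compatible upstream.
+// The real key stays in the server environment; point PageAgent's
+// baseURL at this relay instead of the provider.
+import express from 'express'
+
+const app = express()
+app.use(express.json({ limit: '1mb' }))
+
+const UPSTREAM = process.env.LLM_BASE_URL ?? 'https://api.openai.com/v1'
+
+// Non-streaming sketch: forward chat completions, injecting the key server-side.
+app.post('/llm/v1/chat/completions', async (req, res) => {
+  const upstream = await fetch(`${UPSTREAM}/chat/completions`, {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+      Authorization: `Bearer ${process.env.LLM_API_KEY}`,
+    },
+    body: JSON.stringify(req.body),
+  })
+  res.status(upstream.status)
+  res.set('Content-Type', upstream.headers.get('content-type') ?? 'application/json')
+  res.send(Buffer.from(await upstream.arrayBuffer()))
+})
+
+app.listen(8787)
+```
+
+Client side, construct `new PageAgent({ baseURL: 'https://your.app/llm/v1', ... })`; whether `apiKey` can then be a dummy value depends on the page-agent version, so check the option list in the docs.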
+ +## Reference + +- Repo: https://github.com/alibaba/page-agent +- Docs: https://alibaba.github.io/page-agent/ +- License: MIT (built on browser-use's DOM processing internals, Copyright 2024 Gregor Zunic) diff --git a/package-lock.json b/package-lock.json index 9d0ae80cdc..728429e51b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1069,6 +1069,7 @@ } ], "license": "MIT", + "peer": true, "dependencies": { "baseline-browser-mapping": "^2.10.12", "caniuse-lite": "^1.0.30001782", @@ -3911,6 +3912,7 @@ "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.59.1.tgz", "integrity": "sha512-C8oWjPR3F81yljW9o5OxcWzfh6avkVwDD2VYdwIGqTkl+OGFISgypqzfu7dOe4QNLL2aqcWBmI3PMtLIK233lw==", "license": "Apache-2.0", + "peer": true, "dependencies": { "playwright-core": "1.59.1" }, @@ -3929,6 +3931,7 @@ "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.59.1.tgz", "integrity": "sha512-HBV/RJg81z5BiiZ9yPzIiClYV/QMsDCKUyogwH9p3MCP6IYjUFu/MActgYAvK0oWyV9NlwM3GLBjADyWgydVyg==", "license": "Apache-2.0", + "peer": true, "bin": { "playwright-core": "cli.js" }, diff --git a/plans/gemini-oauth-provider.md b/plans/gemini-oauth-provider.md index 9953d0eca5..a466183e80 100644 --- a/plans/gemini-oauth-provider.md +++ b/plans/gemini-oauth-provider.md @@ -4,7 +4,7 @@ Add a first-class `gemini` provider that authenticates via Google OAuth, using the standard Gemini API (not Cloud Code Assist). Users who have a Google AI subscription or Gemini API access can authenticate through the browser without needing to manually copy API keys. ## Architecture Decision -- **Path A (chosen):** Standard Gemini API at `generativelanguage.googleapis.com/v1beta/openai/` +- **Path A (chosen):** Standard Gemini API at `generativelanguage.googleapis.com/v1beta` - **NOT Path B:** Cloud Code Assist (`cloudcode-pa.googleapis.com`) — rate-limited free tier, internal API, account ban risk - Standard `chat_completions` api_mode via OpenAI SDK — no new api_mode needed - Our own OAuth credentials — NOT sharing tokens with Gemini CLI @@ -32,9 +32,9 @@ Add a first-class `gemini` provider that authenticates via Google OAuth, using t - File locking for concurrent access (multiple agent sessions) ## API Integration -- Base URL: `https://generativelanguage.googleapis.com/v1beta/openai/` -- Auth: `Authorization: Bearer ` (passed as `api_key` to OpenAI SDK) -- api_mode: `chat_completions` (standard) +- Base URL: `https://generativelanguage.googleapis.com/v1beta` +- Auth: native Gemini API authentication handled by the provider adapter +- api_mode: `chat_completions` (standard facade over native transport) - Models: gemini-2.5-pro, gemini-2.5-flash, gemini-2.0-flash, etc. ## Files to Create/Modify diff --git a/plugins/disk-cleanup/README.md b/plugins/disk-cleanup/README.md new file mode 100644 index 0000000000..bc46047325 --- /dev/null +++ b/plugins/disk-cleanup/README.md @@ -0,0 +1,51 @@ +# disk-cleanup + +Auto-tracks and cleans up ephemeral files created during Hermes Agent +sessions — test scripts, temp outputs, cron logs, stale chrome profiles. +Scoped strictly to `$HERMES_HOME` and `/tmp/hermes-*`. + +Originally contributed by [@LVT382009](https://github.com/LVT382009) as a +skill in PR #12212. Ported to the plugin system so the behaviour runs +automatically via `post_tool_call` and `on_session_end` hooks — the agent +never needs to remember to call a tool. 
+
+## How it works
+
+| Hook | Behaviour |
+|---|---|
+| `post_tool_call` | When `write_file` / `terminal` / `patch` creates a file matching `test_*`, `tmp_*`, or `*.test.*` inside `HERMES_HOME`, track it silently as `test` / `temp` / `cron-output`. |
+| `on_session_end` | If any test files were auto-tracked during this turn, run `quick` cleanup (no prompts). |
+
+Deletion rules (same as the original PR):
+
+| Category | Threshold | Confirmation |
+|---|---|---|
+| `test` | every session end | Never |
+| `temp` | >7 days since tracked | Never |
+| `cron-output` | >14 days since tracked | Never |
+| empty dirs under HERMES_HOME | always | Never |
+| `research` | >30 days, beyond 10 newest | Always (deep only) |
+| `chrome-profile` | >14 days since tracked | Always (deep only) |
+| files >500 MB | never auto | Always (deep only) |
+
+## Slash command
+
+```
+/disk-cleanup status                   # breakdown + top-10 largest
+/disk-cleanup dry-run                  # preview without deleting
+/disk-cleanup quick                    # run safe cleanup now
+/disk-cleanup deep                     # quick + list items needing prompt
+/disk-cleanup track <path> <category>  # manual tracking
+/disk-cleanup forget <path>            # stop tracking
+```
+
+## Safety
+
+- `is_safe_path()` rejects anything outside `HERMES_HOME` or `/tmp/hermes-*`
+- Windows mounts (`/mnt/c` etc.) are rejected
+- The state directory `$HERMES_HOME/disk-cleanup/` is itself excluded
+- `$HERMES_HOME/logs/`, `memories/`, `sessions/`, `skills/`, `plugins/`,
+  and config files are never tracked
+- Backup/restore is scoped to `tracked.json` — the plugin never touches
+  agent logs
+- Atomic writes: `.tmp` → backup → rename
diff --git a/plugins/disk-cleanup/__init__.py b/plugins/disk-cleanup/__init__.py
new file mode 100644
index 0000000000..0a4b6c7ae1
--- /dev/null
+++ b/plugins/disk-cleanup/__init__.py
@@ -0,0 +1,316 @@
+"""disk-cleanup plugin — auto-cleanup of ephemeral Hermes session files.
+
+Wires three behaviours:
+
+1. ``post_tool_call`` hook — inspects ``write_file``, ``terminal``, and
+   ``patch`` tool results for newly-created paths matching test/temp
+   patterns under ``HERMES_HOME`` and tracks them silently. Zero agent
+   compliance required.
+
+2. ``on_session_end`` hook — when any test files were auto-tracked
+   during the just-finished turn, runs :func:`disk_cleanup.quick` and
+   logs a single line to ``$HERMES_HOME/disk-cleanup/cleanup.log``.
+
+3. ``/disk-cleanup`` slash command — manual ``status``, ``dry-run``,
+   ``quick``, ``deep``, ``track``, ``forget``.
+
+Replaces PR #12212's skill-plus-script design: the agent no longer
+needs to remember to run commands.
+"""
+
+from __future__ import annotations
+
+import logging
+import re
+import shlex
+import threading
+from pathlib import Path
+from typing import Any, Dict, Optional, Set
+
+from . import disk_cleanup as dg
+
+logger = logging.getLogger(__name__)
+
+
+# Per-task set of "test files newly tracked this turn". Keyed by task_id
+# (or session_id as fallback) so on_session_end can decide whether to run
+# cleanup. Guarded by a lock — post_tool_call can fire concurrently on
+# parallel tool calls.
+_recent_test_tracks: Dict[str, Set[str]] = {} +_lock = threading.Lock() + + +# Tool-call result shapes we can parse +_WRITE_FILE_PATH_KEY = "path" +_TERMINAL_PATH_REGEX = re.compile(r"(?:^|\s)(/[^\s'\"`]+|\~/[^\s'\"`]+)") + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _tracker_key(task_id: str, session_id: str) -> str: + return task_id or session_id or "default" + + +def _record_track(task_id: str, session_id: str, path: Path, category: str) -> None: + """Record that we tracked *path* as *category* during this turn.""" + if category != "test": + return + key = _tracker_key(task_id, session_id) + with _lock: + _recent_test_tracks.setdefault(key, set()).add(str(path)) + + +def _drain(task_id: str, session_id: str) -> Set[str]: + """Pop the set of test paths tracked during this turn.""" + key = _tracker_key(task_id, session_id) + with _lock: + return _recent_test_tracks.pop(key, set()) + + +def _attempt_track(path_str: str, task_id: str, session_id: str) -> None: + """Best-effort auto-track. Never raises.""" + try: + p = Path(path_str).expanduser() + except Exception: + return + if not p.exists(): + return + category = dg.guess_category(p) + if category is None: + return + newly = dg.track(str(p), category, silent=True) + if newly: + _record_track(task_id, session_id, p, category) + + +def _extract_paths_from_write_file(args: Dict[str, Any]) -> Set[str]: + path = args.get(_WRITE_FILE_PATH_KEY) + return {path} if isinstance(path, str) and path else set() + + +def _extract_paths_from_patch(args: Dict[str, Any]) -> Set[str]: + # The patch tool creates new files via the `mode="patch"` path too, but + # most of its use is editing existing files — we only care about new + # ephemeral creations, so treat patch conservatively and only pick up + # the single-file `path` arg. Track-then-cleanup is idempotent, so + # re-tracking an already-tracked file is a no-op (dedup in track()). + path = args.get("path") + return {path} if isinstance(path, str) and path else set() + + +def _extract_paths_from_terminal(args: Dict[str, Any], result: str) -> Set[str]: + """Best-effort: pull candidate filesystem paths from a terminal command + and its output, then let ``guess_category`` / ``is_safe_path`` filter. + """ + paths: Set[str] = set() + cmd = args.get("command") or "" + if isinstance(cmd, str) and cmd: + # Tokenise the command — catches `touch /tmp/hermes-x/test_foo.py` + try: + for tok in shlex.split(cmd, posix=True): + if tok.startswith(("/", "~")): + paths.add(tok) + except ValueError: + pass + # Only scan the result text if it's a reasonable size (avoid 50KB dumps). 
+    if isinstance(result, str) and len(result) < 4096:
+        for match in _TERMINAL_PATH_REGEX.findall(result):
+            paths.add(match)
+    return paths
+
+
+# ---------------------------------------------------------------------------
+# Hooks
+# ---------------------------------------------------------------------------
+
+def _on_post_tool_call(
+    tool_name: str = "",
+    args: Optional[Dict[str, Any]] = None,
+    result: Any = None,
+    task_id: str = "",
+    session_id: str = "",
+    tool_call_id: str = "",
+    **_: Any,
+) -> None:
+    """Auto-track ephemeral files created by recent tool calls."""
+    if not isinstance(args, dict):
+        return
+
+    candidates: Set[str] = set()
+    if tool_name == "write_file":
+        candidates = _extract_paths_from_write_file(args)
+    elif tool_name == "patch":
+        candidates = _extract_paths_from_patch(args)
+    elif tool_name == "terminal":
+        candidates = _extract_paths_from_terminal(args, result if isinstance(result, str) else "")
+    else:
+        return
+
+    for path_str in candidates:
+        _attempt_track(path_str, task_id, session_id)
+
+
+def _on_session_end(
+    session_id: str = "",
+    completed: bool = True,
+    interrupted: bool = False,
+    **_: Any,
+) -> None:
+    """Run quick cleanup if any test files were tracked during this turn."""
+    # Drain both task-level and session-level buckets. In practice only one
+    # is populated per turn; the other is empty.
+    drained_session = _drain("", session_id)
+    # Also drain any task-scoped buckets that happen to exist. This is a
+    # cheap sweep: if an agent spawned subagents (each with their own
+    # task_id) they'll have recorded into separate buckets; we want to
+    # clean them all up at session end.
+    with _lock:
+        task_buckets = list(_recent_test_tracks.keys())
+        for key in task_buckets:
+            if key and key != session_id:
+                _recent_test_tracks.pop(key, None)
+
+    if not drained_session and not task_buckets:
+        return
+
+    try:
+        summary = dg.quick()
+    except Exception as exc:
+        logger.debug("disk-cleanup quick cleanup failed: %s", exc)
+        return
+
+    if summary["deleted"] or summary["empty_dirs"]:
+        dg._log(
+            f"AUTO_QUICK (session_end): deleted={summary['deleted']} "
+            f"dirs={summary['empty_dirs']} freed={dg.fmt_size(summary['freed'])}"
+        )
+
+
+# ---------------------------------------------------------------------------
+# Slash command
+# ---------------------------------------------------------------------------
+
+_HELP_TEXT = """\
+/disk-cleanup — ephemeral-file cleanup
+
+Subcommands:
+  status                   Per-category breakdown + top-10 largest
+  dry-run                  Preview what quick/deep would delete
+  quick                    Run safe cleanup now (no prompts)
+  deep                     Run quick, then list items that need prompts
+  track <path> <category>  Manually add a path to tracking
+  forget <path>            Stop tracking a path (does not delete)
+
+Categories: temp | test | research | download | chrome-profile | cron-output | other
+
+All operations are scoped to HERMES_HOME and /tmp/hermes-*.
+Test files are auto-tracked on write_file / terminal / patch and auto-cleaned at session end.
+"""
+
+
+def _fmt_summary(summary: Dict[str, Any]) -> str:
+    base = (
+        f"[disk-cleanup] Cleaned {summary['deleted']} files + "
+        f"{summary['empty_dirs']} empty dirs, freed {dg.fmt_size(summary['freed'])}."
+    )
+    if summary.get("errors"):
+        base += f"\n  {len(summary['errors'])} error(s); see cleanup.log."
+    return base
+
+
+def _handle_slash(raw_args: str) -> Optional[str]:
+    argv = raw_args.strip().split()
+    if not argv or argv[0] in ("help", "-h", "--help"):
+        return _HELP_TEXT
+
+    sub = argv[0]
+
+    if sub == "status":
+        return dg.format_status(dg.status())
+
+    if sub == "dry-run":
+        auto, prompt = dg.dry_run()
+        auto_size = sum(i["size"] for i in auto)
+        prompt_size = sum(i["size"] for i in prompt)
+        lines = [
+            "Dry-run preview (nothing deleted):",
+            f"  Auto-delete : {len(auto)} files ({dg.fmt_size(auto_size)})",
+        ]
+        for item in auto:
+            lines.append(f"    [{item['category']}] {item['path']}")
+        lines.append(
+            f"  Needs prompt: {len(prompt)} files ({dg.fmt_size(prompt_size)})"
+        )
+        for item in prompt:
+            lines.append(f"    [{item['category']}] {item['path']}")
+        lines.append(
+            f"\n  Total potential: {dg.fmt_size(auto_size + prompt_size)}"
+        )
+        return "\n".join(lines)
+
+    if sub == "quick":
+        return _fmt_summary(dg.quick())
+
+    if sub == "deep":
+        # In-session deep can't prompt the user interactively — show what
+        # quick cleaned plus the items that WOULD need confirmation.
+        quick_summary = dg.quick()
+        _auto, prompt_items = dg.dry_run()
+        lines = [_fmt_summary(quick_summary)]
+        if prompt_items:
+            size = sum(i["size"] for i in prompt_items)
+            lines.append(
+                f"\n{len(prompt_items)} item(s) need confirmation "
+                f"({dg.fmt_size(size)}):"
+            )
+            for item in prompt_items:
+                lines.append(f"  [{item['category']}] {item['path']}")
+            lines.append(
+                "\nRun `/disk-cleanup forget <path>` to skip, or delete "
+                "manually via terminal."
+            )
+        return "\n".join(lines)
+
+    if sub == "track":
+        if len(argv) < 3:
+            return "Usage: /disk-cleanup track <path> <category>"
+        path_arg = argv[1]
+        category = argv[2]
+        if category not in dg.ALLOWED_CATEGORIES:
+            return (
+                f"Unknown category '{category}'. "
+                f"Allowed: {sorted(dg.ALLOWED_CATEGORIES)}"
+            )
+        if dg.track(path_arg, category, silent=True):
+            return f"Tracked {path_arg} as '{category}'."
+        return (
+            f"Not tracked (already present, missing, or outside HERMES_HOME): "
+            f"{path_arg}"
+        )
+
+    if sub == "forget":
+        if len(argv) < 2:
+            return "Usage: /disk-cleanup forget <path>"
+        n = dg.forget(argv[1])
+        return (
+            f"Removed {n} tracking entr{'y' if n == 1 else 'ies'} for {argv[1]}."
+            if n else f"Not found in tracking: {argv[1]}"
+        )
+
+    return f"Unknown subcommand: {sub}\n\n{_HELP_TEXT}"
+
+
+# ---------------------------------------------------------------------------
+# Plugin registration
+# ---------------------------------------------------------------------------
+
+def register(ctx) -> None:
+    ctx.register_hook("post_tool_call", _on_post_tool_call)
+    ctx.register_hook("on_session_end", _on_session_end)
+    ctx.register_command(
+        "disk-cleanup",
+        handler=_handle_slash,
+        description="Track and clean up ephemeral Hermes session files.",
+    )
diff --git a/plugins/disk-cleanup/disk_cleanup.py b/plugins/disk-cleanup/disk_cleanup.py
new file mode 100755
index 0000000000..cef2698316
--- /dev/null
+++ b/plugins/disk-cleanup/disk_cleanup.py
@@ -0,0 +1,496 @@
+"""disk_cleanup — ephemeral file cleanup for Hermes Agent.
+
+Library module wrapping the deterministic cleanup rules written by
+@LVT382009 in PR #12212. The plugin ``__init__.py`` wires these
+functions into ``post_tool_call`` and ``on_session_end`` hooks so
+tracking and cleanup happen automatically — the agent never needs to
+call a tool or remember a skill.
+ +Rules: + - test files → delete immediately at task end (age >= 0) + - temp files → delete after 7 days + - cron-output → delete after 14 days + - empty dirs → always delete (under HERMES_HOME) + - research → keep 10 newest, prompt for older (deep only) + - chrome-profile→ prompt after 14 days (deep only) + - >500 MB files → prompt always (deep only) + +Scope: strictly HERMES_HOME and /tmp/hermes-* +Never touches: ~/.hermes/logs/ or any system directory. +""" + +from __future__ import annotations + +import json +import logging +import shutil +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple + +try: + from hermes_constants import get_hermes_home +except Exception: # pragma: no cover — plugin may load before constants resolves + import os + + def get_hermes_home() -> Path: # type: ignore[no-redef] + val = (os.environ.get("HERMES_HOME") or "").strip() + return Path(val).resolve() if val else (Path.home() / ".hermes").resolve() + + +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Paths +# --------------------------------------------------------------------------- + +def get_state_dir() -> Path: + """State dir — separate from ``$HERMES_HOME/logs/``.""" + return get_hermes_home() / "disk-cleanup" + + +def get_tracked_file() -> Path: + return get_state_dir() / "tracked.json" + + +def get_log_file() -> Path: + """Audit log — intentionally NOT under ``$HERMES_HOME/logs/``.""" + return get_state_dir() / "cleanup.log" + + +# --------------------------------------------------------------------------- +# Path safety +# --------------------------------------------------------------------------- + +def is_safe_path(path: Path) -> bool: + """Accept only paths under HERMES_HOME or ``/tmp/hermes-*``. + + Rejects Windows mounts (``/mnt/c`` etc.) and any system directory. + """ + hermes_home = get_hermes_home() + try: + path.resolve().relative_to(hermes_home) + return True + except (ValueError, OSError): + pass + # Allow /tmp/hermes-* explicitly + parts = path.parts + if len(parts) >= 3 and parts[1] == "tmp" and parts[2].startswith("hermes-"): + return True + return False + + +# --------------------------------------------------------------------------- +# Audit log +# --------------------------------------------------------------------------- + +def _log(message: str) -> None: + try: + log_file = get_log_file() + log_file.parent.mkdir(parents=True, exist_ok=True) + ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S") + with open(log_file, "a") as f: + f.write(f"[{ts}] {message}\n") + except OSError: + # Never let the audit log break the agent loop. + pass + + +# --------------------------------------------------------------------------- +# tracked.json — atomic read/write, backup scoped to tracked.json only +# --------------------------------------------------------------------------- + +def load_tracked() -> List[Dict[str, Any]]: + """Load tracked.json. 
Restores from ``.bak`` on corruption.""" + tf = get_tracked_file() + tf.parent.mkdir(parents=True, exist_ok=True) + + if not tf.exists(): + return [] + + try: + return json.loads(tf.read_text()) + except (json.JSONDecodeError, ValueError): + bak = tf.with_suffix(".json.bak") + if bak.exists(): + try: + data = json.loads(bak.read_text()) + _log("WARN: tracked.json corrupted — restored from .bak") + return data + except Exception: + pass + _log("WARN: tracked.json corrupted, no backup — starting fresh") + return [] + + +def save_tracked(tracked: List[Dict[str, Any]]) -> None: + """Atomic write: ``.tmp`` → backup old → rename.""" + tf = get_tracked_file() + tf.parent.mkdir(parents=True, exist_ok=True) + tmp = tf.with_suffix(".json.tmp") + tmp.write_text(json.dumps(tracked, indent=2)) + if tf.exists(): + shutil.copy2(tf, tf.with_suffix(".json.bak")) + tmp.replace(tf) + + +# --------------------------------------------------------------------------- +# Categories +# --------------------------------------------------------------------------- + +ALLOWED_CATEGORIES = { + "temp", "test", "research", "download", + "chrome-profile", "cron-output", "other", +} + + +def fmt_size(n: float) -> str: + for unit in ("B", "KB", "MB", "GB", "TB"): + if n < 1024: + return f"{n:.1f} {unit}" + n /= 1024 + return f"{n:.1f} PB" + + +# --------------------------------------------------------------------------- +# Track / forget +# --------------------------------------------------------------------------- + +def track(path_str: str, category: str, silent: bool = False) -> bool: + """Register a file for tracking. Returns True if newly tracked.""" + if category not in ALLOWED_CATEGORIES: + _log(f"WARN: unknown category '{category}', using 'other'") + category = "other" + + path = Path(path_str).resolve() + + if not path.exists(): + _log(f"SKIP: {path} (does not exist)") + return False + + if not is_safe_path(path): + _log(f"REJECT: {path} (outside HERMES_HOME)") + return False + + size = path.stat().st_size if path.is_file() else 0 + tracked = load_tracked() + + # Deduplicate + if any(item["path"] == str(path) for item in tracked): + return False + + tracked.append({ + "path": str(path), + "timestamp": datetime.now(timezone.utc).isoformat(), + "category": category, + "size": size, + }) + save_tracked(tracked) + _log(f"TRACKED: {path} ({category}, {fmt_size(size)})") + if not silent: + print(f"Tracked: {path} ({category}, {fmt_size(size)})") + return True + + +def forget(path_str: str) -> int: + """Remove a path from tracking without deleting the file.""" + p = Path(path_str).resolve() + tracked = load_tracked() + before = len(tracked) + tracked = [i for i in tracked if Path(i["path"]).resolve() != p] + removed = before - len(tracked) + if removed: + save_tracked(tracked) + _log(f"FORGOT: {p} ({removed} entries)") + return removed + + +# --------------------------------------------------------------------------- +# Dry run +# --------------------------------------------------------------------------- + +def dry_run() -> Tuple[List[Dict], List[Dict]]: + """Return (auto_delete_list, needs_prompt_list) without touching files.""" + tracked = load_tracked() + now = datetime.now(timezone.utc) + + auto: List[Dict] = [] + prompt: List[Dict] = [] + + for item in tracked: + p = Path(item["path"]) + if not p.exists(): + continue + age = (now - datetime.fromisoformat(item["timestamp"])).days + cat = item["category"] + size = item["size"] + + if cat == "test": + auto.append(item) + elif cat == "temp" and age > 7: + 
auto.append(item) + elif cat == "cron-output" and age > 14: + auto.append(item) + elif cat == "research" and age > 30: + prompt.append(item) + elif cat == "chrome-profile" and age > 14: + prompt.append(item) + elif size > 500 * 1024 * 1024: + prompt.append(item) + + return auto, prompt + + +# --------------------------------------------------------------------------- +# Quick cleanup +# --------------------------------------------------------------------------- + +def quick() -> Dict[str, Any]: + """Safe deterministic cleanup — no prompts. + + Returns: ``{"deleted": N, "empty_dirs": N, "freed": bytes, + "errors": [str, ...]}``. + """ + tracked = load_tracked() + now = datetime.now(timezone.utc) + deleted = 0 + freed = 0 + new_tracked: List[Dict] = [] + errors: List[str] = [] + + for item in tracked: + p = Path(item["path"]) + cat = item["category"] + + if not p.exists(): + _log(f"STALE: {p} (removed from tracking)") + continue + + age = (now - datetime.fromisoformat(item["timestamp"])).days + + should_delete = ( + cat == "test" + or (cat == "temp" and age > 7) + or (cat == "cron-output" and age > 14) + ) + + if should_delete: + try: + if p.is_file(): + p.unlink() + elif p.is_dir(): + shutil.rmtree(p) + freed += item["size"] + deleted += 1 + _log(f"DELETED: {p} ({cat}, {fmt_size(item['size'])})") + except OSError as e: + _log(f"ERROR deleting {p}: {e}") + errors.append(f"{p}: {e}") + new_tracked.append(item) + else: + new_tracked.append(item) + + # Remove empty dirs under HERMES_HOME (but leave HERMES_HOME itself and + # a short list of well-known top-level state dirs alone — a fresh install + # has these empty, and deleting them would surprise the user). + hermes_home = get_hermes_home() + _PROTECTED_TOP_LEVEL = { + "logs", "memories", "sessions", "cron", "cronjobs", + "cache", "skills", "plugins", "disk-cleanup", "optional-skills", + "hermes-agent", "backups", "profiles", ".worktrees", + } + empty_removed = 0 + try: + for dirpath in sorted(hermes_home.rglob("*"), reverse=True): + if not dirpath.is_dir() or dirpath == hermes_home: + continue + try: + rel_parts = dirpath.relative_to(hermes_home).parts + except ValueError: + continue + # Skip the well-known top-level state dirs themselves. + if len(rel_parts) == 1 and rel_parts[0] in _PROTECTED_TOP_LEVEL: + continue + try: + if not any(dirpath.iterdir()): + dirpath.rmdir() + empty_removed += 1 + _log(f"DELETED: {dirpath} (empty dir)") + except OSError: + pass + except OSError: + pass + + save_tracked(new_tracked) + _log( + f"QUICK_SUMMARY: {deleted} files, {empty_removed} dirs, " + f"{fmt_size(freed)}" + ) + return { + "deleted": deleted, + "empty_dirs": empty_removed, + "freed": freed, + "errors": errors, + } + + +# --------------------------------------------------------------------------- +# Deep cleanup (interactive — not called from plugin hooks) +# --------------------------------------------------------------------------- + +def deep( + confirm: Optional[callable] = None, +) -> Dict[str, Any]: + """Deep cleanup. + + Runs :func:`quick` first, then asks the *confirm* callable for each + risky item (research > 30d beyond 10 newest, chrome-profile > 14d, + any file > 500 MB). *confirm(item)* must return True to delete. + + Returns: ``{"quick": {...}, "deep_deleted": N, "deep_freed": bytes}``. + """ + quick_result = quick() + + if confirm is None: + # No interactive confirmer — deep stops after the quick pass. 
+ return {"quick": quick_result, "deep_deleted": 0, "deep_freed": 0} + + tracked = load_tracked() + now = datetime.now(timezone.utc) + research, chrome, large = [], [], [] + + for item in tracked: + p = Path(item["path"]) + if not p.exists(): + continue + age = (now - datetime.fromisoformat(item["timestamp"])).days + cat = item["category"] + + if cat == "research" and age > 30: + research.append(item) + elif cat == "chrome-profile" and age > 14: + chrome.append(item) + elif item["size"] > 500 * 1024 * 1024: + large.append(item) + + research.sort(key=lambda x: x["timestamp"], reverse=True) + old_research = research[10:] + + freed, count = 0, 0 + to_remove: List[Dict] = [] + + for group in (old_research, chrome, large): + for item in group: + if confirm(item): + try: + p = Path(item["path"]) + if p.is_file(): + p.unlink() + elif p.is_dir(): + shutil.rmtree(p) + to_remove.append(item) + freed += item["size"] + count += 1 + _log( + f"DELETED: {p} ({item['category']}, " + f"{fmt_size(item['size'])})" + ) + except OSError as e: + _log(f"ERROR deleting {item['path']}: {e}") + + if to_remove: + remove_paths = {i["path"] for i in to_remove} + save_tracked([i for i in tracked if i["path"] not in remove_paths]) + + return {"quick": quick_result, "deep_deleted": count, "deep_freed": freed} + + +# --------------------------------------------------------------------------- +# Status +# --------------------------------------------------------------------------- + +def status() -> Dict[str, Any]: + """Return per-category breakdown and top 10 largest tracked files.""" + tracked = load_tracked() + cats: Dict[str, Dict] = {} + for item in tracked: + c = item["category"] + cats.setdefault(c, {"count": 0, "size": 0}) + cats[c]["count"] += 1 + cats[c]["size"] += item["size"] + + existing = [ + (i["path"], i["size"], i["category"]) + for i in tracked if Path(i["path"]).exists() + ] + existing.sort(key=lambda x: x[1], reverse=True) + + return { + "categories": cats, + "top10": existing[:10], + "total_tracked": len(tracked), + } + + +def format_status(s: Dict[str, Any]) -> str: + """Human-readable status string (for slash command output).""" + lines = [f"{'Category':<20} {'Files':>6} {'Size':>10}", "-" * 40] + cats = s["categories"] + for cat, d in sorted(cats.items(), key=lambda x: x[1]["size"], reverse=True): + lines.append(f"{cat:<20} {d['count']:>6} {fmt_size(d['size']):>10}") + + if not cats: + lines.append("(nothing tracked yet)") + + lines.append("") + lines.append("Top 10 largest tracked files:") + if not s["top10"]: + lines.append(" (none)") + else: + for rank, (path, size, cat) in enumerate(s["top10"], 1): + lines.append(f" {rank:>2}. {fmt_size(size):>8} [{cat}] {path}") + return "\n".join(lines) + + +# --------------------------------------------------------------------------- +# Auto-categorisation from tool-call inspection +# --------------------------------------------------------------------------- + +_TEST_PATTERNS = ("test_", "tmp_") +_TEST_SUFFIXES = (".test.py", ".test.js", ".test.ts", ".test.md") + + +def guess_category(path: Path) -> Optional[str]: + """Return a category label for *path*, or None if we shouldn't track it. + + Used by the ``post_tool_call`` hook to auto-track ephemeral files. + """ + if not is_safe_path(path): + return None + + # Skip the state dir itself, logs, memory files, sessions, config. 
+ hermes_home = get_hermes_home() + try: + rel = path.resolve().relative_to(hermes_home) + top = rel.parts[0] if rel.parts else "" + if top in { + "disk-cleanup", "logs", "memories", "sessions", "config.yaml", + "skills", "plugins", ".env", "USER.md", "MEMORY.md", "SOUL.md", + "auth.json", "hermes-agent", + }: + return None + if top == "cron" or top == "cronjobs": + return "cron-output" + if top == "cache": + return "temp" + except ValueError: + # Path isn't under HERMES_HOME (e.g. /tmp/hermes-*) — fall through. + pass + + name = path.name + if name.startswith(_TEST_PATTERNS): + return "test" + if any(name.endswith(sfx) for sfx in _TEST_SUFFIXES): + return "test" + return None diff --git a/plugins/disk-cleanup/plugin.yaml b/plugins/disk-cleanup/plugin.yaml new file mode 100644 index 0000000000..fe005c8849 --- /dev/null +++ b/plugins/disk-cleanup/plugin.yaml @@ -0,0 +1,7 @@ +name: disk-cleanup +version: 2.0.0 +description: "Auto-track and clean up ephemeral files (test scripts, temp outputs, cron logs) created during Hermes sessions. Runs via plugin hooks — no agent action required." +author: "@LVT382009 (original), NousResearch (plugin port)" +hooks: + - post_tool_call + - on_session_end diff --git a/plugins/image_gen/openai/__init__.py b/plugins/image_gen/openai/__init__.py new file mode 100644 index 0000000000..c1a719f910 --- /dev/null +++ b/plugins/image_gen/openai/__init__.py @@ -0,0 +1,303 @@ +"""OpenAI image generation backend. + +Exposes OpenAI's ``gpt-image-2`` model at three quality tiers as an +:class:`ImageGenProvider` implementation. The tiers are implemented as +three virtual model IDs so the ``hermes tools`` model picker and the +``image_gen.model`` config key behave like any other multi-model backend: + + gpt-image-2-low ~15s fastest, good for iteration + gpt-image-2-medium ~40s default — balanced + gpt-image-2-high ~2min slowest, highest fidelity + +All three hit the same underlying API model (``gpt-image-2``) with a +different ``quality`` parameter. Output is base64 JSON → saved under +``$HERMES_HOME/cache/images/``. + +Selection precedence (first hit wins): + +1. ``OPENAI_IMAGE_MODEL`` env var (escape hatch for scripts / tests) +2. ``image_gen.openai.model`` in ``config.yaml`` +3. ``image_gen.model`` in ``config.yaml`` (when it's one of our tier IDs) +4. :data:`DEFAULT_MODEL` — ``gpt-image-2-medium`` +""" + +from __future__ import annotations + +import logging +import os +from typing import Any, Dict, List, Optional, Tuple + +from agent.image_gen_provider import ( + DEFAULT_ASPECT_RATIO, + ImageGenProvider, + error_response, + resolve_aspect_ratio, + save_b64_image, + success_response, +) + +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Model catalog +# --------------------------------------------------------------------------- +# +# All three IDs resolve to the same underlying API model with a different +# ``quality`` setting. ``api_model`` is what gets sent to OpenAI; +# ``quality`` is the knob that changes generation time and output fidelity. 
+ +API_MODEL = "gpt-image-2" + +_MODELS: Dict[str, Dict[str, Any]] = { + "gpt-image-2-low": { + "display": "GPT Image 2 (Low)", + "speed": "~15s", + "strengths": "Fast iteration, lowest cost", + "quality": "low", + }, + "gpt-image-2-medium": { + "display": "GPT Image 2 (Medium)", + "speed": "~40s", + "strengths": "Balanced — default", + "quality": "medium", + }, + "gpt-image-2-high": { + "display": "GPT Image 2 (High)", + "speed": "~2min", + "strengths": "Highest fidelity, strongest prompt adherence", + "quality": "high", + }, +} + +DEFAULT_MODEL = "gpt-image-2-medium" + +_SIZES = { + "landscape": "1536x1024", + "square": "1024x1024", + "portrait": "1024x1536", +} + + +def _load_openai_config() -> Dict[str, Any]: + """Read ``image_gen`` from config.yaml (returns {} on any failure).""" + try: + from hermes_cli.config import load_config + + cfg = load_config() + section = cfg.get("image_gen") if isinstance(cfg, dict) else None + return section if isinstance(section, dict) else {} + except Exception as exc: + logger.debug("Could not load image_gen config: %s", exc) + return {} + + +def _resolve_model() -> Tuple[str, Dict[str, Any]]: + """Decide which tier to use and return ``(model_id, meta)``.""" + env_override = os.environ.get("OPENAI_IMAGE_MODEL") + if env_override and env_override in _MODELS: + return env_override, _MODELS[env_override] + + cfg = _load_openai_config() + openai_cfg = cfg.get("openai") if isinstance(cfg.get("openai"), dict) else {} + candidate: Optional[str] = None + if isinstance(openai_cfg, dict): + value = openai_cfg.get("model") + if isinstance(value, str) and value in _MODELS: + candidate = value + if candidate is None: + top = cfg.get("model") + if isinstance(top, str) and top in _MODELS: + candidate = top + + if candidate is not None: + return candidate, _MODELS[candidate] + + return DEFAULT_MODEL, _MODELS[DEFAULT_MODEL] + + +# --------------------------------------------------------------------------- +# Provider +# --------------------------------------------------------------------------- + + +class OpenAIImageGenProvider(ImageGenProvider): + """OpenAI ``images.generate`` backend — gpt-image-2 at low/medium/high.""" + + @property + def name(self) -> str: + return "openai" + + @property + def display_name(self) -> str: + return "OpenAI" + + def is_available(self) -> bool: + if not os.environ.get("OPENAI_API_KEY"): + return False + try: + import openai # noqa: F401 + except ImportError: + return False + return True + + def list_models(self) -> List[Dict[str, Any]]: + return [ + { + "id": model_id, + "display": meta["display"], + "speed": meta["speed"], + "strengths": meta["strengths"], + "price": "varies", + } + for model_id, meta in _MODELS.items() + ] + + def default_model(self) -> Optional[str]: + return DEFAULT_MODEL + + def get_setup_schema(self) -> Dict[str, Any]: + return { + "name": "OpenAI", + "badge": "paid", + "tag": "gpt-image-2 at low/medium/high quality tiers", + "env_vars": [ + { + "key": "OPENAI_API_KEY", + "prompt": "OpenAI API key", + "url": "https://platform.openai.com/api-keys", + }, + ], + } + + def generate( + self, + prompt: str, + aspect_ratio: str = DEFAULT_ASPECT_RATIO, + **kwargs: Any, + ) -> Dict[str, Any]: + prompt = (prompt or "").strip() + aspect = resolve_aspect_ratio(aspect_ratio) + + if not prompt: + return error_response( + error="Prompt is required and must be a non-empty string", + error_type="invalid_argument", + provider="openai", + aspect_ratio=aspect, + ) + + if not os.environ.get("OPENAI_API_KEY"): + return error_response( + 
error=( + "OPENAI_API_KEY not set. Run `hermes tools` → Image " + "Generation → OpenAI to configure, or `hermes setup` " + "to add the key." + ), + error_type="auth_required", + provider="openai", + aspect_ratio=aspect, + ) + + try: + import openai + except ImportError: + return error_response( + error="openai Python package not installed (pip install openai)", + error_type="missing_dependency", + provider="openai", + aspect_ratio=aspect, + ) + + tier_id, meta = _resolve_model() + size = _SIZES.get(aspect, _SIZES["square"]) + + # gpt-image-2 returns b64_json unconditionally and REJECTS + # ``response_format`` as an unknown parameter. Don't send it. + payload: Dict[str, Any] = { + "model": API_MODEL, + "prompt": prompt, + "size": size, + "n": 1, + "quality": meta["quality"], + } + + try: + client = openai.OpenAI() + response = client.images.generate(**payload) + except Exception as exc: + logger.debug("OpenAI image generation failed", exc_info=True) + return error_response( + error=f"OpenAI image generation failed: {exc}", + error_type="api_error", + provider="openai", + model=tier_id, + prompt=prompt, + aspect_ratio=aspect, + ) + + data = getattr(response, "data", None) or [] + if not data: + return error_response( + error="OpenAI returned no image data", + error_type="empty_response", + provider="openai", + model=tier_id, + prompt=prompt, + aspect_ratio=aspect, + ) + + first = data[0] + b64 = getattr(first, "b64_json", None) + url = getattr(first, "url", None) + revised_prompt = getattr(first, "revised_prompt", None) + + if b64: + try: + saved_path = save_b64_image(b64, prefix=f"openai_{tier_id}") + except Exception as exc: + return error_response( + error=f"Could not save image to cache: {exc}", + error_type="io_error", + provider="openai", + model=tier_id, + prompt=prompt, + aspect_ratio=aspect, + ) + image_ref = str(saved_path) + elif url: + # Defensive — gpt-image-2 returns b64 today, but fall back + # gracefully if the API ever changes. + image_ref = url + else: + return error_response( + error="OpenAI response contained neither b64_json nor URL", + error_type="empty_response", + provider="openai", + model=tier_id, + prompt=prompt, + aspect_ratio=aspect, + ) + + extra: Dict[str, Any] = {"size": size, "quality": meta["quality"]} + if revised_prompt: + extra["revised_prompt"] = revised_prompt + + return success_response( + image=image_ref, + model=tier_id, + prompt=prompt, + aspect_ratio=aspect, + provider="openai", + extra=extra, + ) + + +# --------------------------------------------------------------------------- +# Plugin entry point +# --------------------------------------------------------------------------- + + +def register(ctx) -> None: + """Plugin entry point — wire ``OpenAIImageGenProvider`` into the registry.""" + ctx.register_image_gen_provider(OpenAIImageGenProvider()) diff --git a/plugins/image_gen/openai/plugin.yaml b/plugins/image_gen/openai/plugin.yaml new file mode 100644 index 0000000000..18e4d86390 --- /dev/null +++ b/plugins/image_gen/openai/plugin.yaml @@ -0,0 +1,7 @@ +name: openai +version: 1.0.0 +description: "OpenAI image generation backend (gpt-image-2). Saves generated images to $HERMES_HOME/cache/images/." 
+author: NousResearch +kind: backend +requires_env: + - OPENAI_API_KEY diff --git a/plugins/memory/hindsight/README.md b/plugins/memory/hindsight/README.md index 024a993031..3fbdc2aba4 100644 --- a/plugins/memory/hindsight/README.md +++ b/plugins/memory/hindsight/README.md @@ -84,7 +84,10 @@ Config file: `~/.hermes/hindsight/config.json` | `retain_async` | `true` | Process retain asynchronously on the Hindsight server | | `retain_every_n_turns` | `1` | Retain every N turns (1 = every turn) | | `retain_context` | `conversation between Hermes Agent and the User` | Context label for retained memories | -| `tags` | — | Tags applied when storing memories | +| `retain_tags` | — | Default tags applied to retained memories; merged with per-call tool tags | +| `retain_source` | — | Optional `metadata.source` attached to retained memories | +| `retain_user_prefix` | `User` | Label used before user turns in auto-retained transcripts | +| `retain_assistant_prefix` | `Assistant` | Label used before assistant turns in auto-retained transcripts | ### Integration @@ -113,7 +116,7 @@ Available in `hybrid` and `tools` memory modes: | Tool | Description | |------|-------------| -| `hindsight_retain` | Store information with auto entity extraction | +| `hindsight_retain` | Store information with auto entity extraction; supports optional per-call `tags` | | `hindsight_recall` | Multi-strategy search (semantic + entity graph) | | `hindsight_reflect` | Cross-memory synthesis (LLM-powered) | diff --git a/plugins/memory/hindsight/__init__.py b/plugins/memory/hindsight/__init__.py index c39679b73c..2b233e265c 100644 --- a/plugins/memory/hindsight/__init__.py +++ b/plugins/memory/hindsight/__init__.py @@ -6,11 +6,15 @@ retrieval. Supports cloud (API key) and local modes. Original PR #1811 by benfrank241, adapted to MemoryProvider ABC. Config via environment variables: - HINDSIGHT_API_KEY — API key for Hindsight Cloud - HINDSIGHT_BANK_ID — memory bank identifier (default: hermes) - HINDSIGHT_BUDGET — recall budget: low/mid/high (default: mid) - HINDSIGHT_API_URL — API endpoint - HINDSIGHT_MODE — cloud or local (default: cloud) + HINDSIGHT_API_KEY — API key for Hindsight Cloud + HINDSIGHT_BANK_ID — memory bank identifier (default: hermes) + HINDSIGHT_BUDGET — recall budget: low/mid/high (default: mid) + HINDSIGHT_API_URL — API endpoint + HINDSIGHT_MODE — cloud or local (default: cloud) + HINDSIGHT_RETAIN_TAGS — comma-separated tags attached to retained memories + HINDSIGHT_RETAIN_SOURCE — metadata source value attached to retained memories + HINDSIGHT_RETAIN_USER_PREFIX — label used before user turns in retained transcripts + HINDSIGHT_RETAIN_ASSISTANT_PREFIX — label used before assistant turns in retained transcripts Or via $HERMES_HOME/hindsight/config.json (profile-scoped), falling back to ~/.hindsight/config.json (legacy, shared) for backward compatibility. @@ -24,7 +28,7 @@ import logging import os import threading -from hermes_constants import get_hermes_home +from datetime import datetime, timezone from typing import Any, Dict, List from agent.memory_provider import MemoryProvider @@ -99,6 +103,11 @@ RETAIN_SCHEMA = { "properties": { "content": {"type": "string", "description": "The information to store."}, "context": {"type": "string", "description": "Short label (e.g. 
'user preference', 'project decision')."}, + "tags": { + "type": "array", + "items": {"type": "string"}, + "description": "Optional per-call tags to merge with configured default retain tags.", + }, }, "required": ["content"], }, @@ -168,6 +177,10 @@ def _load_config() -> dict: return { "mode": os.environ.get("HINDSIGHT_MODE", "cloud"), "apiKey": os.environ.get("HINDSIGHT_API_KEY", ""), + "retain_tags": os.environ.get("HINDSIGHT_RETAIN_TAGS", ""), + "retain_source": os.environ.get("HINDSIGHT_RETAIN_SOURCE", ""), + "retain_user_prefix": os.environ.get("HINDSIGHT_RETAIN_USER_PREFIX", "User"), + "retain_assistant_prefix": os.environ.get("HINDSIGHT_RETAIN_ASSISTANT_PREFIX", "Assistant"), "banks": { "hermes": { "bankId": os.environ.get("HINDSIGHT_BANK_ID", "hermes"), @@ -178,6 +191,48 @@ def _load_config() -> dict: } +def _normalize_retain_tags(value: Any) -> List[str]: + """Normalize tag config/tool values to a deduplicated list of strings.""" + if value is None: + return [] + + raw_items: list[Any] + if isinstance(value, list): + raw_items = value + elif isinstance(value, str): + text = value.strip() + if not text: + return [] + if text.startswith("["): + try: + parsed = json.loads(text) + except Exception: + parsed = None + if isinstance(parsed, list): + raw_items = parsed + else: + raw_items = text.split(",") + else: + raw_items = text.split(",") + else: + raw_items = [value] + + normalized = [] + seen = set() + for item in raw_items: + tag = str(item).strip() + if not tag or tag in seen: + continue + seen.add(tag) + normalized.append(tag) + return normalized + + +def _utc_timestamp() -> str: + """Return current UTC timestamp in ISO-8601 with milliseconds and Z suffix.""" + return datetime.now(timezone.utc).isoformat(timespec="milliseconds").replace("+00:00", "Z") + + # --------------------------------------------------------------------------- # MemoryProvider implementation # --------------------------------------------------------------------------- @@ -195,6 +250,19 @@ class HindsightMemoryProvider(MemoryProvider): self._llm_base_url = "" self._memory_mode = "hybrid" # "context", "tools", or "hybrid" self._prefetch_method = "recall" # "recall" or "reflect" + self._retain_tags: List[str] = [] + self._retain_source = "" + self._retain_user_prefix = "User" + self._retain_assistant_prefix = "Assistant" + self._platform = "" + self._user_id = "" + self._user_name = "" + self._chat_id = "" + self._chat_name = "" + self._chat_type = "" + self._thread_id = "" + self._agent_identity = "" + self._turn_index = 0 self._client = None self._prefetch_result = "" self._prefetch_lock = threading.Lock() @@ -210,6 +278,7 @@ class HindsightMemoryProvider(MemoryProvider): # Retain controls self._auto_retain = True self._retain_every_n_turns = 1 + self._retain_async = True self._retain_context = "conversation between Hermes Agent and the User" self._turn_counter = 0 self._session_turns: list[str] = [] # accumulates ALL turns for the session @@ -224,7 +293,6 @@ class HindsightMemoryProvider(MemoryProvider): # Bank self._bank_mission = "" self._bank_retain_mission: str | None = None - self._retain_async = True @property def name(self) -> str: @@ -423,7 +491,10 @@ class HindsightMemoryProvider(MemoryProvider): {"key": "recall_budget", "description": "Recall thoroughness", "default": "mid", "choices": ["low", "mid", "high"]}, {"key": "memory_mode", "description": "Memory integration mode", "default": "hybrid", "choices": ["hybrid", "context", "tools"]}, {"key": "recall_prefetch_method", "description": 
"Auto-recall method", "default": "recall", "choices": ["recall", "reflect"]}, - {"key": "tags", "description": "Tags applied when storing memories (comma-separated)", "default": ""}, + {"key": "retain_tags", "description": "Default tags applied to retained memories (comma-separated)", "default": ""}, + {"key": "retain_source", "description": "Metadata source value attached to retained memories", "default": ""}, + {"key": "retain_user_prefix", "description": "Label used before user turns in retained transcripts", "default": "User"}, + {"key": "retain_assistant_prefix", "description": "Label used before assistant turns in retained transcripts", "default": "Assistant"}, {"key": "recall_tags", "description": "Tags to filter when searching memories (comma-separated)", "default": ""}, {"key": "recall_tags_match", "description": "Tag matching mode for recall", "default": "any", "choices": ["any", "all", "any_strict", "all_strict"]}, {"key": "auto_recall", "description": "Automatically recall memories before each turn", "default": True}, @@ -467,7 +538,7 @@ class HindsightMemoryProvider(MemoryProvider): return self._client def initialize(self, session_id: str, **kwargs) -> None: - self._session_id = session_id + self._session_id = str(session_id or "").strip() # Check client version and auto-upgrade if needed try: @@ -496,6 +567,16 @@ class HindsightMemoryProvider(MemoryProvider): pass # packaging not available or other issue — proceed anyway self._config = _load_config() + self._platform = str(kwargs.get("platform") or "").strip() + self._user_id = str(kwargs.get("user_id") or "").strip() + self._user_name = str(kwargs.get("user_name") or "").strip() + self._chat_id = str(kwargs.get("chat_id") or "").strip() + self._chat_name = str(kwargs.get("chat_name") or "").strip() + self._chat_type = str(kwargs.get("chat_type") or "").strip() + self._thread_id = str(kwargs.get("thread_id") or "").strip() + self._agent_identity = str(kwargs.get("agent_identity") or "").strip() + self._turn_index = 0 + self._session_turns = [] self._mode = self._config.get("mode", "cloud") # "local" is a legacy alias for "local_embedded" if self._mode == "local": @@ -513,7 +594,7 @@ class HindsightMemoryProvider(MemoryProvider): memory_mode = self._config.get("memory_mode", "hybrid") self._memory_mode = memory_mode if memory_mode in ("context", "tools", "hybrid") else "hybrid" - prefetch_method = self._config.get("recall_prefetch_method", "recall") + prefetch_method = self._config.get("recall_prefetch_method") or self._config.get("prefetch_method", "recall") self._prefetch_method = prefetch_method if prefetch_method in ("recall", "reflect") else "recall" # Bank options @@ -521,9 +602,22 @@ class HindsightMemoryProvider(MemoryProvider): self._bank_retain_mission = self._config.get("bank_retain_mission") or None # Tags - self._tags = self._config.get("tags") or None + self._retain_tags = _normalize_retain_tags( + self._config.get("retain_tags") + or os.environ.get("HINDSIGHT_RETAIN_TAGS", "") + ) + self._tags = self._retain_tags or None self._recall_tags = self._config.get("recall_tags") or None self._recall_tags_match = self._config.get("recall_tags_match", "any") + self._retain_source = str( + self._config.get("retain_source") or os.environ.get("HINDSIGHT_RETAIN_SOURCE", "") + ).strip() + self._retain_user_prefix = str( + self._config.get("retain_user_prefix") or os.environ.get("HINDSIGHT_RETAIN_USER_PREFIX", "User") + ).strip() or "User" + self._retain_assistant_prefix = str( + self._config.get("retain_assistant_prefix") or 
os.environ.get("HINDSIGHT_RETAIN_ASSISTANT_PREFIX", "Assistant") + ).strip() or "Assistant" # Retain controls self._auto_retain = self._config.get("auto_retain", True) @@ -547,11 +641,9 @@ class HindsightMemoryProvider(MemoryProvider): logger.info("Hindsight initialized: mode=%s, api_url=%s, bank=%s, budget=%s, memory_mode=%s, prefetch_method=%s, client=%s", self._mode, self._api_url, self._bank_id, self._budget, self._memory_mode, self._prefetch_method, _client_version) logger.debug("Hindsight config: auto_retain=%s, auto_recall=%s, retain_every_n=%d, " - "retain_async=%s, retain_context=%s, " - "recall_max_tokens=%d, recall_max_input_chars=%d, tags=%s, recall_tags=%s", + "retain_async=%s, retain_context=%s, recall_max_tokens=%d, recall_max_input_chars=%d, tags=%s, recall_tags=%s", self._auto_retain, self._auto_recall, self._retain_every_n_turns, - self._retain_async, self._retain_context, - self._recall_max_tokens, self._recall_max_input_chars, + self._retain_async, self._retain_context, self._recall_max_tokens, self._recall_max_input_chars, self._tags, self._recall_tags) # For local mode, start the embedded daemon in the background so it @@ -712,6 +804,78 @@ class HindsightMemoryProvider(MemoryProvider): self._prefetch_thread = threading.Thread(target=_run, daemon=True, name="hindsight-prefetch") self._prefetch_thread.start() + def _build_turn_messages(self, user_content: str, assistant_content: str) -> List[Dict[str, str]]: + now = datetime.now(timezone.utc).isoformat() + return [ + { + "role": "user", + "content": f"{self._retain_user_prefix}: {user_content}", + "timestamp": now, + }, + { + "role": "assistant", + "content": f"{self._retain_assistant_prefix}: {assistant_content}", + "timestamp": now, + }, + ] + + def _build_metadata(self, *, message_count: int, turn_index: int) -> Dict[str, str]: + metadata: Dict[str, str] = { + "retained_at": _utc_timestamp(), + "message_count": str(message_count), + "turn_index": str(turn_index), + } + if self._retain_source: + metadata["source"] = self._retain_source + if self._session_id: + metadata["session_id"] = self._session_id + if self._platform: + metadata["platform"] = self._platform + if self._user_id: + metadata["user_id"] = self._user_id + if self._user_name: + metadata["user_name"] = self._user_name + if self._chat_id: + metadata["chat_id"] = self._chat_id + if self._chat_name: + metadata["chat_name"] = self._chat_name + if self._chat_type: + metadata["chat_type"] = self._chat_type + if self._thread_id: + metadata["thread_id"] = self._thread_id + if self._agent_identity: + metadata["agent_identity"] = self._agent_identity + return metadata + + def _build_retain_kwargs( + self, + content: str, + *, + context: str | None = None, + document_id: str | None = None, + metadata: Dict[str, str] | None = None, + tags: List[str] | None = None, + retain_async: bool | None = None, + ) -> Dict[str, Any]: + kwargs: Dict[str, Any] = { + "bank_id": self._bank_id, + "content": content, + "metadata": metadata or self._build_metadata(message_count=1, turn_index=self._turn_index), + } + if context is not None: + kwargs["context"] = context + if document_id: + kwargs["document_id"] = document_id + if retain_async is not None: + kwargs["retain_async"] = retain_async + merged_tags = _normalize_retain_tags(self._retain_tags) + for tag in _normalize_retain_tags(tags): + if tag not in merged_tags: + merged_tags.append(tag) + if merged_tags: + kwargs["tags"] = merged_tags + return kwargs + def sync_turn(self, user_content: str, assistant_content: str, *, 
session_id: str = "") -> None: """Retain conversation turn in background (non-blocking). @@ -721,19 +885,14 @@ class HindsightMemoryProvider(MemoryProvider): logger.debug("sync_turn: skipped (auto_retain disabled)") return - from datetime import datetime, timezone - now = datetime.now(timezone.utc).isoformat() + if session_id: + self._session_id = str(session_id).strip() - messages = [ - {"role": "user", "content": user_content, "timestamp": now}, - {"role": "assistant", "content": assistant_content, "timestamp": now}, - ] - - turn = json.dumps(messages) + turn = json.dumps(self._build_turn_messages(user_content, assistant_content)) self._session_turns.append(turn) self._turn_counter += 1 + self._turn_index = self._turn_counter - # Only retain every N turns if self._turn_counter % self._retain_every_n_turns != 0: logger.debug("sync_turn: buffered turn %d (will retain at turn %d)", self._turn_counter, self._turn_counter + (self._retain_every_n_turns - self._turn_counter % self._retain_every_n_turns)) @@ -741,19 +900,21 @@ class HindsightMemoryProvider(MemoryProvider): logger.debug("sync_turn: retaining %d turns, total session content %d chars", len(self._session_turns), sum(len(t) for t in self._session_turns)) - # Send the ENTIRE session as a single JSON array (document_id deduplicates). - # Each element in _session_turns is a JSON string of that turn's messages. content = "[" + ",".join(self._session_turns) + "]" def _sync(): try: client = self._get_client() - item: dict = { - "content": content, - "context": self._retain_context, - } - if self._tags: - item["tags"] = self._tags + item = self._build_retain_kwargs( + content, + context=self._retain_context, + metadata=self._build_metadata( + message_count=len(self._session_turns) * 2, + turn_index=self._turn_index, + ), + ) + item.pop("bank_id", None) + item.pop("retain_async", None) logger.debug("Hindsight retain: bank=%s, doc=%s, async=%s, content_len=%d, num_turns=%d", self._bank_id, self._session_id, self._retain_async, len(content), len(self._session_turns)) _run_sync(client.aretain_batch( @@ -789,11 +950,11 @@ class HindsightMemoryProvider(MemoryProvider): return tool_error("Missing required parameter: content") context = args.get("context") try: - retain_kwargs: dict = { - "bank_id": self._bank_id, "content": content, "context": context, - } - if self._tags: - retain_kwargs["tags"] = self._tags + retain_kwargs = self._build_retain_kwargs( + content, + context=context, + tags=args.get("tags"), + ) logger.debug("Tool hindsight_retain: bank=%s, content_len=%d, context=%s", self._bank_id, len(content), context) _run_sync(client.aretain(**retain_kwargs)) diff --git a/plugins/memory/honcho/__init__.py b/plugins/memory/honcho/__init__.py index ca44ce6019..6ca32c1dcb 100644 --- a/plugins/memory/honcho/__init__.py +++ b/plugins/memory/honcho/__init__.py @@ -19,6 +19,7 @@ import json import logging import re import threading +import time from typing import Any, Dict, List, Optional from agent.memory_provider import MemoryProvider @@ -206,13 +207,19 @@ class HonchoMemoryProvider(MemoryProvider): self._turn_count = 0 self._injection_frequency = "every-turn" # or "first-turn" self._context_cadence = 1 # minimum turns between context API calls - self._dialectic_cadence = 3 # minimum turns between dialectic API calls + self._dialectic_cadence = 1 # backwards-compat fallback; wizard writes 2 on new configs self._dialectic_depth = 1 # how many .chat() calls per dialectic cycle (1-3) self._dialectic_depth_levels: list[str] | None = None # per-pass 
reasoning levels - self._reasoning_level_cap: Optional[str] = None # "minimal", "low", "medium", "high" + self._reasoning_heuristic: bool = True # scale base level by query length + self._reasoning_level_cap: str = "high" # ceiling for auto-selected level self._last_context_turn = -999 self._last_dialectic_turn = -999 + # Liveness + observability state + self._prefetch_thread_started_at: float = 0.0 # monotonic ts of current thread + self._prefetch_result_fired_at: int = -999 # turn the pending result was fired at + self._dialectic_empty_streak: int = 0 # consecutive empty returns + # Port #1957: lazy session init for tools-only mode self._session_initialized = False self._lazy_init_kwargs: Optional[dict] = None @@ -286,14 +293,6 @@ class HonchoMemoryProvider(MemoryProvider): logger.debug("Honcho not configured — plugin inactive") return - # Override peer_name with gateway user_id for per-user memory scoping. - # Only when no explicit peerName was configured — an explicit peerName - # means the user chose their identity; a raw user_id (e.g. Telegram - # chat ID) should not silently replace it. - _gw_user_id = kwargs.get("user_id") - if _gw_user_id and not cfg.peer_name: - cfg.peer_name = _gw_user_id - self._config = cfg # ----- B1: recall_mode from config ----- @@ -305,12 +304,16 @@ class HonchoMemoryProvider(MemoryProvider): raw = cfg.raw or {} self._injection_frequency = raw.get("injectionFrequency", "every-turn") self._context_cadence = int(raw.get("contextCadence", 1)) - self._dialectic_cadence = int(raw.get("dialecticCadence", 3)) + # Backwards-compat: unset dialecticCadence falls back to 1 + # (every turn) so existing honcho.json configs without the key + # behave as they did before. New setups via `hermes honcho setup` + # get dialecticCadence=2 written explicitly by the wizard. + self._dialectic_cadence = int(raw.get("dialecticCadence", 1)) self._dialectic_depth = max(1, min(cfg.dialectic_depth, 3)) self._dialectic_depth_levels = cfg.dialectic_depth_levels - cap = raw.get("reasoningLevelCap") - if cap and cap in ("minimal", "low", "medium", "high"): - self._reasoning_level_cap = cap + self._reasoning_heuristic = cfg.reasoning_heuristic + if cfg.reasoning_level_cap in self._LEVEL_ORDER: + self._reasoning_level_cap = cfg.reasoning_level_cap except Exception as e: logger.debug("Honcho cost-awareness config parse error: %s", e) @@ -352,6 +355,7 @@ class HonchoMemoryProvider(MemoryProvider): honcho=client, config=cfg, context_tokens=cfg.context_tokens, + runtime_user_peer_name=kwargs.get("user_id") or None, ) # ----- B3: resolve_session_name ----- @@ -391,14 +395,45 @@ class HonchoMemoryProvider(MemoryProvider): except Exception as e: logger.debug("Honcho memory file migration skipped: %s", e) - # ----- B7: Pre-warming context at init ----- + # ----- B7: Pre-warming at init ----- + # Context prewarm warms peer.context() (base layer), consumed via + # pop_context_result() in prefetch(). Dialectic prewarm runs the + # full configured depth and writes into _prefetch_result so turn 1 + # consumes the result directly. if self._recall_mode in ("context", "hybrid"): try: self._manager.prefetch_context(self._session_key) - self._manager.prefetch_dialectic(self._session_key, "What should I know about this user?") - logger.debug("Honcho pre-warm threads started for session: %s", self._session_key) except Exception as e: - logger.debug("Honcho pre-warm failed: %s", e) + logger.debug("Honcho context prewarm failed: %s", e) + + _prewarm_query = ( + "Summarize what you know about this user. 
" + "Focus on preferences, current projects, and working style." + ) + + def _prewarm_dialectic() -> None: + try: + r = self._run_dialectic_depth(_prewarm_query) + except Exception as exc: + logger.debug("Honcho dialectic prewarm failed: %s", exc) + self._dialectic_empty_streak += 1 + return + if r and r.strip(): + with self._prefetch_lock: + self._prefetch_result = r + self._prefetch_result_fired_at = 0 + # Treat prewarm as turn 0 so cadence gating starts clean. + self._last_dialectic_turn = 0 + self._dialectic_empty_streak = 0 + else: + self._dialectic_empty_streak += 1 + + self._prefetch_thread_started_at = time.monotonic() + self._prefetch_thread = threading.Thread( + target=_prewarm_dialectic, daemon=True, name="honcho-prewarm-dialectic" + ) + self._prefetch_thread.start() + logger.debug("Honcho pre-warm started for session: %s", self._session_key) def _ensure_session(self) -> bool: """Lazily initialize the Honcho session (for tools-only mode). @@ -487,7 +522,8 @@ class HonchoMemoryProvider(MemoryProvider): "# Honcho Memory\n" "Active (tools-only mode). Use honcho_profile for a quick factual snapshot, " "honcho_search for raw excerpts, honcho_context for raw peer context, " - "honcho_reasoning for synthesized answers, " + "honcho_reasoning for synthesized answers (pass reasoning_level " + "minimal/low/medium/high/max — you pick the depth per call), " "honcho_conclude to save facts about the user. " "No automatic context injection — you must use tools to access memory." ) @@ -497,7 +533,8 @@ class HonchoMemoryProvider(MemoryProvider): "Active (hybrid mode). Relevant context is auto-injected AND memory tools are available. " "Use honcho_profile for a quick factual snapshot, " "honcho_search for raw excerpts, honcho_context for raw peer context, " - "honcho_reasoning for synthesized answers, " + "honcho_reasoning for synthesized answers (pass reasoning_level " + "minimal/low/medium/high/max — you pick the depth per call), " "honcho_conclude to save facts about the user." ) @@ -526,6 +563,10 @@ class HonchoMemoryProvider(MemoryProvider): if self._injection_frequency == "first-turn" and self._turn_count > 1: return "" + # Trivial prompts ("ok", "yes", slash commands) carry no semantic signal. + if self._is_trivial_prompt(query): + return "" + parts = [] # ----- Layer 1: Base context (representation + card) ----- @@ -560,43 +601,72 @@ class HonchoMemoryProvider(MemoryProvider): # On the very first turn, no queue_prefetch() has run yet so the # dialectic result is empty. Run with a bounded timeout so a slow # Honcho connection doesn't block the first response indefinitely. - # On timeout the result is skipped and queue_prefetch() will pick it - # up at the next cadence-allowed turn. + # On timeout we let the thread keep running and write its result into + # _prefetch_result under the lock, so the next turn picks it up. + # + # Skip if the session-start prewarm already filled _prefetch_result — + # firing another .chat() would be duplicate work. 
+ with self._prefetch_lock: + _prewarm_landed = bool(self._prefetch_result) + if _prewarm_landed and self._last_dialectic_turn == -999: + self._last_dialectic_turn = self._turn_count + if self._last_dialectic_turn == -999 and query: _first_turn_timeout = ( self._config.timeout if self._config and self._config.timeout else 8.0 ) - _result_holder: list[str] = [] + _fired_at = self._turn_count def _run_first_turn() -> None: try: - _result_holder.append(self._run_dialectic_depth(query)) + r = self._run_dialectic_depth(query) except Exception as exc: logger.debug("Honcho first-turn dialectic failed: %s", exc) - - _t = threading.Thread(target=_run_first_turn, daemon=True) - _t.start() - _t.join(timeout=_first_turn_timeout) - if not _t.is_alive(): - first_turn_dialectic = _result_holder[0] if _result_holder else "" - if first_turn_dialectic and first_turn_dialectic.strip(): + self._dialectic_empty_streak += 1 + return + if r and r.strip(): with self._prefetch_lock: - self._prefetch_result = first_turn_dialectic - self._last_dialectic_turn = self._turn_count - else: + self._prefetch_result = r + self._prefetch_result_fired_at = _fired_at + # Advance cadence only on a non-empty result so the next + # turn retries when the call returned nothing. + self._last_dialectic_turn = _fired_at + self._dialectic_empty_streak = 0 + else: + self._dialectic_empty_streak += 1 + + self._prefetch_thread_started_at = time.monotonic() + self._prefetch_thread = threading.Thread( + target=_run_first_turn, daemon=True, name="honcho-prefetch-first" + ) + self._prefetch_thread.start() + self._prefetch_thread.join(timeout=_first_turn_timeout) + if self._prefetch_thread.is_alive(): logger.debug( - "Honcho first-turn dialectic timed out (%.1fs) — " - "will inject at next cadence-allowed turn", + "Honcho first-turn dialectic still running after %.1fs — " + "will surface on next turn", _first_turn_timeout, ) - # Don't update _last_dialectic_turn: queue_prefetch() will - # retry at the next cadence-allowed turn via the async path. if self._prefetch_thread and self._prefetch_thread.is_alive(): self._prefetch_thread.join(timeout=3.0) with self._prefetch_lock: dialectic_result = self._prefetch_result + fired_at = self._prefetch_result_fired_at self._prefetch_result = "" + self._prefetch_result_fired_at = -999 + + # Discard stale pending results: if the fire happened more than + # cadence × multiplier turns ago (e.g. a run of trivial-prompt turns + # passed without consumption), the content likely no longer tracks + # the current conversational pivot. + stale_limit = self._dialectic_cadence * self._STALE_RESULT_MULTIPLIER + if dialectic_result and fired_at >= 0 and (self._turn_count - fired_at) > stale_limit: + logger.debug( + "Honcho pending dialectic discarded as stale: fired_at=%d, " + "turn=%d, limit=%d", fired_at, self._turn_count, stale_limit, + ) + dialectic_result = "" if dialectic_result and dialectic_result.strip(): parts.append(dialectic_result) @@ -641,6 +711,10 @@ class HonchoMemoryProvider(MemoryProvider): if self._recall_mode == "tools": return + # Trivial prompts don't warrant either a context refresh or a dialectic call. 
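        # (Illustrative calls against _is_trivial_prompt, defined further
        # down in this file: "", "  ok ", "lgtm", and "/compact" all return
        # True; a real question such as "what did we decide about the
        # schema?" returns False and proceeds to the cadence gates below.)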
+ if self._is_trivial_prompt(query): + return + # ----- Context refresh (base layer) — independent cadence ----- if self._context_cadence <= 1 or (self._turn_count - self._last_context_turn) >= self._context_cadence: self._last_context_turn = self._turn_count @@ -650,24 +724,46 @@ class HonchoMemoryProvider(MemoryProvider): logger.debug("Honcho context prefetch failed: %s", e) # ----- Dialectic prefetch (supplement layer) ----- - # B5: cadence check — skip if too soon since last dialectic call - if self._dialectic_cadence > 1: - if (self._turn_count - self._last_dialectic_turn) < self._dialectic_cadence: - logger.debug("Honcho dialectic prefetch skipped: cadence %d, turns since last: %d", - self._dialectic_cadence, self._turn_count - self._last_dialectic_turn) - return + # Thread-alive guard with stale-thread recovery: a hung Honcho call + # older than timeout × multiplier is treated as dead so it can't + # block subsequent fires. + if self._thread_is_live(): + logger.debug("Honcho dialectic prefetch skipped: prior thread still running") + return - self._last_dialectic_turn = self._turn_count + # Cadence gate, widened by the empty-streak backoff so a persistently + # silent backend doesn't retry every turn forever. + effective = self._effective_cadence() + if (self._turn_count - self._last_dialectic_turn) < effective: + logger.debug( + "Honcho dialectic prefetch skipped: effective cadence %d " + "(base %d, empty streak %d), turns since last: %d", + effective, self._dialectic_cadence, self._dialectic_empty_streak, + self._turn_count - self._last_dialectic_turn, + ) + return + + # Cadence advances only on a non-empty result so empty returns + # (transient API error, sparse representation) retry next turn. + _fired_at = self._turn_count def _run(): try: result = self._run_dialectic_depth(query) - if result and result.strip(): - with self._prefetch_lock: - self._prefetch_result = result except Exception as e: logger.debug("Honcho prefetch failed: %s", e) + self._dialectic_empty_streak += 1 + return + if result and result.strip(): + with self._prefetch_lock: + self._prefetch_result = result + self._prefetch_result_fired_at = _fired_at + self._last_dialectic_turn = _fired_at + self._dialectic_empty_streak = 0 + else: + self._dialectic_empty_streak += 1 + self._prefetch_thread_started_at = time.monotonic() self._prefetch_thread = threading.Thread( target=_run, daemon=True, name="honcho-prefetch" ) @@ -692,11 +788,91 @@ class HonchoMemoryProvider(MemoryProvider): _LEVEL_ORDER = ("minimal", "low", "medium", "high", "max") - def _resolve_pass_level(self, pass_idx: int) -> str: + # Char-count thresholds for the query-length reasoning heuristic. + _HEURISTIC_LENGTH_MEDIUM = 120 + _HEURISTIC_LENGTH_HIGH = 400 + + # Liveness constants. A thread older than timeout × multiplier is treated + # as dead so a hung Honcho call can't block future retries indefinitely. + _STALE_THREAD_MULTIPLIER = 2.0 + # Pending result whose fire-turn is older than cadence × multiplier is + # discarded on read so we don't inject context for a stale conversational + # pivot after a gap of trivial-prompt turns. + _STALE_RESULT_MULTIPLIER = 2 + # Cap on the empty-streak backoff so a persistently silent backend + # eventually settles on a ceiling instead of unbounded widening. 
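    # (Worked numbers, illustrative only: with a base cadence of 2,
    # _effective_cadence() returns 2 at streak 0, 3 at streak 1, 5 at
    # streak 3, and clamps at 16, i.e. 2 * _BACKOFF_MAX, from streak 14
    # onward.)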
+ _BACKOFF_MAX = 8 + + def _thread_is_live(self) -> bool: + """Thread-alive guard that treats threads older than the stale + threshold as dead, so a hung Honcho request can't block new fires.""" + if not self._prefetch_thread or not self._prefetch_thread.is_alive(): + return False + timeout = (self._config.timeout if self._config and self._config.timeout else 8.0) + age = time.monotonic() - self._prefetch_thread_started_at + if age > timeout * self._STALE_THREAD_MULTIPLIER: + logger.debug( + "Honcho prefetch thread age %.1fs exceeds stale threshold " + "%.1fs — treating as dead", age, timeout * self._STALE_THREAD_MULTIPLIER, + ) + return False + return True + + def _effective_cadence(self) -> int: + """Cadence plus empty-streak backoff, capped at _BACKOFF_MAX × base.""" + if self._dialectic_empty_streak <= 0: + return self._dialectic_cadence + widened = self._dialectic_cadence + self._dialectic_empty_streak + ceiling = self._dialectic_cadence * self._BACKOFF_MAX + return min(widened, ceiling) + + def liveness_snapshot(self) -> dict: + """In-process snapshot of dialectic liveness state for diagnostics. + + Returns current turn, last successful dialectic turn, pending-result + fire turn, empty streak, effective cadence, and thread status. + """ + thread_age = None + if self._prefetch_thread and self._prefetch_thread.is_alive(): + thread_age = time.monotonic() - self._prefetch_thread_started_at + return { + "turn_count": self._turn_count, + "last_dialectic_turn": self._last_dialectic_turn, + "pending_result_fired_at": self._prefetch_result_fired_at, + "empty_streak": self._dialectic_empty_streak, + "effective_cadence": self._effective_cadence(), + "thread_alive": thread_age is not None, + "thread_age_seconds": thread_age, + } + + def _apply_reasoning_heuristic(self, base: str, query: str) -> str: + """Scale `base` up by query length, clamped at reasoning_level_cap. + + Char-count heuristic: +1 at >=120 chars, +2 at >=400. + """ + if not self._reasoning_heuristic or not query: + return base + if base not in self._LEVEL_ORDER: + return base + n = len(query) + if n < self._HEURISTIC_LENGTH_MEDIUM: + bump = 0 + elif n < self._HEURISTIC_LENGTH_HIGH: + bump = 1 + else: + bump = 2 + base_idx = self._LEVEL_ORDER.index(base) + cap_idx = self._LEVEL_ORDER.index(self._reasoning_level_cap) + return self._LEVEL_ORDER[min(base_idx + bump, cap_idx)] + + def _resolve_pass_level(self, pass_idx: int, query: str = "") -> str: """Resolve reasoning level for a given pass index. - Uses dialecticDepthLevels if configured, otherwise proportional - defaults relative to dialecticReasoningLevel. + Precedence: + 1. dialecticDepthLevels (explicit per-pass) — wins absolutely + 2. _PROPORTIONAL_LEVELS table (depth>1 lighter-early passes) + 3. 
Base level = dialecticReasoningLevel, optionally scaled by the + reasoning heuristic when the mapping falls through to 'base' """ if self._dialectic_depth_levels and pass_idx < len(self._dialectic_depth_levels): return self._dialectic_depth_levels[pass_idx] @@ -704,7 +880,7 @@ class HonchoMemoryProvider(MemoryProvider): base = (self._config.dialectic_reasoning_level if self._config else "low") mapping = self._PROPORTIONAL_LEVELS.get((self._dialectic_depth, pass_idx)) if mapping is None or mapping == "base": - return base + return self._apply_reasoning_heuristic(base, query) return mapping def _build_dialectic_prompt(self, pass_idx: int, prior_results: list[str], is_cold: bool) -> str: @@ -791,7 +967,7 @@ class HonchoMemoryProvider(MemoryProvider): break prompt = self._build_dialectic_prompt(i, results, is_cold) - level = self._resolve_pass_level(i) + level = self._resolve_pass_level(i, query=query) logger.debug("Honcho dialectic depth %d: pass %d, level=%s, cold=%s", self._dialectic_depth, i, level, is_cold) @@ -808,6 +984,29 @@ class HonchoMemoryProvider(MemoryProvider): return r return "" + # Prompts that carry no semantic signal — trivial acknowledgements, slash + # commands, empty input. Skipping injection here saves tokens and prevents + # stale user-model context from derailing one-word replies. + _TRIVIAL_PROMPT_RE = re.compile( + r'^(yes|no|ok|okay|sure|thanks|thank you|y|n|yep|nope|yeah|nah|' + r'continue|go ahead|do it|proceed|got it|cool|nice|great|done|next|lgtm|k)$', + re.IGNORECASE, + ) + + @classmethod + def _is_trivial_prompt(cls, text: str) -> bool: + """Return True if the prompt is too trivial to warrant context injection.""" + if not text: + return True + stripped = text.strip() + if not stripped: + return True + if stripped.startswith("/"): + return True + if cls._TRIVIAL_PROMPT_RE.match(stripped): + return True + return False + def on_turn_start(self, turn_number: int, message: str, **kwargs) -> None: """Track turn count for cadence and injection_frequency logic.""" self._turn_count = turn_number diff --git a/plugins/memory/honcho/cli.py b/plugins/memory/honcho/cli.py index 536d34002d..5c829a4c98 100644 --- a/plugins/memory/honcho/cli.py +++ b/plugins/memory/honcho/cli.py @@ -460,17 +460,37 @@ def cmd_setup(args) -> None: pass # keep current # --- 7b. Dialectic cadence --- - current_dialectic = str(hermes_host.get("dialecticCadence") or cfg.get("dialecticCadence") or "3") + current_dialectic = str(hermes_host.get("dialecticCadence") or cfg.get("dialecticCadence") or "2") print("\n Dialectic cadence:") print(" How often Honcho rebuilds its user model (LLM call on Honcho backend).") - print(" 1 = every turn (aggressive), 3 = every 3 turns (recommended), 5+ = sparse.") + print(" 1 = every turn, 2 = every other turn, 3+ = sparser.") + print(" Recommended: 1-5.") new_dialectic = _prompt("Dialectic cadence", default=current_dialectic) try: val = int(new_dialectic) if val >= 1: hermes_host["dialecticCadence"] = val except (ValueError, TypeError): - hermes_host["dialecticCadence"] = 3 + hermes_host["dialecticCadence"] = 2 + + # --- 7c. 
Dialectic reasoning level --- + current_reasoning = ( + hermes_host.get("dialecticReasoningLevel") + or cfg.get("dialecticReasoningLevel") + or "low" + ) + print("\n Dialectic reasoning level:") + print(" Depth Honcho uses when synthesizing user context on auto-injected calls.") + print(" minimal -- quick factual lookups") + print(" low -- straightforward questions (default)") + print(" medium -- multi-aspect synthesis") + print(" high -- complex behavioral patterns") + print(" max -- thorough audit-level analysis") + new_reasoning = _prompt("Reasoning level", default=current_reasoning) + if new_reasoning in ("minimal", "low", "medium", "high", "max"): + hermes_host["dialecticReasoningLevel"] = new_reasoning + else: + hermes_host["dialecticReasoningLevel"] = "low" # --- 8. Session strategy --- current_strat = hermes_host.get("sessionStrategy") or cfg.get("sessionStrategy", "per-session") @@ -636,8 +656,11 @@ def cmd_status(args) -> None: print(f" Recall mode: {hcfg.recall_mode}") print(f" Context budget: {hcfg.context_tokens or '(uncapped)'} tokens") raw = getattr(hcfg, "raw", None) or {} - dialectic_cadence = raw.get("dialecticCadence") or 3 + dialectic_cadence = raw.get("dialecticCadence") or 1 print(f" Dialectic cad: every {dialectic_cadence} turn{'s' if dialectic_cadence != 1 else ''}") + reasoning_cap = raw.get("reasoningLevelCap") or hcfg.reasoning_level_cap + heuristic_on = "on" if hcfg.reasoning_heuristic else "off" + print(f" Reasoning: base={hcfg.dialectic_reasoning_level}, cap={reasoning_cap}, heuristic={heuristic_on}") print(f" Observation: user(me={hcfg.user_observe_me},others={hcfg.user_observe_others}) ai(me={hcfg.ai_observe_me},others={hcfg.ai_observe_others})") print(f" Write freq: {hcfg.write_frequency}") diff --git a/plugins/memory/honcho/client.py b/plugins/memory/honcho/client.py index 2474d3a2b6..fef2e2d58f 100644 --- a/plugins/memory/honcho/client.py +++ b/plugins/memory/honcho/client.py @@ -251,6 +251,11 @@ class HonchoClientConfig: # matching dialectic_depth length. When None, uses proportional defaults # derived from dialectic_reasoning_level. dialectic_depth_levels: list[str] | None = None + # When true, the auto-injected dialectic scales reasoning level up on + # longer queries. See HonchoMemoryProvider for thresholds. + reasoning_heuristic: bool = True + # Ceiling for the heuristic-selected reasoning level. + reasoning_level_cap: str = "high" # Honcho API limits — configurable for self-hosted instances # Max chars per message sent via add_messages() (Honcho cloud: 25000) message_max_chars: int = 25000 @@ -446,6 +451,16 @@ class HonchoClientConfig: raw.get("dialecticDepthLevels"), depth=_parse_dialectic_depth(host_block.get("dialecticDepth"), raw.get("dialecticDepth")), ), + reasoning_heuristic=_resolve_bool( + host_block.get("reasoningHeuristic"), + raw.get("reasoningHeuristic"), + default=True, + ), + reasoning_level_cap=( + host_block.get("reasoningLevelCap") + or raw.get("reasoningLevelCap") + or "high" + ), message_max_chars=int( host_block.get("messageMaxChars") or raw.get("messageMaxChars") diff --git a/plugins/memory/honcho/session.py b/plugins/memory/honcho/session.py index fd91ee3b3b..79625b5cd5 100644 --- a/plugins/memory/honcho/session.py +++ b/plugins/memory/honcho/session.py @@ -78,6 +78,7 @@ class HonchoSessionManager: honcho: Honcho | None = None, context_tokens: int | None = None, config: Any | None = None, + runtime_user_peer_name: str | None = None, ): """ Initialize the session manager. 
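A hedged sketch of the peer-name precedence that `runtime_user_peer_name` introduces; the resolution order is taken from the get-session hunk below, `_sanitize_id` is the existing helper, and the session-key fallback shape is an assumption:

    def _resolve_user_peer_id(manager: HonchoSessionManager, session_key: str) -> str:
        # Runtime gateway identity wins over a configured peerName; the
        # session-key derivation is only a last resort.
        if manager._runtime_user_peer_name:
            return manager._sanitize_id(manager._runtime_user_peer_name)
        if manager._config and manager._config.peer_name:
            return manager._sanitize_id(manager._config.peer_name)
        return manager._sanitize_id(session_key)  # fallback shape assumed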
@@ -87,10 +88,12 @@ class HonchoSessionManager: context_tokens: Max tokens for context() calls (None = Honcho default). config: HonchoClientConfig from global config (provides peer_name, ai_peer, write_frequency, observation, etc.). + runtime_user_peer_name: Gateway user identity for per-user memory scoping. """ self._honcho = honcho self._context_tokens = context_tokens self._config = config + self._runtime_user_peer_name = runtime_user_peer_name self._cache: dict[str, HonchoSession] = {} self._peers_cache: dict[str, Any] = {} self._sessions_cache: dict[str, Any] = {} @@ -100,9 +103,11 @@ class HonchoSessionManager: self._write_frequency = write_frequency self._turn_counter: int = 0 - # Prefetch caches: session_key → last result (consumed once per turn) + # Prefetch cache: session_key → last context result (consumed once per turn). + # Dialectic results are cached on the plugin side (HonchoMemoryProvider + # ._prefetch_result) so session-start prewarm and turn-driven fires share + # one source of truth; see __init__.py _do_session_init for the prewarm. self._context_cache: dict[str, dict] = {} - self._dialectic_cache: dict[str, str] = {} self._prefetch_cache_lock = threading.Lock() self._dialectic_reasoning_level: str = ( config.dialectic_reasoning_level if config else "low" @@ -272,8 +277,10 @@ class HonchoSessionManager: logger.debug("Local session cache hit: %s", key) return self._cache[key] - # Use peer names from global config when available - if self._config and self._config.peer_name: + # Gateway sessions should use the runtime user identity when available. + if self._runtime_user_peer_name: + user_peer_id = self._sanitize_id(self._runtime_user_peer_name) + elif self._config and self._config.peer_name: user_peer_id = self._sanitize_id(self._config.peer_name) else: # Fallback: derive from session key @@ -499,8 +506,8 @@ class HonchoSessionManager: Query Honcho's dialectic endpoint about a peer. Runs an LLM on Honcho's backend against the target peer's full - representation. Higher latency than context() — call async via - prefetch_dialectic() to avoid blocking the response. + representation. Higher latency than context() — callers run this in + a background thread (see HonchoMemoryProvider) to avoid blocking. Args: session_key: The session key to query against. @@ -555,42 +562,6 @@ class HonchoSessionManager: logger.warning("Honcho dialectic query failed: %s", e) return "" - def prefetch_dialectic(self, session_key: str, query: str) -> None: - """ - Fire a dialectic_query in a background thread, caching the result. - - Non-blocking. The result is available via pop_dialectic_result() - on the next call (typically the following turn). Reasoning level - is selected dynamically based on query complexity. - - Args: - session_key: The session key to query against. - query: The user's current message, used as the query. - """ - def _run(): - result = self.dialectic_query(session_key, query) - if result: - self.set_dialectic_result(session_key, result) - - t = threading.Thread(target=_run, name="honcho-dialectic-prefetch", daemon=True) - t.start() - - def set_dialectic_result(self, session_key: str, result: str) -> None: - """Store a prefetched dialectic result in a thread-safe way.""" - if not result: - return - with self._prefetch_cache_lock: - self._dialectic_cache[session_key] = result - - def pop_dialectic_result(self, session_key: str) -> str: - """ - Return and clear the cached dialectic result for this session. - - Returns empty string if no result is ready yet. 
- """ - with self._prefetch_cache_lock: - return self._dialectic_cache.pop(session_key, "") - def prefetch_context(self, session_key: str, user_message: str | None = None) -> None: """ Fire get_prefetch_context in a background thread, caching the result. diff --git a/pyproject.toml b/pyproject.toml index bd83673651..992e548f9c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -126,7 +126,7 @@ py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajector hermes_cli = ["web_dist/**/*"] [tool.setuptools.packages.find] -include = ["agent", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "acp_adapter", "plugins", "plugins.*"] +include = ["agent", "agent.*", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "acp_adapter", "plugins", "plugins.*"] [tool.pytest.ini_options] testpaths = ["tests"] diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 96f48e77f5..0000000000 --- a/requirements.txt +++ /dev/null @@ -1,36 +0,0 @@ -# NOTE: This file is maintained for convenience only. -# The canonical dependency list is in pyproject.toml. -# Preferred install: pip install -e ".[all]" - -# Core dependencies -openai -python-dotenv -fire -httpx -rich -tenacity -prompt_toolkit -pyyaml -requests -jinja2 -pydantic>=2.0 -PyJWT[crypto] -debugpy - -# Web tools -firecrawl-py -parallel-web>=0.4.2 - -# Image generation -fal-client - -# Text-to-speech (Edge TTS is free, no API key needed) -edge-tts - -# Optional: For cron expression parsing (cronjob scheduling) -croniter - -# Optional: For messaging platform integrations (gateway) -python-telegram-bot[webhooks]>=22.6 -discord.py>=2.0 -aiohttp>=3.9.0 diff --git a/run_agent.py b/run_agent.py index c87bd35152..ec5e86d786 100644 --- a/run_agent.py +++ b/run_agent.py @@ -48,6 +48,10 @@ from hermes_constants import get_hermes_home # Load .env from ~/.hermes/.env first, then project root as dev fallback. # User-managed env files should override stale shell exports on restart. 
from hermes_cli.env_loader import load_hermes_dotenv +from hermes_cli.timeouts import ( + get_provider_request_timeout, + get_provider_stale_timeout, +) _hermes_home = get_hermes_home() _project_env = Path(__file__).parent / '.env' @@ -96,6 +100,20 @@ from agent.subdirectory_hints import SubdirectoryHintTracker from agent.prompt_caching import apply_anthropic_cache_control from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, build_environment_hints, load_soul_md, TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS, DEVELOPER_ROLE_MODELS, GOOGLE_MODEL_OPERATIONAL_GUIDANCE, OPENAI_MODEL_EXECUTION_GUIDANCE from agent.usage_pricing import estimate_usage_cost, normalize_usage +from agent.codex_responses_adapter import ( + _chat_content_to_responses_parts, + _chat_messages_to_responses_input as _codex_chat_messages_to_responses_input, + _derive_responses_function_call_id as _codex_derive_responses_function_call_id, + _deterministic_call_id as _codex_deterministic_call_id, + _extract_responses_message_text as _codex_extract_responses_message_text, + _extract_responses_reasoning_text as _codex_extract_responses_reasoning_text, + _normalize_codex_response as _codex_normalize_codex_response, + _preflight_codex_api_kwargs as _codex_preflight_codex_api_kwargs, + _preflight_codex_input_items as _codex_preflight_codex_input_items, + _responses_tools as _codex_responses_tools, + _split_responses_tool_id as _codex_split_responses_tool_id, + _summarize_user_message_for_log, +) from agent.display import ( KawaiiSpinner, build_tool_preview as _build_tool_preview, get_cute_tool_message as _get_cute_tool_message_impl, @@ -106,7 +124,7 @@ from agent.trajectory import ( convert_scratchpad_to_think, has_incomplete_scratchpad, save_trajectory as _save_trajectory_to_file, ) -from utils import atomic_json_write, env_var_enabled +from utils import atomic_json_write, base_url_host_matches, base_url_hostname, env_var_enabled, normalize_proxy_url @@ -159,6 +177,20 @@ class _SafeWriter: return getattr(self._inner, name) +def _get_proxy_from_env() -> Optional[str]: + """Read proxy URL from environment variables. + + Checks HTTPS_PROXY, HTTP_PROXY, ALL_PROXY (and lowercase variants) in order. + Returns the first valid proxy URL found, or None if no proxy is configured. + """ + for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", + "https_proxy", "http_proxy", "all_proxy"): + value = os.environ.get(key, "").strip() + if value: + return normalize_proxy_url(value) + return None + + def _install_safe_stdio() -> None: """Wrap stdout/stderr so best-effort console output cannot crash the agent.""" for stream_name in ("stdout", "stderr"): @@ -353,6 +385,11 @@ def _sanitize_surrogates(text: str) -> str: return text +# _chat_content_to_responses_parts and _summarize_user_message_for_log are +# imported from agent.codex_responses_adapter (see import block above). +# They remain importable from run_agent for backward compatibility. + + def _sanitize_structure_surrogates(payload: Any) -> bool: """Replace surrogate code points in nested dict/list payloads in-place. @@ -454,6 +491,71 @@ def _sanitize_messages_surrogates(messages: list) -> bool: return found +def _repair_tool_call_arguments(raw_args: str, tool_name: str = "?") -> str: + """Attempt to repair malformed tool_call argument JSON. + + Models like GLM-5.1 via Ollama can produce truncated JSON, trailing + commas, Python ``None``, etc. The API proxy rejects these with HTTP 400 + "invalid tool call arguments". 
This function applies common repairs; + if all fail it returns ``"{}"`` so the request succeeds (better than + crashing the session). All repairs are logged at WARNING level. + """ + raw_stripped = raw_args.strip() if isinstance(raw_args, str) else "" + + # Fast-path: empty / whitespace-only -> empty object + if not raw_stripped: + logger.warning("Sanitized empty tool_call arguments for %s", tool_name) + return "{}" + + # Python-literal None -> normalise to {} + if raw_stripped == "None": + logger.warning("Sanitized Python-None tool_call arguments for %s", tool_name) + return "{}" + + # Attempt common JSON repairs + fixed = raw_stripped + # 1. Strip trailing commas before } or ] + fixed = re.sub(r',\s*([}\]])', r'\1', fixed) + # 2. Close unclosed structures + open_curly = fixed.count('{') - fixed.count('}') + open_bracket = fixed.count('[') - fixed.count(']') + if open_curly > 0: + fixed += '}' * open_curly + if open_bracket > 0: + fixed += ']' * open_bracket + # 3. Remove excess closing braces/brackets (bounded to 50 iterations) + for _ in range(50): + try: + json.loads(fixed) + break + except json.JSONDecodeError: + if fixed.endswith('}') and fixed.count('}') > fixed.count('{'): + fixed = fixed[:-1] + elif fixed.endswith(']') and fixed.count(']') > fixed.count('['): + fixed = fixed[:-1] + else: + break + + try: + json.loads(fixed) + logger.warning( + "Repaired malformed tool_call arguments for %s: %s → %s", + tool_name, raw_stripped[:80], fixed[:80], + ) + return fixed + except json.JSONDecodeError: + pass + + # Last resort: replace with empty object so the API request doesn't + # crash the entire session. + logger.warning( + "Unrepairable tool_call arguments for %s — " + "replaced with empty object (was: %s)", + tool_name, raw_stripped[:80], + ) + return "{}" + + def _strip_non_ascii(text: str) -> str: """Remove non-ASCII characters, replacing with closest ASCII equivalent or removing. @@ -601,6 +703,7 @@ class AIAgent: def base_url(self, value: str) -> None: self._base_url = value self._base_url_lower = value.lower() if value else "" + self._base_url_hostname = base_url_hostname(value) def __init__( self, @@ -648,6 +751,11 @@ class AIAgent: prefill_messages: List[Dict[str, Any]] = None, platform: str = None, user_id: str = None, + user_name: str = None, + chat_id: str = None, + chat_name: str = None, + chat_type: str = None, + thread_id: str = None, gateway_session_key: str = None, skip_context_files: bool = False, skip_memory: bool = False, @@ -717,6 +825,11 @@ class AIAgent: self.ephemeral_system_prompt = ephemeral_system_prompt self.platform = platform # "cli", "telegram", "discord", "whatsapp", etc. self._user_id = user_id # Platform user identifier (gateway sessions) + self._user_name = user_name + self._chat_id = chat_id + self._chat_name = chat_name + self._chat_type = chat_type + self._thread_id = thread_id self._gateway_session_key = gateway_session_key # Stable per-chat key (e.g. 
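Illustrative before/after pairs for the `_repair_tool_call_arguments` helper above (inputs hypothetical; results follow the repair steps shown in its body):

    assert _repair_tool_call_arguments('{"path": "a.txt",}') == '{"path": "a.txt"}'  # trailing comma
    assert _repair_tool_call_arguments('{"n": 1') == '{"n": 1}'                      # unclosed brace
    assert _repair_tool_call_arguments('None') == '{}'                               # Python literal
    assert _repair_tool_call_arguments('') == '{}'                                   # empty fast-path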
agent:main:telegram:dm:123) # Pluggable print function — CLI replaces this with _cprint so that # raw ANSI status lines are routed through prompt_toolkit's renderer @@ -742,13 +855,16 @@ class AIAgent: self.api_mode = "codex_responses" elif self.provider == "xai": self.api_mode = "codex_responses" - elif (provider_name is None) and "chatgpt.com/backend-api/codex" in self._base_url_lower: + elif (provider_name is None) and ( + self._base_url_hostname == "chatgpt.com" + and "/backend-api/codex" in self._base_url_lower + ): self.api_mode = "codex_responses" self.provider = "openai-codex" - elif (provider_name is None) and "api.x.ai" in self._base_url_lower: + elif (provider_name is None) and self._base_url_hostname == "api.x.ai": self.api_mode = "codex_responses" self.provider = "xai" - elif self.provider == "anthropic" or (provider_name is None and "api.anthropic.com" in self._base_url_lower): + elif self.provider == "anthropic" or (provider_name is None and self._base_url_hostname == "api.anthropic.com"): self.api_mode = "anthropic_messages" self.provider = "anthropic" elif self._base_url_lower.rstrip("/").endswith("/anthropic"): @@ -756,8 +872,12 @@ class AIAgent: # use a URL convention ending in /anthropic. Auto-detect these so the # Anthropic Messages API adapter is used instead of chat completions. self.api_mode = "anthropic_messages" - elif self.provider == "bedrock" or "bedrock-runtime" in self._base_url_lower: - # AWS Bedrock — auto-detect from provider name or base URL. + elif self.provider == "bedrock" or ( + self._base_url_hostname.startswith("bedrock-runtime.") + and base_url_host_matches(self._base_url_lower, "amazonaws.com") + ): + # AWS Bedrock — auto-detect from provider name or base URL + # (bedrock-runtime..amazonaws.com). self.api_mode = "bedrock_converse" else: self.api_mode = "chat_completions" @@ -877,13 +997,15 @@ class AIAgent: self.prefill_messages = prefill_messages or [] # Prefilled conversation turns self._force_ascii_payload = False - # Anthropic prompt caching: auto-enabled for Claude models via OpenRouter. - # Reduces input costs by ~75% on multi-turn conversations by caching the - # conversation prefix. Uses system_and_3 strategy (4 breakpoints). - is_openrouter = self._is_openrouter_url() - is_claude = "claude" in self.model.lower() - is_native_anthropic = self.api_mode == "anthropic_messages" and self.provider == "anthropic" - self._use_prompt_caching = (is_openrouter and is_claude) or is_native_anthropic + # Anthropic prompt caching: auto-enabled for Claude models on native + # Anthropic, OpenRouter, and third-party gateways that speak the + # Anthropic protocol (``api_mode == 'anthropic_messages'``). Reduces + # input costs by ~75% on multi-turn conversations. Uses system_and_3 + # strategy (4 breakpoints). See ``_anthropic_prompt_cache_policy`` + # for the layout-vs-transport decision. + self._use_prompt_caching, self._use_native_cache_layout = ( + self._anthropic_prompt_cache_policy() + ) self._cache_ttl = "5m" # Default 5-minute TTL (1.25x write cost) # Iteration budget: the LLM is only notified when it actually exhausts @@ -963,6 +1085,12 @@ class AIAgent: self._anthropic_client = None self._is_anthropic_oauth = False + # Resolve per-provider / per-model request timeout once up front so + # every client construction path below (Anthropic native, OpenAI-wire, + # router-based implicit auth) can apply it consistently. Bedrock + # Claude uses its own timeout path and is not covered here. 
+ _provider_timeout = get_provider_request_timeout(self.provider, self.model) + if self.api_mode == "anthropic_messages": from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token # Bedrock + Claude → use AnthropicBedrock SDK for full feature parity @@ -970,8 +1098,7 @@ class AIAgent: _is_bedrock_anthropic = self.provider == "bedrock" if _is_bedrock_anthropic: from agent.anthropic_adapter import build_anthropic_bedrock_client - import re as _re - _region_match = _re.search(r"bedrock-runtime\.([a-z0-9-]+)\.", base_url or "") + _region_match = re.search(r"bedrock-runtime\.([a-z0-9-]+)\.", base_url or "") _br_region = _region_match.group(1) if _region_match else "us-east-1" self._bedrock_region = _br_region self._anthropic_client = build_anthropic_bedrock_client(_br_region) @@ -992,9 +1119,16 @@ class AIAgent: self.api_key = effective_key self._anthropic_api_key = effective_key self._anthropic_base_url = base_url + # Only mark the session as OAuth-authenticated when the token + # genuinely belongs to native Anthropic. Third-party providers + # (MiniMax, Kimi, GLM, LiteLLM proxies) that accept the + # Anthropic protocol must never trip OAuth code paths — doing + # so injects Claude-Code identity headers and system prompts + # that cause 401/403 on their endpoints. Guards #1739 and + # the third-party identity-injection bug. from agent.anthropic_adapter import _is_oauth_token as _is_oat - self._is_anthropic_oauth = _is_oat(effective_key) - self._anthropic_client = build_anthropic_client(effective_key, base_url) + self._is_anthropic_oauth = _is_oat(effective_key) if _is_native_anthropic else False + self._anthropic_client = build_anthropic_client(effective_key, base_url, timeout=_provider_timeout) # No OpenAI client needed for Anthropic mode self.client = None self._client_kwargs = {} @@ -1005,8 +1139,7 @@ class AIAgent: elif self.api_mode == "bedrock_converse": # AWS Bedrock — uses boto3 directly, no OpenAI client needed. # Region is extracted from the base_url or defaults to us-east-1. - import re as _re - _region_match = _re.search(r"bedrock-runtime\.([a-z0-9-]+)\.", base_url or "") + _region_match = re.search(r"bedrock-runtime\.([a-z0-9-]+)\.", base_url or "") self._bedrock_region = _region_match.group(1) if _region_match else "us-east-1" # Guardrail config — read from config.yaml at init time. self._bedrock_guardrail_config = None @@ -1034,26 +1167,31 @@ class AIAgent: # Explicit credentials from CLI/gateway — construct directly. # The runtime provider resolver already handled auth for us. 
client_kwargs = {"api_key": api_key, "base_url": base_url} + if _provider_timeout is not None: + client_kwargs["timeout"] = _provider_timeout if self.provider == "copilot-acp": client_kwargs["command"] = self.acp_command client_kwargs["args"] = self.acp_args effective_base = base_url - if "openrouter" in effective_base.lower(): + if base_url_host_matches(effective_base, "openrouter.ai"): client_kwargs["default_headers"] = { "HTTP-Referer": "https://hermes-agent.nousresearch.com", "X-OpenRouter-Title": "Hermes Agent", "X-OpenRouter-Categories": "productivity,cli-agent", } - elif "api.githubcopilot.com" in effective_base.lower(): + elif base_url_host_matches(effective_base, "api.githubcopilot.com"): from hermes_cli.models import copilot_default_headers client_kwargs["default_headers"] = copilot_default_headers() - elif "api.kimi.com" in effective_base.lower(): + elif base_url_host_matches(effective_base, "api.kimi.com"): client_kwargs["default_headers"] = { - "User-Agent": "KimiCLI/1.30.0", + "User-Agent": "claude-code/0.1.0", } - elif "portal.qwen.ai" in effective_base.lower(): + elif base_url_host_matches(effective_base, "portal.qwen.ai"): client_kwargs["default_headers"] = _qwen_portal_headers() + elif base_url_host_matches(effective_base, "chatgpt.com"): + from agent.auxiliary_client import _codex_cloudflare_headers + client_kwargs["default_headers"] = _codex_cloudflare_headers(api_key) else: # No explicit creds — use the centralized provider router from agent.auxiliary_client import resolve_provider_client @@ -1064,6 +1202,8 @@ class AIAgent: "api_key": _routed_client.api_key, "base_url": str(_routed_client.base_url), } + if _provider_timeout is not None: + client_kwargs["timeout"] = _provider_timeout # Preserve any default_headers the router set if hasattr(_routed_client, '_default_headers') and _routed_client._default_headers: client_kwargs["default_headers"] = dict(_routed_client._default_headers) @@ -1105,7 +1245,7 @@ class AIAgent: # stream tool call arguments token-by-token, keeping the # connection alive. _effective_base = str(client_kwargs.get("base_url", "")).lower() - if "openrouter" in _effective_base and "claude" in (self.model or "").lower(): + if base_url_host_matches(_effective_base, "openrouter.ai") and "claude" in (self.model or "").lower(): headers = client_kwargs.get("default_headers") or {} existing_beta = headers.get("x-anthropic-beta", "") _FINE_GRAINED = "fine-grained-tool-streaming-2025-05-14" @@ -1199,7 +1339,12 @@ class AIAgent: # Show prompt caching status if self._use_prompt_caching and not self.quiet_mode: - source = "native Anthropic" if is_native_anthropic else "Claude via OpenRouter" + if self._use_native_cache_layout and self.provider == "anthropic": + source = "native Anthropic" + elif self._use_native_cache_layout: + source = "Anthropic-compatible endpoint" + else: + source = "Claude via OpenRouter" print(f"💾 Prompt caching: ENABLED ({source}, {self._cache_ttl} TTL)") # Session logging setup - auto-save conversation trajectories for debugging @@ -1271,6 +1416,10 @@ class AIAgent: _agent_cfg = _load_agent_config() except Exception: _agent_cfg = {} + # Cache only the derived auxiliary compression context override that is + # needed later by the startup feasibility check. Avoid exposing a + # broad pseudo-public config object on the agent instance. 
+ self._aux_compression_context_length_config = None # Persistent memory (MEMORY.md + USER.md) -- loaded from disk self._memory_store = None @@ -1306,31 +1455,6 @@ class AIAgent: try: _mem_provider_name = mem_config.get("provider", "") if mem_config else "" - # Auto-migrate: if Honcho was actively configured (enabled + - # credentials) but memory.provider is not set, activate the - # honcho plugin automatically. Just having the config file - # is not enough — the user may have disabled Honcho or the - # file may be from a different tool. - if not _mem_provider_name: - try: - from plugins.memory.honcho.client import HonchoClientConfig as _HCC - _hcfg = _HCC.from_global_config() - if _hcfg.enabled and (_hcfg.api_key or _hcfg.base_url): - _mem_provider_name = "honcho" - # Persist so this only auto-migrates once - try: - from hermes_cli.config import load_config as _lc, save_config as _sc - _cfg = _lc() - _cfg.setdefault("memory", {})["provider"] = "honcho" - _sc(_cfg) - except Exception: - pass - if not self.quiet_mode: - print(" ✓ Auto-migrated Honcho to memory provider plugin.") - print(" Your config and data are preserved.\n") - except Exception: - pass - if _mem_provider_name: from agent.memory_manager import MemoryManager as _MemoryManager from plugins.memory import load_memory_provider as _load_mem @@ -1339,11 +1463,10 @@ class AIAgent: if _mp and _mp.is_available(): self._memory_manager.add_provider(_mp) if self._memory_manager.providers: - from hermes_constants import get_hermes_home as _ghh _init_kwargs = { "session_id": self.session_id, "platform": platform or "cli", - "hermes_home": str(_ghh()), + "hermes_home": str(get_hermes_home()), "agent_context": "primary", } # Thread session title for memory provider scoping @@ -1358,6 +1481,16 @@ class AIAgent: # Thread gateway user identity for per-user memory scoping if self._user_id: _init_kwargs["user_id"] = self._user_id + if self._user_name: + _init_kwargs["user_name"] = self._user_name + if self._chat_id: + _init_kwargs["chat_id"] = self._chat_id + if self._chat_name: + _init_kwargs["chat_name"] = self._chat_name + if self._chat_type: + _init_kwargs["chat_type"] = self._chat_type + if self._thread_id: + _init_kwargs["thread_id"] = self._thread_id # Thread gateway session key for stable per-chat Honcho session isolation if self._gateway_session_key: _init_kwargs["gateway_session_key"] = self._gateway_session_key @@ -1426,6 +1559,24 @@ class AIAgent: compression_target_ratio = float(_compression_cfg.get("target_ratio", 0.20)) compression_protect_last = int(_compression_cfg.get("protect_last_n", 20)) + # Read optional explicit context_length override for the auxiliary + # compression model. Custom endpoints often cannot report this via + # /models, so the startup feasibility check needs the config hint. 
+ try: + _aux_cfg = _agent_cfg.get("auxiliary", {}).get("compression", {}) + except Exception: + _aux_cfg = {} + if isinstance(_aux_cfg, dict): + _aux_context_config = _aux_cfg.get("context_length") + else: + _aux_context_config = None + if _aux_context_config is not None: + try: + _aux_context_config = int(_aux_context_config) + except (TypeError, ValueError): + _aux_context_config = None + self._aux_compression_context_length_config = _aux_context_config + # Read explicit context_length override from model config _model_cfg = _agent_cfg.get("model", {}) if isinstance(_model_cfg, dict): @@ -1442,7 +1593,6 @@ class AIAgent: "Falling back to auto-detection.", _config_context_length, ) - import sys print( f"\n⚠ Invalid model.context_length in config.yaml: {_config_context_length!r}\n" f" Must be a plain integer (e.g. 256000, not '256K').\n" @@ -1484,7 +1634,6 @@ class AIAgent: "Falling back to auto-detection.", self.model, _cp_ctx, ) - import sys print( f"\n⚠ Invalid context_length for model {self.model!r} in custom_providers: {_cp_ctx!r}\n" f" Must be a plain integer (e.g. 256000, not '256K').\n" @@ -1640,7 +1789,7 @@ class AIAgent: logger.debug("Invalid ollama_num_ctx config value: %r", _ollama_num_ctx_override) if self._ollama_num_ctx is None and self.base_url and is_local_endpoint(self.base_url): try: - _detected = query_ollama_num_ctx(self.model, self.base_url) + _detected = query_ollama_num_ctx(self.model, self.base_url, api_key=self.api_key or "") if _detected and _detected > 0: self._ollama_num_ctx = _detected except Exception as exc: @@ -1676,6 +1825,7 @@ class AIAgent: "api_key": getattr(self, "api_key", ""), "client_kwargs": dict(self._client_kwargs), "use_prompt_caching": self._use_prompt_caching, + "use_native_cache_layout": self._use_native_cache_layout, # Context engine state that _try_activate_fallback() overwrites. # Use getattr for model/base_url/api_key/provider since plugin # engines may not have these (they're ContextCompressor-specific). @@ -1746,8 +1896,6 @@ class AIAgent: change persists across turns (unlike fallback which is turn-scoped). 
""" - import logging - import re as _re from hermes_cli.providers import determine_api_mode # ── Determine api_mode if not provided ── @@ -1765,7 +1913,7 @@ class AIAgent: and isinstance(base_url, str) and base_url ): - base_url = _re.sub(r"/v1/?$", "", base_url) + base_url = re.sub(r"/v1/?$", "", base_url) old_model = self.model old_provider = self.provider @@ -1795,8 +1943,9 @@ class AIAgent: self._anthropic_base_url = base_url or getattr(self, "_anthropic_base_url", None) self._anthropic_client = build_anthropic_client( effective_key, self._anthropic_base_url, + timeout=get_provider_request_timeout(self.provider, self.model), ) - self._is_anthropic_oauth = _is_oauth_token(effective_key) + self._is_anthropic_oauth = _is_oauth_token(effective_key) if _is_native_anthropic else False self.client = None self._client_kwargs = {} else: @@ -1806,6 +1955,9 @@ class AIAgent: "api_key": effective_key, "base_url": effective_base, } + _sm_timeout = get_provider_request_timeout(self.provider, self.model) + if _sm_timeout is not None: + self._client_kwargs["timeout"] = _sm_timeout self.client = self._create_openai_client( dict(self._client_kwargs), reason="switch_model", @@ -1813,10 +1965,13 @@ class AIAgent: ) # ── Re-evaluate prompt caching ── - is_native_anthropic = api_mode == "anthropic_messages" and new_provider == "anthropic" - self._use_prompt_caching = ( - ("openrouter" in (self.base_url or "").lower() and "claude" in new_model.lower()) - or is_native_anthropic + self._use_prompt_caching, self._use_native_cache_layout = ( + self._anthropic_prompt_cache_policy( + provider=new_provider, + base_url=self.base_url, + api_mode=api_mode, + model=new_model, + ) ) # ── Update context compressor ── @@ -1851,6 +2006,7 @@ class AIAgent: "api_key": getattr(self, "api_key", ""), "client_kwargs": dict(self._client_kwargs), "use_prompt_caching": self._use_prompt_caching, + "use_native_cache_layout": self._use_native_cache_layout, "compressor_model": getattr(_cc, "model", self.model) if _cc else self.model, "compressor_base_url": getattr(_cc, "base_url", self.base_url) if _cc else self.base_url, "compressor_api_key": getattr(_cc, "api_key", "") if _cc else "", @@ -1869,6 +2025,22 @@ class AIAgent: self._fallback_activated = False self._fallback_index = 0 + # When the user deliberately swaps primary providers (e.g. openrouter + # → anthropic), drop any fallback entries that target the OLD primary + # or the NEW one. The chain was seeded from config at agent init for + # the original provider — without pruning, a failed turn on the new + # primary silently re-activates the provider the user just rejected, + # which is exactly what was reported during TUI v2 blitz testing + # ("switched to anthropic, tui keeps trying openrouter"). + old_norm = (old_provider or "").strip().lower() + new_norm = (new_provider or "").strip().lower() + if old_norm and new_norm and old_norm != new_norm: + self._fallback_chain = [ + entry for entry in self._fallback_chain + if (entry.get("provider") or "").strip().lower() not in {old_norm, new_norm} + ] + self._fallback_model = self._fallback_chain[0] if self._fallback_chain else None + logging.info( "Model switched in-place: %s (%s) -> %s (%s)", old_model, old_provider, new_model, new_provider, @@ -1941,13 +2113,16 @@ class AIAgent: def _should_emit_quiet_tool_messages(self) -> bool: """Return True when quiet-mode tool summaries should print directly. 
- When the caller provides ``tool_progress_callback`` (for example the CLI - TUI or a gateway progress renderer), that callback owns progress display. - Emitting quiet-mode summary lines here duplicates progress and leaks tool - previews into flows that are expected to stay silent, such as - ``hermes chat -q``. + Quiet mode is used by both the interactive CLI and embedded/library + callers. The CLI may still want compact progress hints when no callback + owns rendering. Embedded/library callers, on the other hand, expect + quiet mode to be truly silent. """ - return self.quiet_mode and not self.tool_progress_callback + return ( + self.quiet_mode + and not self.tool_progress_callback + and getattr(self, "platform", "") == "cli" + ) def _emit_status(self, message: str) -> None: """Emit a lifecycle status message to both CLI and gateway channels. @@ -1997,7 +2172,10 @@ class AIAgent: return try: from agent.auxiliary_client import get_text_auxiliary_client - from agent.model_metadata import get_model_context_length + from agent.model_metadata import ( + MINIMUM_CONTEXT_LENGTH, + get_model_context_length, + ) client, aux_model = get_text_auxiliary_client( "compression", @@ -2020,45 +2198,61 @@ class AIAgent: aux_base_url = str(getattr(client, "base_url", "")) aux_api_key = str(getattr(client, "api_key", "")) - # Read user-configured context_length for the compression model. - # Custom endpoints often don't support /models API queries so - # get_model_context_length() falls through to the 128K default, - # ignoring the explicit config value. Pass it as the highest- - # priority hint so the configured value is always respected. - _aux_cfg = (self.config or {}).get("auxiliary", {}).get("compression", {}) - _aux_context_config = _aux_cfg.get("context_length") if isinstance(_aux_cfg, dict) else None - if _aux_context_config is not None: - try: - _aux_context_config = int(_aux_context_config) - except (TypeError, ValueError): - _aux_context_config = None - aux_context = get_model_context_length( aux_model, base_url=aux_base_url, api_key=aux_api_key, - config_context_length=_aux_context_config, + config_context_length=getattr(self, "_aux_compression_context_length_config", None), ) + # Hard floor: the auxiliary compression model must have at least + # MINIMUM_CONTEXT_LENGTH (64K) tokens of context. The main model + # is already required to meet this floor (checked earlier in + # __init__), so the compression model must too — otherwise it + # cannot summarise a full threshold-sized window of main-model + # content. Mirrors the main-model rejection pattern. + if aux_context and aux_context < MINIMUM_CONTEXT_LENGTH: + raise ValueError( + f"Auxiliary compression model {aux_model} has a context " + f"window of {aux_context:,} tokens, which is below the " + f"minimum {MINIMUM_CONTEXT_LENGTH:,} required by Hermes " + f"Agent. Choose a compression model with at least " + f"{MINIMUM_CONTEXT_LENGTH // 1000}K context (set " + f"auxiliary.compression.model in config.yaml), or set " + f"auxiliary.compression.context_length to override the " + f"detected value if it is wrong." + ) + threshold = self.context_compressor.threshold_tokens if aux_context < threshold: - # Suggest a threshold that would fit the aux model, - # rounded down to a clean percentage. - safe_pct = int((aux_context / self.context_compressor.context_length) * 100) + # Auto-correct: lower the live session threshold so + # compression actually works this session. 
The hard floor
+                # above guarantees aux_context >= MINIMUM_CONTEXT_LENGTH,
+                # so the new threshold is always >= 64K.
+                old_threshold = threshold
+                new_threshold = aux_context
+                self.context_compressor.threshold_tokens = new_threshold
+                # Keep threshold_percent in sync so future main-model
+                # context_length changes (update_model) re-derive from a
+                # sensible number rather than the original too-high value.
+                main_ctx = self.context_compressor.context_length
+                if main_ctx:
+                    self.context_compressor.threshold_percent = (
+                        new_threshold / main_ctx
+                    )
+                safe_pct = int((aux_context / main_ctx) * 100) if main_ctx else 50
                 msg = (
-                    f"⚠ Compression model ({aux_model}) context "
-                    f"is {aux_context:,} tokens, but the main model's "
-                    f"compression threshold is {threshold:,} tokens. "
-                    f"Context compression will not be possible — the "
-                    f"content to summarise will exceed the auxiliary "
-                    f"model's context window.\n"
-                    f"   Fix options (config.yaml):\n"
+                    f"⚠ Compression model ({aux_model}) context is "
+                    f"{aux_context:,} tokens, but the main model's "
+                    f"compression threshold was {old_threshold:,} tokens. "
+                    f"Auto-lowered this session's threshold to "
+                    f"{new_threshold:,} tokens so compression can run.\n"
+                    f"   To make this permanent, edit config.yaml — either:\n"
                     f"     1. Use a larger compression model:\n"
                     f"        auxiliary:\n"
                     f"          compression:\n"
-                    f"            model: <model>\n"
-                    f"     2. Lower the compression threshold to fit "
-                    f"the current model:\n"
+                    f"            model: <model-name>\n"
+                    f"     2. Lower the compression threshold:\n"
                     f"        compression:\n"
                     f"          threshold: 0.{safe_pct:02d}"
                 )
@@ -2067,12 +2261,17 @@ class AIAgent:
                 logger.warning(
                     "Auxiliary compression model %s has %d token context, "
                     "below the main model's compression threshold of %d "
-                    "tokens — compression summaries will fail or be "
-                    "severely truncated.",
+                    "tokens — auto-lowered session threshold to %d to "
+                    "keep compression working.",
                     aux_model,
                     aux_context,
-                    threshold,
+                    old_threshold,
+                    new_threshold,
                 )
+        except ValueError:
+            # Hard rejections (aux below minimum context) must propagate
+            # so the session refuses to start.
+            raise
         except Exception as exc:
             logger.debug(
                 "Compression feasibility check failed (non-fatal): %s", exc
             )
@@ -2097,12 +2296,149 @@ class AIAgent:

     def _is_direct_openai_url(self, base_url: str = None) -> bool:
         """Return True when a base URL targets OpenAI's native API."""
-        url = (base_url or self._base_url_lower).lower()
-        return "api.openai.com" in url and "openrouter" not in url
+        if base_url is not None:
+            hostname = base_url_hostname(base_url)
+        else:
+            hostname = getattr(self, "_base_url_hostname", "") or base_url_hostname(
+                getattr(self, "_base_url_lower", "")
+            )
+        return hostname == "api.openai.com"
+
+    def _resolved_api_call_timeout(self) -> float:
+        """Resolve the effective per-call request timeout in seconds.
+
+        Priority:
+        1. ``providers.<provider>.models.<model>.timeout_seconds`` (per-model override)
+        2. ``providers.<provider>.request_timeout_seconds`` (provider-wide)
+        3. ``HERMES_API_TIMEOUT`` env var (legacy escape hatch)
+        4. 1800.0s default
+
+        Used by OpenAI-wire chat completions (streaming and non-streaming) so
+        the per-provider config knob wins over the 1800s default. Without this
+        helper, the hardcoded ``HERMES_API_TIMEOUT`` fallback would always be
+        passed as a per-call ``timeout=`` kwarg, overriding the client-level
+        timeout the AIAgent.__init__ path configured.
+ """ + cfg = get_provider_request_timeout(self.provider, self.model) + if cfg is not None: + return cfg + return float(os.getenv("HERMES_API_TIMEOUT", 1800.0)) + + def _resolved_api_call_stale_timeout_base(self) -> tuple[float, bool]: + """Resolve the base non-stream stale timeout and whether it is implicit. + + Priority: + 1. ``providers..models..stale_timeout_seconds`` + 2. ``providers..stale_timeout_seconds`` + 3. ``HERMES_API_CALL_STALE_TIMEOUT`` env var + 4. 300.0s default + + Returns ``(timeout_seconds, uses_implicit_default)`` so the caller can + preserve legacy behaviors that only apply when the user has *not* + explicitly configured a stale timeout, such as auto-disabling the + detector for local endpoints. + """ + cfg = get_provider_stale_timeout(self.provider, self.model) + if cfg is not None: + return cfg, False + + env_timeout = os.getenv("HERMES_API_CALL_STALE_TIMEOUT") + if env_timeout is not None: + return float(env_timeout), False + + return 300.0, True + + def _compute_non_stream_stale_timeout(self, messages: list[dict[str, Any]]) -> float: + """Compute the effective non-stream stale timeout for this request.""" + stale_base, uses_implicit_default = self._resolved_api_call_stale_timeout_base() + base_url = getattr(self, "_base_url", None) or self.base_url or "" + if uses_implicit_default and base_url and is_local_endpoint(base_url): + return float("inf") + + est_tokens = sum(len(str(v)) for v in messages) // 4 + if est_tokens > 100_000: + return max(stale_base, 600.0) + if est_tokens > 50_000: + return max(stale_base, 450.0) + return stale_base def _is_openrouter_url(self) -> bool: """Return True when the base URL targets OpenRouter.""" - return "openrouter" in self._base_url_lower + return base_url_host_matches(self._base_url_lower, "openrouter.ai") + + def _anthropic_prompt_cache_policy( + self, + *, + provider: Optional[str] = None, + base_url: Optional[str] = None, + api_mode: Optional[str] = None, + model: Optional[str] = None, + ) -> tuple[bool, bool]: + """Decide whether to apply Anthropic prompt caching and which layout to use. + + Returns ``(should_cache, use_native_layout)``: + * ``should_cache`` — inject ``cache_control`` breakpoints for this + request (applies to OpenRouter Claude, native Anthropic, and + third-party gateways that speak the native Anthropic protocol). + * ``use_native_layout`` — place markers on the *inner* content + blocks (native Anthropic accepts and requires this layout); + when False markers go on the message envelope (OpenRouter and + OpenAI-wire proxies expect the looser layout). + + Third-party providers using the native Anthropic transport + (``api_mode == 'anthropic_messages'`` + Claude-named model) get + caching with the native layout so they benefit from the same + cost reduction as direct Anthropic callers, provided their + gateway implements the Anthropic cache_control contract + (MiniMax, Zhipu GLM, LiteLLM's Anthropic proxy mode all do). + + Qwen / Alibaba-family models on OpenCode, OpenCode Go, and direct + Alibaba (DashScope) also honour Anthropic-style ``cache_control`` + markers on OpenAI-wire chat completions. Upstream pi-mono #3392 / + pi #3393 documented this for opencode-go Qwen. Without markers + these providers serve zero cache hits, re-billing the full prompt + on every turn. 
+ """ + eff_provider = (provider if provider is not None else self.provider) or "" + eff_base_url = base_url if base_url is not None else (self.base_url or "") + eff_api_mode = api_mode if api_mode is not None else (self.api_mode or "") + eff_model = (model if model is not None else self.model) or "" + + base_lower = eff_base_url.lower() + model_lower = eff_model.lower() + provider_lower = eff_provider.lower() + is_claude = "claude" in model_lower + is_openrouter = base_url_host_matches(eff_base_url, "openrouter.ai") + is_anthropic_wire = eff_api_mode == "anthropic_messages" + is_native_anthropic = ( + is_anthropic_wire + and (eff_provider == "anthropic" or base_url_hostname(eff_base_url) == "api.anthropic.com") + ) + + if is_native_anthropic: + return True, True + if is_openrouter and is_claude: + return True, False + if is_anthropic_wire and is_claude: + # Third-party Anthropic-compatible gateway. + return True, True + + # Qwen/Alibaba on OpenCode (Zen/Go) and native DashScope: OpenAI-wire + # transport that accepts Anthropic-style cache_control markers and + # rewards them with real cache hits. Without this branch + # qwen3.6-plus on opencode-go reports 0% cached tokens and burns + # through the subscription on every turn. + model_is_qwen = "qwen" in model_lower + provider_is_alibaba_family = provider_lower in { + "opencode", "opencode-zen", "opencode-go", "alibaba", + } + if provider_is_alibaba_family and model_is_qwen: + # Envelope layout (native_anthropic=False): markers on inner + # content parts, not top-level tool messages. Matches + # pi-mono's "alibaba" cacheControlFormat. + return True, False + + return False, False @staticmethod def _model_requires_responses_api(model: str) -> bool: @@ -2501,10 +2837,10 @@ class AIAgent: prompt = self._SKILL_REVIEW_PROMPT def _run_review(): - import contextlib, os as _os + import contextlib review_agent = None try: - with open(_os.devnull, "w") as _devnull, \ + with open(os.devnull, "w") as _devnull, \ contextlib.redirect_stdout(_devnull), \ contextlib.redirect_stderr(_devnull): review_agent = AIAgent( @@ -2634,7 +2970,7 @@ class AIAgent: role = msg.get("role", "unknown") content = msg.get("content") tool_calls_data = None - if hasattr(msg, "tool_calls") and msg.tool_calls: + if hasattr(msg, "tool_calls") and isinstance(msg.tool_calls, list) and msg.tool_calls: tool_calls_data = [ {"name": tc.function.name, "arguments": tc.function.arguments} for tc in msg.tool_calls @@ -2650,6 +2986,7 @@ class AIAgent: tool_call_id=msg.get("tool_call_id"), finish_reason=msg.get("finish_reason"), reasoning=msg.get("reasoning") if role == "assistant" else None, + reasoning_content=msg.get("reasoning_content") if role == "assistant" else None, reasoning_details=msg.get("reasoning_details") if role == "assistant" else None, codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None, ) @@ -2900,15 +3237,14 @@ class AIAgent: tag instead of dumping raw HTML. Falls back to a truncated str(error) for everything else. 
""" - import re as _re raw = str(error) # Cloudflare / proxy HTML pages: grab the <title> for a clean summary if "<!DOCTYPE" in raw or "<html" in raw: - m = _re.search(r"<title[^>]*>([^<]+)", raw, _re.IGNORECASE) + m = re.search(r"]*>([^<]+)", raw, re.IGNORECASE) title = m.group(1).strip() if m else "HTML error page (title not found)" # Also grab Cloudflare Ray ID if present - ray = _re.search(r"Cloudflare Ray ID:\s*]*>([^<]+)", raw) + ray = re.search(r"Cloudflare Ray ID:\s*]*>([^<]+)", raw) ray_id = ray.group(1).strip() if ray else None status_code = getattr(error, "status_code", None) parts = [] @@ -3401,7 +3737,7 @@ class AIAgent: existing = getattr(self, "_pending_steer", None) self._pending_steer = (existing + "\n" + steer_text) if existing else steer_text return - marker = f"\n\n[USER STEER (injected mid-run, not tool output): {steer_text}]" + marker = f"\n\nUser guidance: {steer_text}" existing_content = messages[target_idx].get("content", "") if not isinstance(existing_content, str): # Anthropic multimodal content blocks — preserve them and append @@ -3577,14 +3913,12 @@ class AIAgent: # 2. Clean terminal sandbox environments try: - from tools.terminal_tool import cleanup_vm cleanup_vm(task_id) except Exception: pass # 3. Clean browser daemon sessions try: - from tools.browser_tool import cleanup_browser cleanup_browser(task_id) except Exception: pass @@ -3995,27 +4329,6 @@ class AIAgent: if self._memory_store: self._memory_store.load_from_disk() - def _responses_tools(self, tools: Optional[List[Dict[str, Any]]] = None) -> Optional[List[Dict[str, Any]]]: - """Convert chat-completions tool schemas to Responses function-tool schemas.""" - source_tools = tools if tools is not None else self.tools - if not source_tools: - return None - - converted: List[Dict[str, Any]] = [] - for item in source_tools: - fn = item.get("function", {}) if isinstance(item, dict) else {} - name = fn.get("name") - if not isinstance(name, str) or not name.strip(): - continue - converted.append({ - "type": "function", - "name": name, - "description": fn.get("description", ""), - "strict": False, - "parameters": fn.get("parameters", {"type": "object", "properties": {}}), - }) - return converted or None - @staticmethod def _deterministic_call_id(fn_name: str, arguments: str, index: int = 0) -> str: """Generate a deterministic call_id from tool call content. @@ -4024,27 +4337,12 @@ class AIAgent: Deterministic IDs prevent cache invalidation — random UUIDs would make every API call's prefix unique, breaking OpenAI's prompt cache. 
""" - import hashlib - seed = f"{fn_name}:{arguments}:{index}" - digest = hashlib.sha256(seed.encode("utf-8", errors="replace")).hexdigest()[:12] - return f"call_{digest}" + return _codex_deterministic_call_id(fn_name, arguments, index) @staticmethod def _split_responses_tool_id(raw_id: Any) -> tuple[Optional[str], Optional[str]]: """Split a stored tool id into (call_id, response_item_id).""" - if not isinstance(raw_id, str): - return None, None - value = raw_id.strip() - if not value: - return None, None - if "|" in value: - call_id, response_item_id = value.split("|", 1) - call_id = call_id.strip() or None - response_item_id = response_item_id.strip() or None - return call_id, response_item_id - if value.startswith("fc_"): - return None, value - return value, None + return _codex_split_responses_tool_id(raw_id) def _derive_responses_function_call_id( self, @@ -4052,569 +4350,7 @@ class AIAgent: response_item_id: Optional[str] = None, ) -> str: """Build a valid Responses `function_call.id` (must start with `fc_`).""" - if isinstance(response_item_id, str): - candidate = response_item_id.strip() - if candidate.startswith("fc_"): - return candidate - - source = (call_id or "").strip() - if source.startswith("fc_"): - return source - if source.startswith("call_") and len(source) > len("call_"): - return f"fc_{source[len('call_'):]}" - - sanitized = re.sub(r"[^A-Za-z0-9_-]", "", source) - if sanitized.startswith("fc_"): - return sanitized - if sanitized.startswith("call_") and len(sanitized) > len("call_"): - return f"fc_{sanitized[len('call_'):]}" - if sanitized: - return f"fc_{sanitized[:48]}" - - seed = source or str(response_item_id or "") or uuid.uuid4().hex - digest = hashlib.sha1(seed.encode("utf-8")).hexdigest()[:24] - return f"fc_{digest}" - - def _chat_messages_to_responses_input(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: - """Convert internal chat-style messages to Responses input items.""" - items: List[Dict[str, Any]] = [] - seen_item_ids: set = set() - - for msg in messages: - if not isinstance(msg, dict): - continue - role = msg.get("role") - if role == "system": - continue - - if role in {"user", "assistant"}: - content = msg.get("content", "") - content_text = str(content) if content is not None else "" - - if role == "assistant": - # Replay encrypted reasoning items from previous turns - # so the API can maintain coherent reasoning chains. - codex_reasoning = msg.get("codex_reasoning_items") - has_codex_reasoning = False - if isinstance(codex_reasoning, list): - for ri in codex_reasoning: - if isinstance(ri, dict) and ri.get("encrypted_content"): - item_id = ri.get("id") - if item_id and item_id in seen_item_ids: - continue - # Strip the "id" field — with store=False the - # Responses API cannot look up items by ID and - # returns 404. The encrypted_content blob is - # self-contained for reasoning chain continuity. - replay_item = {k: v for k, v in ri.items() if k != "id"} - items.append(replay_item) - if item_id: - seen_item_ids.add(item_id) - has_codex_reasoning = True - - if content_text.strip(): - items.append({"role": "assistant", "content": content_text}) - elif has_codex_reasoning: - # The Responses API requires a following item after each - # reasoning item (otherwise: missing_following_item error). - # When the assistant produced only reasoning with no visible - # content, emit an empty assistant message as the required - # following item. 
- items.append({"role": "assistant", "content": ""}) - - tool_calls = msg.get("tool_calls") - if isinstance(tool_calls, list): - for tc in tool_calls: - if not isinstance(tc, dict): - continue - fn = tc.get("function", {}) - fn_name = fn.get("name") - if not isinstance(fn_name, str) or not fn_name.strip(): - continue - - embedded_call_id, embedded_response_item_id = self._split_responses_tool_id( - tc.get("id") - ) - call_id = tc.get("call_id") - if not isinstance(call_id, str) or not call_id.strip(): - call_id = embedded_call_id - if not isinstance(call_id, str) or not call_id.strip(): - if ( - isinstance(embedded_response_item_id, str) - and embedded_response_item_id.startswith("fc_") - and len(embedded_response_item_id) > len("fc_") - ): - call_id = f"call_{embedded_response_item_id[len('fc_'):]}" - else: - _raw_args = str(fn.get("arguments", "{}")) - call_id = self._deterministic_call_id(fn_name, _raw_args, len(items)) - call_id = call_id.strip() - - arguments = fn.get("arguments", "{}") - if isinstance(arguments, dict): - arguments = json.dumps(arguments, ensure_ascii=False) - elif not isinstance(arguments, str): - arguments = str(arguments) - arguments = arguments.strip() or "{}" - - items.append({ - "type": "function_call", - "call_id": call_id, - "name": fn_name, - "arguments": arguments, - }) - continue - - items.append({"role": role, "content": content_text}) - continue - - if role == "tool": - raw_tool_call_id = msg.get("tool_call_id") - call_id, _ = self._split_responses_tool_id(raw_tool_call_id) - if not isinstance(call_id, str) or not call_id.strip(): - if isinstance(raw_tool_call_id, str) and raw_tool_call_id.strip(): - call_id = raw_tool_call_id.strip() - if not isinstance(call_id, str) or not call_id.strip(): - continue - items.append({ - "type": "function_call_output", - "call_id": call_id, - "output": str(msg.get("content", "") or ""), - }) - - return items - - def _preflight_codex_input_items(self, raw_items: Any) -> List[Dict[str, Any]]: - if not isinstance(raw_items, list): - raise ValueError("Codex Responses input must be a list of input items.") - - normalized: List[Dict[str, Any]] = [] - seen_ids: set = set() - for idx, item in enumerate(raw_items): - if not isinstance(item, dict): - raise ValueError(f"Codex Responses input[{idx}] must be an object.") - - item_type = item.get("type") - if item_type == "function_call": - call_id = item.get("call_id") - name = item.get("name") - if not isinstance(call_id, str) or not call_id.strip(): - raise ValueError(f"Codex Responses input[{idx}] function_call is missing call_id.") - if not isinstance(name, str) or not name.strip(): - raise ValueError(f"Codex Responses input[{idx}] function_call is missing name.") - - arguments = item.get("arguments", "{}") - if isinstance(arguments, dict): - arguments = json.dumps(arguments, ensure_ascii=False) - elif not isinstance(arguments, str): - arguments = str(arguments) - arguments = arguments.strip() or "{}" - - normalized.append( - { - "type": "function_call", - "call_id": call_id.strip(), - "name": name.strip(), - "arguments": arguments, - } - ) - continue - - if item_type == "function_call_output": - call_id = item.get("call_id") - if not isinstance(call_id, str) or not call_id.strip(): - raise ValueError(f"Codex Responses input[{idx}] function_call_output is missing call_id.") - output = item.get("output", "") - if output is None: - output = "" - if not isinstance(output, str): - output = str(output) - - normalized.append( - { - "type": "function_call_output", - "call_id": 
call_id.strip(), - "output": output, - } - ) - continue - - if item_type == "reasoning": - encrypted = item.get("encrypted_content") - if isinstance(encrypted, str) and encrypted: - item_id = item.get("id") - if isinstance(item_id, str) and item_id: - if item_id in seen_ids: - continue - seen_ids.add(item_id) - reasoning_item = {"type": "reasoning", "encrypted_content": encrypted} - # Do NOT include the "id" in the outgoing item — with - # store=False (our default) the API tries to resolve the - # id server-side and returns 404. The id is still used - # above for local deduplication via seen_ids. - summary = item.get("summary") - if isinstance(summary, list): - reasoning_item["summary"] = summary - else: - reasoning_item["summary"] = [] - normalized.append(reasoning_item) - continue - - role = item.get("role") - if role in {"user", "assistant"}: - content = item.get("content", "") - if content is None: - content = "" - if not isinstance(content, str): - content = str(content) - - normalized.append({"role": role, "content": content}) - continue - - raise ValueError( - f"Codex Responses input[{idx}] has unsupported item shape (type={item_type!r}, role={role!r})." - ) - - return normalized - - def _preflight_codex_api_kwargs( - self, - api_kwargs: Any, - *, - allow_stream: bool = False, - ) -> Dict[str, Any]: - if not isinstance(api_kwargs, dict): - raise ValueError("Codex Responses request must be a dict.") - - required = {"model", "instructions", "input"} - missing = [key for key in required if key not in api_kwargs] - if missing: - raise ValueError(f"Codex Responses request missing required field(s): {', '.join(sorted(missing))}.") - - model = api_kwargs.get("model") - if not isinstance(model, str) or not model.strip(): - raise ValueError("Codex Responses request 'model' must be a non-empty string.") - model = model.strip() - - instructions = api_kwargs.get("instructions") - if instructions is None: - instructions = "" - if not isinstance(instructions, str): - instructions = str(instructions) - instructions = instructions.strip() or DEFAULT_AGENT_IDENTITY - - normalized_input = self._preflight_codex_input_items(api_kwargs.get("input")) - - tools = api_kwargs.get("tools") - normalized_tools = None - if tools is not None: - if not isinstance(tools, list): - raise ValueError("Codex Responses request 'tools' must be a list when provided.") - normalized_tools = [] - for idx, tool in enumerate(tools): - if not isinstance(tool, dict): - raise ValueError(f"Codex Responses tools[{idx}] must be an object.") - if tool.get("type") != "function": - raise ValueError(f"Codex Responses tools[{idx}] has unsupported type {tool.get('type')!r}.") - - name = tool.get("name") - parameters = tool.get("parameters") - if not isinstance(name, str) or not name.strip(): - raise ValueError(f"Codex Responses tools[{idx}] is missing a valid name.") - if not isinstance(parameters, dict): - raise ValueError(f"Codex Responses tools[{idx}] is missing valid parameters.") - - description = tool.get("description", "") - if description is None: - description = "" - if not isinstance(description, str): - description = str(description) - - strict = tool.get("strict", False) - if not isinstance(strict, bool): - strict = bool(strict) - - normalized_tools.append( - { - "type": "function", - "name": name.strip(), - "description": description, - "strict": strict, - "parameters": parameters, - } - ) - - store = api_kwargs.get("store", False) - if store is not False: - raise ValueError("Codex Responses contract requires 'store' to be 
false.") - - allowed_keys = { - "model", "instructions", "input", "tools", "store", - "reasoning", "include", "max_output_tokens", "temperature", - "tool_choice", "parallel_tool_calls", "prompt_cache_key", "service_tier", - "extra_headers", - } - normalized: Dict[str, Any] = { - "model": model, - "instructions": instructions, - "input": normalized_input, - "store": False, - } - if normalized_tools is not None: - normalized["tools"] = normalized_tools - - # Pass through reasoning config - reasoning = api_kwargs.get("reasoning") - if isinstance(reasoning, dict): - normalized["reasoning"] = reasoning - include = api_kwargs.get("include") - if isinstance(include, list): - normalized["include"] = include - service_tier = api_kwargs.get("service_tier") - if isinstance(service_tier, str) and service_tier.strip(): - normalized["service_tier"] = service_tier.strip() - - # Pass through max_output_tokens and temperature - max_output_tokens = api_kwargs.get("max_output_tokens") - if isinstance(max_output_tokens, (int, float)) and max_output_tokens > 0: - normalized["max_output_tokens"] = int(max_output_tokens) - temperature = api_kwargs.get("temperature") - if isinstance(temperature, (int, float)): - normalized["temperature"] = float(temperature) - - # Pass through tool_choice, parallel_tool_calls, prompt_cache_key - for passthrough_key in ("tool_choice", "parallel_tool_calls", "prompt_cache_key"): - val = api_kwargs.get(passthrough_key) - if val is not None: - normalized[passthrough_key] = val - - extra_headers = api_kwargs.get("extra_headers") - if extra_headers is not None: - if not isinstance(extra_headers, dict): - raise ValueError("Codex Responses request 'extra_headers' must be an object.") - normalized_headers: Dict[str, str] = {} - for key, value in extra_headers.items(): - if not isinstance(key, str) or not key.strip(): - raise ValueError("Codex Responses request 'extra_headers' keys must be non-empty strings.") - if value is None: - continue - normalized_headers[key.strip()] = str(value) - if normalized_headers: - normalized["extra_headers"] = normalized_headers - - if allow_stream: - stream = api_kwargs.get("stream") - if stream is not None and stream is not True: - raise ValueError("Codex Responses 'stream' must be true when set.") - if stream is True: - normalized["stream"] = True - allowed_keys.add("stream") - elif "stream" in api_kwargs: - raise ValueError("Codex Responses stream flag is only allowed in fallback streaming requests.") - - unexpected = sorted(key for key in api_kwargs if key not in allowed_keys) - if unexpected: - raise ValueError( - f"Codex Responses request has unsupported field(s): {', '.join(unexpected)}." 
- ) - - return normalized - - def _extract_responses_message_text(self, item: Any) -> str: - """Extract assistant text from a Responses message output item.""" - content = getattr(item, "content", None) - if not isinstance(content, list): - return "" - - chunks: List[str] = [] - for part in content: - ptype = getattr(part, "type", None) - if ptype not in {"output_text", "text"}: - continue - text = getattr(part, "text", None) - if isinstance(text, str) and text: - chunks.append(text) - return "".join(chunks).strip() - - def _extract_responses_reasoning_text(self, item: Any) -> str: - """Extract a compact reasoning text from a Responses reasoning item.""" - summary = getattr(item, "summary", None) - if isinstance(summary, list): - chunks: List[str] = [] - for part in summary: - text = getattr(part, "text", None) - if isinstance(text, str) and text: - chunks.append(text) - if chunks: - return "\n".join(chunks).strip() - text = getattr(item, "text", None) - if isinstance(text, str) and text: - return text.strip() - return "" - - def _normalize_codex_response(self, response: Any) -> tuple[Any, str]: - """Normalize a Responses API object to an assistant_message-like object.""" - output = getattr(response, "output", None) - if not isinstance(output, list) or not output: - # The Codex backend can return empty output when the answer was - # delivered entirely via stream events. Check output_text as a - # last-resort fallback before raising. - out_text = getattr(response, "output_text", None) - if isinstance(out_text, str) and out_text.strip(): - logger.debug( - "Codex response has empty output but output_text is present (%d chars); " - "synthesizing output item.", len(out_text.strip()), - ) - output = [SimpleNamespace( - type="message", role="assistant", status="completed", - content=[SimpleNamespace(type="output_text", text=out_text.strip())], - )] - response.output = output - else: - raise RuntimeError("Responses API returned no output items") - - response_status = getattr(response, "status", None) - if isinstance(response_status, str): - response_status = response_status.strip().lower() - else: - response_status = None - - if response_status in {"failed", "cancelled"}: - error_obj = getattr(response, "error", None) - if isinstance(error_obj, dict): - error_msg = error_obj.get("message") or str(error_obj) - else: - error_msg = str(error_obj) if error_obj else f"Responses API returned status '{response_status}'" - raise RuntimeError(error_msg) - - content_parts: List[str] = [] - reasoning_parts: List[str] = [] - reasoning_items_raw: List[Dict[str, Any]] = [] - tool_calls: List[Any] = [] - has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"} - saw_commentary_phase = False - saw_final_answer_phase = False - - for item in output: - item_type = getattr(item, "type", None) - item_status = getattr(item, "status", None) - if isinstance(item_status, str): - item_status = item_status.strip().lower() - else: - item_status = None - - if item_status in {"queued", "in_progress", "incomplete"}: - has_incomplete_items = True - - if item_type == "message": - item_phase = getattr(item, "phase", None) - if isinstance(item_phase, str): - normalized_phase = item_phase.strip().lower() - if normalized_phase in {"commentary", "analysis"}: - saw_commentary_phase = True - elif normalized_phase in {"final_answer", "final"}: - saw_final_answer_phase = True - message_text = self._extract_responses_message_text(item) - if message_text: - content_parts.append(message_text) - elif item_type == 
"reasoning": - reasoning_text = self._extract_responses_reasoning_text(item) - if reasoning_text: - reasoning_parts.append(reasoning_text) - # Capture the full reasoning item for multi-turn continuity. - # encrypted_content is an opaque blob the API needs back on - # subsequent turns to maintain coherent reasoning chains. - encrypted = getattr(item, "encrypted_content", None) - if isinstance(encrypted, str) and encrypted: - raw_item = {"type": "reasoning", "encrypted_content": encrypted} - item_id = getattr(item, "id", None) - if isinstance(item_id, str) and item_id: - raw_item["id"] = item_id - # Capture summary — required by the API when replaying reasoning items - summary = getattr(item, "summary", None) - if isinstance(summary, list): - raw_summary = [] - for part in summary: - text = getattr(part, "text", None) - if isinstance(text, str): - raw_summary.append({"type": "summary_text", "text": text}) - raw_item["summary"] = raw_summary - reasoning_items_raw.append(raw_item) - elif item_type == "function_call": - if item_status in {"queued", "in_progress", "incomplete"}: - continue - fn_name = getattr(item, "name", "") or "" - arguments = getattr(item, "arguments", "{}") - if not isinstance(arguments, str): - arguments = json.dumps(arguments, ensure_ascii=False) - raw_call_id = getattr(item, "call_id", None) - raw_item_id = getattr(item, "id", None) - embedded_call_id, _ = self._split_responses_tool_id(raw_item_id) - call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id - if not isinstance(call_id, str) or not call_id.strip(): - call_id = self._deterministic_call_id(fn_name, arguments, len(tool_calls)) - call_id = call_id.strip() - response_item_id = raw_item_id if isinstance(raw_item_id, str) else None - response_item_id = self._derive_responses_function_call_id(call_id, response_item_id) - tool_calls.append(SimpleNamespace( - id=call_id, - call_id=call_id, - response_item_id=response_item_id, - type="function", - function=SimpleNamespace(name=fn_name, arguments=arguments), - )) - elif item_type == "custom_tool_call": - fn_name = getattr(item, "name", "") or "" - arguments = getattr(item, "input", "{}") - if not isinstance(arguments, str): - arguments = json.dumps(arguments, ensure_ascii=False) - raw_call_id = getattr(item, "call_id", None) - raw_item_id = getattr(item, "id", None) - embedded_call_id, _ = self._split_responses_tool_id(raw_item_id) - call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id - if not isinstance(call_id, str) or not call_id.strip(): - call_id = self._deterministic_call_id(fn_name, arguments, len(tool_calls)) - call_id = call_id.strip() - response_item_id = raw_item_id if isinstance(raw_item_id, str) else None - response_item_id = self._derive_responses_function_call_id(call_id, response_item_id) - tool_calls.append(SimpleNamespace( - id=call_id, - call_id=call_id, - response_item_id=response_item_id, - type="function", - function=SimpleNamespace(name=fn_name, arguments=arguments), - )) - - final_text = "\n".join([p for p in content_parts if p]).strip() - if not final_text and hasattr(response, "output_text"): - out_text = getattr(response, "output_text", "") - if isinstance(out_text, str): - final_text = out_text.strip() - - assistant_message = SimpleNamespace( - content=final_text, - tool_calls=tool_calls, - reasoning="\n\n".join(reasoning_parts).strip() if reasoning_parts else None, - reasoning_content=None, - reasoning_details=None, - 
codex_reasoning_items=reasoning_items_raw or None, - ) - - if tool_calls: - finish_reason = "tool_calls" - elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase): - finish_reason = "incomplete" - elif reasoning_items_raw and not final_text: - # Response contains only reasoning (encrypted thinking state) with - # no visible content or tool calls. The model is still thinking and - # needs another turn to produce the actual answer. Marking this as - # "stop" would send it into the empty-content retry loop which burns - # 3 retries then fails — treat it as incomplete instead so the Codex - # continuation path handles it correctly. - finish_reason = "incomplete" - else: - finish_reason = "stop" - return assistant_message, finish_reason + return _codex_derive_responses_function_call_id(call_id, response_item_id) def _thread_identity(self) -> str: thread = threading.current_thread() @@ -4666,6 +4402,30 @@ class AIAgent: return bool(getattr(http_client, "is_closed", False)) return False + @staticmethod + def _build_keepalive_http_client() -> Any: + try: + import httpx as _httpx + import socket as _socket + + _sock_opts = [(_socket.SOL_SOCKET, _socket.SO_KEEPALIVE, 1)] + if hasattr(_socket, "TCP_KEEPIDLE"): + _sock_opts.append((_socket.IPPROTO_TCP, _socket.TCP_KEEPIDLE, 30)) + _sock_opts.append((_socket.IPPROTO_TCP, _socket.TCP_KEEPINTVL, 10)) + _sock_opts.append((_socket.IPPROTO_TCP, _socket.TCP_KEEPCNT, 3)) + elif hasattr(_socket, "TCP_KEEPALIVE"): + _sock_opts.append((_socket.IPPROTO_TCP, _socket.TCP_KEEPALIVE, 30)) + # When a custom transport is provided, httpx won't auto-read proxy + # from env vars (allow_env_proxies = trust_env and transport is None). + # Explicitly read proxy settings to ensure HTTP_PROXY/HTTPS_PROXY work. + _proxy = _get_proxy_from_env() + return _httpx.Client( + transport=_httpx.HTTPTransport(socket_options=_sock_opts), + proxy=_proxy, + ) + except Exception: + return None + def _create_openai_client(self, client_kwargs: dict, *, reason: str, shared: bool) -> Any: from agent.auxiliary_client import _validate_base_url, _validate_proxy_env_urls # Treat client_kwargs as read-only. Callers pass self._client_kwargs (or shallow @@ -4706,6 +4466,27 @@ class AIAgent: self._client_log_context(), ) return client + if self.provider == "gemini": + from agent.gemini_native_adapter import GeminiNativeClient, is_native_gemini_base_url + + base_url = str(client_kwargs.get("base_url", "") or "") + if is_native_gemini_base_url(base_url): + safe_kwargs = { + k: v for k, v in client_kwargs.items() + if k in {"api_key", "base_url", "default_headers", "timeout", "http_client"} + } + if "http_client" not in safe_kwargs: + keepalive_http = self._build_keepalive_http_client() + if keepalive_http is not None: + safe_kwargs["http_client"] = keepalive_http + client = GeminiNativeClient(**safe_kwargs) + logger.info( + "Gemini native client created (%s, shared=%s) %s", + reason, + shared, + self._client_log_context(), + ) + return client # Inject TCP keepalives so the kernel detects dead provider connections # instead of letting them sit silently in CLOSE-WAIT (#10324). Without # this, a peer that drops mid-stream leaves the socket in a state where @@ -4724,23 +4505,9 @@ class AIAgent: # Tests in ``tests/run_agent/test_create_openai_client_reuse.py`` and # ``tests/run_agent/test_sequential_chats_live.py`` pin this invariant. 
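For reference, the knobs `_build_keepalive_http_client` sets above mean: first probe after 30 s of idle (`TCP_KEEPIDLE`), probes every 10 s (`TCP_KEEPINTVL`), and the connection is reset after 3 missed probes (`TCP_KEEPCNT`), so a silently dropped peer is detected in roughly 30 + 3 x 10 = 60 s instead of hanging until the 1800 s request timeout. A usage sketch, assuming the official `openai` SDK (its `http_client` kwarg accepts any `httpx.Client`):

```python
# Sketch: wiring the keepalive transport into an OpenAI-wire client.
# _build_keepalive_http_client returns None when httpx is unavailable or
# the socket options fail, so fall back to the SDK default transport.
from openai import OpenAI

def make_client(api_key: str, base_url: str) -> OpenAI:
    kwargs = {"api_key": api_key, "base_url": base_url}
    http_client = AIAgent._build_keepalive_http_client()
    if http_client is not None:
        kwargs["http_client"] = http_client
    return OpenAI(**kwargs)
```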
if "http_client" not in client_kwargs: - try: - import httpx as _httpx - import socket as _socket - _sock_opts = [(_socket.SOL_SOCKET, _socket.SO_KEEPALIVE, 1)] - if hasattr(_socket, "TCP_KEEPIDLE"): - # Linux - _sock_opts.append((_socket.IPPROTO_TCP, _socket.TCP_KEEPIDLE, 30)) - _sock_opts.append((_socket.IPPROTO_TCP, _socket.TCP_KEEPINTVL, 10)) - _sock_opts.append((_socket.IPPROTO_TCP, _socket.TCP_KEEPCNT, 3)) - elif hasattr(_socket, "TCP_KEEPALIVE"): - # macOS (uses TCP_KEEPALIVE instead of TCP_KEEPIDLE) - _sock_opts.append((_socket.IPPROTO_TCP, _socket.TCP_KEEPALIVE, 30)) - client_kwargs["http_client"] = _httpx.Client( - transport=_httpx.HTTPTransport(socket_options=_sock_opts), - ) - except Exception: - pass # Fall through to default transport if socket opts fail + keepalive_http = self._build_keepalive_http_client() + if keepalive_http is not None: + client_kwargs["http_client"] = keepalive_http client = OpenAI(**client_kwargs) logger.info( "OpenAI client created (%s, shared=%s) %s", @@ -5077,7 +4844,7 @@ class AIAgent: active_client = client or self._ensure_primary_openai_client(reason="codex_create_stream_fallback") fallback_kwargs = dict(api_kwargs) fallback_kwargs["stream"] = True - fallback_kwargs = self._preflight_codex_api_kwargs(fallback_kwargs, allow_stream=True) + fallback_kwargs = self._get_codex_transport().preflight_kwargs(fallback_kwargs, allow_stream=True) stream_or_response = active_client.responses.create(**fallback_kwargs) # Compatibility shim for mocks or providers that still return a concrete response. @@ -5242,31 +5009,44 @@ class AIAgent: pass try: - self._anthropic_client = build_anthropic_client(new_token, getattr(self, "_anthropic_base_url", None)) + self._anthropic_client = build_anthropic_client( + new_token, + getattr(self, "_anthropic_base_url", None), + timeout=get_provider_request_timeout(self.provider, self.model), + ) except Exception as exc: logger.warning("Failed to rebuild Anthropic client after credential refresh: %s", exc) return False self._anthropic_api_key = new_token - # Update OAuth flag — token type may have changed (API key ↔ OAuth) + # Update OAuth flag — token type may have changed (API key ↔ OAuth). + # Only treat as OAuth on native Anthropic; third-party endpoints using + # the Anthropic protocol must not trip OAuth paths (#1739 & third-party + # identity-injection guard). 
from agent.anthropic_adapter import _is_oauth_token - self._is_anthropic_oauth = _is_oauth_token(new_token) + self._is_anthropic_oauth = _is_oauth_token(new_token) if self.provider == "anthropic" else False return True def _apply_client_headers_for_base_url(self, base_url: str) -> None: - from agent.auxiliary_client import _OR_HEADERS + from agent.auxiliary_client import _AI_GATEWAY_HEADERS, _OR_HEADERS - normalized = (base_url or "").lower() - if "openrouter" in normalized: + if base_url_host_matches(base_url, "openrouter.ai"): self._client_kwargs["default_headers"] = dict(_OR_HEADERS) - elif "api.githubcopilot.com" in normalized: + elif base_url_host_matches(base_url, "ai-gateway.vercel.sh"): + self._client_kwargs["default_headers"] = dict(_AI_GATEWAY_HEADERS) + elif base_url_host_matches(base_url, "api.githubcopilot.com"): from hermes_cli.models import copilot_default_headers self._client_kwargs["default_headers"] = copilot_default_headers() - elif "api.kimi.com" in normalized: - self._client_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"} - elif "portal.qwen.ai" in normalized: + elif base_url_host_matches(base_url, "api.kimi.com"): + self._client_kwargs["default_headers"] = {"User-Agent": "claude-code/0.1.0"} + elif base_url_host_matches(base_url, "portal.qwen.ai"): self._client_kwargs["default_headers"] = _qwen_portal_headers() + elif base_url_host_matches(base_url, "chatgpt.com"): + from agent.auxiliary_client import _codex_cloudflare_headers + self._client_kwargs["default_headers"] = _codex_cloudflare_headers( + self._client_kwargs.get("api_key", "") + ) else: self._client_kwargs.pop("default_headers", None) @@ -5284,8 +5064,11 @@ class AIAgent: self._anthropic_api_key = runtime_key self._anthropic_base_url = runtime_base - self._anthropic_client = build_anthropic_client(runtime_key, runtime_base) - self._is_anthropic_oauth = _is_oauth_token(runtime_key) + self._anthropic_client = build_anthropic_client( + runtime_key, runtime_base, + timeout=get_provider_request_timeout(self.provider, self.model), + ) + self._is_anthropic_oauth = _is_oauth_token(runtime_key) if self.provider == "anthropic" else False self.api_key = runtime_key self.base_url = runtime_base return @@ -5441,18 +5224,9 @@ class AIAgent: # httpx timeout (default 1800s) with zero feedback. The stale # detector kills the connection early so the main retry loop can # apply richer recovery (credential rotation, provider fallback). 
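A worked example of the tiering inside `_compute_non_stream_stale_timeout`, which replaces the inline logic removed below (the divide-by-4 character heuristic comes from the code above; the message size is illustrative):

```python
# ~600K characters of message content is ~150K estimated tokens, which
# crosses the 100K tier, so an implicit 300s base is raised to 600s.
messages = [{"role": "user", "content": "x" * 600_000}]
est_tokens = sum(len(str(v)) for v in messages) // 4
assert est_tokens > 100_000           # large-context tier
assert max(300.0, 600.0) == 600.0     # effective stale timeout
```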
- _stale_base = float(os.getenv("HERMES_API_CALL_STALE_TIMEOUT", 300.0)) - _base_url = getattr(self, "_base_url", None) or "" - if _stale_base == 300.0 and _base_url and is_local_endpoint(_base_url): - _stale_timeout = float("inf") - else: - _est_tokens = sum(len(str(v)) for v in api_kwargs.get("messages", [])) // 4 - if _est_tokens > 100_000: - _stale_timeout = max(_stale_base, 600.0) - elif _est_tokens > 50_000: - _stale_timeout = max(_stale_base, 450.0) - else: - _stale_timeout = _stale_base + _stale_timeout = self._compute_non_stream_stale_timeout( + api_kwargs.get("messages", []) + ) _call_start = time.time() self._touch_activity("waiting for non-streaming API response") @@ -5496,6 +5270,7 @@ class AIAgent: self._anthropic_client = build_anthropic_client( self._anthropic_api_key, getattr(self, "_anthropic_base_url", None), + timeout=get_provider_request_timeout(self.provider, self.model), ) else: rc = request_client_holder.get("client") @@ -5527,6 +5302,7 @@ class AIAgent: self._anthropic_client = build_anthropic_client( self._anthropic_api_key, getattr(self, "_anthropic_base_url", None), + timeout=get_provider_request_timeout(self.provider, self.model), ) else: request_client = request_client_holder.get("client") @@ -5743,18 +5519,30 @@ class AIAgent: def _call_chat_completions(): """Stream a chat completions response.""" import httpx as _httpx - _base_timeout = float(os.getenv("HERMES_API_TIMEOUT", 1800.0)) - _stream_read_timeout = float(os.getenv("HERMES_STREAM_READ_TIMEOUT", 120.0)) - # Local providers (Ollama, llama.cpp, vLLM) can take minutes for - # prefill on large contexts before producing the first token. - # Auto-increase the httpx read timeout unless the user explicitly - # overrode HERMES_STREAM_READ_TIMEOUT. - if _stream_read_timeout == 120.0 and self.base_url and is_local_endpoint(self.base_url): - _stream_read_timeout = _base_timeout - logger.debug( - "Local provider detected (%s) — stream read timeout raised to %.0fs", - self.base_url, _stream_read_timeout, - ) + # Per-provider / per-model request_timeout_seconds (from config.yaml) + # wins over the HERMES_API_TIMEOUT env default if the user set it. + _provider_timeout_cfg = get_provider_request_timeout(self.provider, self.model) + _base_timeout = ( + _provider_timeout_cfg + if _provider_timeout_cfg is not None + else float(os.getenv("HERMES_API_TIMEOUT", 1800.0)) + ) + # Read timeout: config wins here too. Otherwise use + # HERMES_STREAM_READ_TIMEOUT (default 120s) for cloud providers. + if _provider_timeout_cfg is not None: + _stream_read_timeout = _provider_timeout_cfg + else: + _stream_read_timeout = float(os.getenv("HERMES_STREAM_READ_TIMEOUT", 120.0)) + # Local providers (Ollama, llama.cpp, vLLM) can take minutes for + # prefill on large contexts before producing the first token. + # Auto-increase the httpx read timeout unless the user explicitly + # overrode HERMES_STREAM_READ_TIMEOUT. 
+ if _stream_read_timeout == 120.0 and self.base_url and is_local_endpoint(self.base_url): + _stream_read_timeout = _base_timeout + logger.debug( + "Local provider detected (%s) — stream read timeout raised to %.0fs", + self.base_url, _stream_read_timeout, + ) stream_kwargs = { **api_kwargs, "stream": True, @@ -6255,6 +6043,7 @@ class AIAgent: self._anthropic_client = build_anthropic_client( self._anthropic_api_key, getattr(self, "_anthropic_base_url", None), + timeout=get_provider_request_timeout(self.provider, self.model), ) else: request_client = request_client_holder.get("client") @@ -6365,8 +6154,9 @@ class AIAgent: fb_base_url_hint = (fb.get("base_url") or "").strip() or None fb_api_key_hint = (fb.get("api_key") or "").strip() or None # For Ollama Cloud endpoints, pull OLLAMA_API_KEY from env - # when no explicit key is in the fallback config. - if fb_base_url_hint and "ollama.com" in fb_base_url_hint.lower() and not fb_api_key_hint: + # when no explicit key is in the fallback config. Host match + # (not substring) — see GHSA-76xc-57q6-vm5m. + if fb_base_url_hint and base_url_host_matches(fb_base_url_hint, "ollama.com") and not fb_api_key_hint: fb_api_key_hint = os.getenv("OLLAMA_API_KEY") or None fb_client, _resolved_fb_model = resolve_provider_client( fb_provider, model=fb_model, raw_codex=True, @@ -6401,7 +6191,10 @@ class AIAgent: # provider-specific exceptions like Copilot gpt-5-mini on # chat completions. fb_api_mode = "codex_responses" - elif fb_provider == "bedrock" or "bedrock-runtime" in fb_base_url.lower(): + elif fb_provider == "bedrock" or ( + base_url_hostname(fb_base_url).startswith("bedrock-runtime.") + and base_url_host_matches(fb_base_url, "amazonaws.com") + ): fb_api_mode = "bedrock_converse" old_model = self.model @@ -6411,6 +6204,11 @@ class AIAgent: self.api_mode = fb_api_mode self._fallback_activated = True + # Honor per-provider / per-model request_timeout_seconds for the + # fallback target (same knob the primary client uses). None = use + # SDK default. + _fb_timeout = get_provider_request_timeout(fb_provider, fb_model) + if fb_api_mode == "anthropic_messages": # Build native Anthropic client instead of using OpenAI client from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token, _is_oauth_token @@ -6418,8 +6216,10 @@ class AIAgent: self.api_key = effective_key self._anthropic_api_key = effective_key self._anthropic_base_url = fb_base_url - self._anthropic_client = build_anthropic_client(effective_key, self._anthropic_base_url) - self._is_anthropic_oauth = _is_oauth_token(effective_key) + self._anthropic_client = build_anthropic_client( + effective_key, self._anthropic_base_url, timeout=_fb_timeout, + ) + self._is_anthropic_oauth = _is_oauth_token(effective_key) if fb_provider == "anthropic" else False self.client = None self._client_kwargs = {} else: @@ -6442,12 +6242,21 @@ class AIAgent: "base_url": fb_base_url, **({"default_headers": dict(fb_headers)} if fb_headers else {}), } + if _fb_timeout is not None: + self._client_kwargs["timeout"] = _fb_timeout + # Rebuild the shared OpenAI client so the configured + # timeout takes effect on the very next fallback request, + # not only after a later credential-rotation rebuild. 
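The streaming-timeout precedence implemented just above in `_call_chat_completions`, restated as a tiny standalone function (the name and signature are illustrative, not part of the diff):

```python
def resolve_stream_timeouts(cfg_timeout, env_api, env_read, is_local):
    """Mirror of the precedence above: config > env > default, with the
    local-endpoint read-timeout bump only when the env default is in play."""
    base = cfg_timeout if cfg_timeout is not None else float(env_api or 1800.0)
    read = cfg_timeout if cfg_timeout is not None else float(env_read or 120.0)
    if cfg_timeout is None and read == 120.0 and is_local:
        read = base  # long prefill on Ollama/llama.cpp/vLLM
    return base, read

assert resolve_stream_timeouts(240.0, None, None, False) == (240.0, 240.0)
assert resolve_stream_timeouts(None, None, None, True) == (1800.0, 1800.0)
```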
+ self._replace_primary_openai_client(reason="fallback_timeout_apply") # Re-evaluate prompt caching for the new provider/model - is_native_anthropic = fb_api_mode == "anthropic_messages" and fb_provider == "anthropic" - self._use_prompt_caching = ( - ("openrouter" in fb_base_url.lower() and "claude" in fb_model.lower()) - or is_native_anthropic + self._use_prompt_caching, self._use_native_cache_layout = ( + self._anthropic_prompt_cache_policy( + provider=fb_provider, + base_url=fb_base_url, + api_mode=fb_api_mode, + model=fb_model, + ) ) # Update context compressor limits for the fallback model. @@ -6507,6 +6316,12 @@ class AIAgent: self.api_key = rt["api_key"] self._client_kwargs = dict(rt["client_kwargs"]) self._use_prompt_caching = rt["use_prompt_caching"] + # Default to native layout when the restored snapshot predates the + # native-vs-proxy split (older sessions saved before this PR). + self._use_native_cache_layout = rt.get( + "use_native_cache_layout", + self.api_mode == "anthropic_messages" and self.provider == "anthropic", + ) # ── Rebuild client for the primary provider ── if self.api_mode == "anthropic_messages": @@ -6515,6 +6330,7 @@ class AIAgent: self._anthropic_base_url = rt["anthropic_base_url"] self._anthropic_client = build_anthropic_client( rt["anthropic_api_key"], rt["anthropic_base_url"], + timeout=get_provider_request_timeout(self.provider, self.model), ) self._is_anthropic_oauth = rt["is_anthropic_oauth"] self.client = None @@ -6611,6 +6427,7 @@ class AIAgent: self._anthropic_base_url = rt["anthropic_base_url"] self._anthropic_client = build_anthropic_client( rt["anthropic_api_key"], rt["anthropic_base_url"], + timeout=get_provider_request_timeout(self.provider, self.model), ) self._is_anthropic_oauth = rt["is_anthropic_oauth"] self.client = None @@ -6760,6 +6577,42 @@ class AIAgent: return suffix return "[A multimodal message was converted to text for Anthropic compatibility.]" + def _get_anthropic_transport(self): + """Return the cached AnthropicTransport instance (lazy singleton).""" + t = getattr(self, "_anthropic_transport", None) + if t is None: + from agent.transports import get_transport + t = get_transport("anthropic_messages") + self._anthropic_transport = t + return t + + def _get_codex_transport(self): + """Return the cached ResponsesApiTransport instance (lazy singleton).""" + t = getattr(self, "_codex_transport", None) + if t is None: + from agent.transports import get_transport + t = get_transport("codex_responses") + self._codex_transport = t + return t + + def _get_chat_completions_transport(self): + """Return the cached ChatCompletionsTransport instance (lazy singleton).""" + t = getattr(self, "_chat_completions_transport", None) + if t is None: + from agent.transports import get_transport + t = get_transport("chat_completions") + self._chat_completions_transport = t + return t + + def _get_bedrock_transport(self): + """Return the cached BedrockTransport instance (lazy singleton).""" + t = getattr(self, "_bedrock_transport", None) + if t is None: + from agent.transports import get_transport + t = get_transport("bedrock_converse") + self._bedrock_transport = t + return t + def _prepare_anthropic_messages_for_api(self, api_messages: list) -> list: if not any( isinstance(msg, dict) and self._content_has_image_parts(msg.get("content")) @@ -6782,15 +6635,35 @@ class AIAgent: Alibaba/DashScope keeps dots (e.g. qwen3.5-plus). MiniMax keeps dots (e.g. MiniMax-M2.7). OpenCode Go/Zen keeps dots for non-Claude models (e.g. minimax-m2.5-free). 
- ZAI/Zhipu keeps dots (e.g. glm-4.7, glm-5.1).""" - if (getattr(self, "provider", "") or "").lower() in {"alibaba", "minimax", "minimax-cn", "opencode-go", "opencode-zen", "zai"}: + ZAI/Zhipu keeps dots (e.g. glm-4.7, glm-5.1). + AWS Bedrock uses dotted inference-profile IDs + (e.g. ``global.anthropic.claude-opus-4-7``, + ``us.anthropic.claude-sonnet-4-5-20250929-v1:0``) and rejects + the hyphenated form with + ``HTTP 400 The provided model identifier is invalid``. + Regression for #11976; mirrors the opencode-go fix for #5211 + (commit f77be22c), which extended this same allowlist.""" + if (getattr(self, "provider", "") or "").lower() in { + "alibaba", "minimax", "minimax-cn", + "opencode-go", "opencode-zen", + "zai", "bedrock", + }: return True base = (getattr(self, "base_url", "") or "").lower() - return "dashscope" in base or "aliyuncs" in base or "minimax" in base or "opencode.ai/zen/" in base or "bigmodel.cn" in base + return ( + "dashscope" in base + or "aliyuncs" in base + or "minimax" in base + or "opencode.ai/zen/" in base + or "bigmodel.cn" in base + # AWS Bedrock runtime endpoints — defense-in-depth when + # ``provider`` is unset but ``base_url`` still names Bedrock. + or "bedrock-runtime." in base + ) def _is_qwen_portal(self) -> bool: """Return True when the base URL targets Qwen Portal.""" - return "portal.qwen.ai" in self._base_url_lower + return base_url_host_matches(self._base_url_lower, "portal.qwen.ai") def _qwen_prepare_chat_messages(self, api_messages: list) -> list: prepared = copy.deepcopy(api_messages) @@ -6856,20 +6729,14 @@ class AIAgent: def _build_api_kwargs(self, api_messages: list) -> dict: """Build the keyword arguments dict for the active API mode.""" if self.api_mode == "anthropic_messages": - from agent.anthropic_adapter import build_anthropic_kwargs + _transport = self._get_anthropic_transport() anthropic_messages = self._prepare_anthropic_messages_for_api(api_messages) - # Pass context_length (total input+output window) so the adapter can - # clamp max_tokens (output cap) when the user configured a smaller - # context window than the model's native output limit. ctx_len = getattr(self, "context_compressor", None) ctx_len = ctx_len.context_length if ctx_len else None - # _ephemeral_max_output_tokens is set for one call when the API - # returns "max_tokens too large given prompt" — it caps output to - # the available window space without touching context_length. ephemeral_out = getattr(self, "_ephemeral_max_output_tokens", None) if ephemeral_out is not None: self._ephemeral_max_output_tokens = None # consume immediately - return build_anthropic_kwargs( + return _transport.build_kwargs( model=self.model, messages=anthropic_messages, tools=self.tools, @@ -6885,305 +6752,144 @@ class AIAgent: # AWS Bedrock native Converse API — bypasses the OpenAI client entirely. # The adapter handles message/tool conversion and boto3 calls directly. 
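+        # (Illustrative sketch: build_kwargs on the Bedrock transport is
+        # expected to return the same dispatch-tagged payload that the inline
+        # code removed below used to assemble, roughly:
+        #
+        #     {
+        #         "__bedrock_converse__": True,   # dispatch marker
+        #         "__bedrock_region__": region,
+        #         **converse_payload,             # Converse messages/tools/config
+        #     }
+        #
+        # The inner Converse field names are the adapter's concern, not this
+        # hunk's; treat the payload details here as assumptions.)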
if self.api_mode == "bedrock_converse": - from agent.bedrock_adapter import build_converse_kwargs + _bt = self._get_bedrock_transport() region = getattr(self, "_bedrock_region", None) or "us-east-1" guardrail = getattr(self, "_bedrock_guardrail_config", None) - return { - "__bedrock_converse__": True, - "__bedrock_region__": region, - **build_converse_kwargs( - model=self.model, - messages=api_messages, - tools=self.tools, - max_tokens=self.max_tokens or 4096, - temperature=None, # Let the model use its default - guardrail_config=guardrail, - ), - } + return _bt.build_kwargs( + model=self.model, + messages=api_messages, + tools=self.tools, + max_tokens=self.max_tokens or 4096, + region=region, + guardrail_config=guardrail, + ) if self.api_mode == "codex_responses": - instructions = "" - payload_messages = api_messages - if api_messages and api_messages[0].get("role") == "system": - instructions = str(api_messages[0].get("content") or "").strip() - payload_messages = api_messages[1:] - if not instructions: - instructions = DEFAULT_AGENT_IDENTITY - + _ct = self._get_codex_transport() is_github_responses = ( - "models.github.ai" in self.base_url.lower() - or "api.githubcopilot.com" in self.base_url.lower() + base_url_host_matches(self.base_url, "models.github.ai") + or base_url_host_matches(self.base_url, "api.githubcopilot.com") ) is_codex_backend = ( self.provider == "openai-codex" - or "chatgpt.com/backend-api/codex" in self.base_url.lower() + or ( + self._base_url_hostname == "chatgpt.com" + and "/backend-api/codex" in self._base_url_lower + ) + ) + is_xai_responses = self.provider == "xai" or self._base_url_hostname == "api.x.ai" + return _ct.build_kwargs( + model=self.model, + messages=api_messages, + tools=self.tools, + reasoning_config=self.reasoning_config, + session_id=getattr(self, "session_id", None), + max_tokens=self.max_tokens, + request_overrides=self.request_overrides, + is_github_responses=is_github_responses, + is_codex_backend=is_codex_backend, + is_xai_responses=is_xai_responses, + github_reasoning_extra=self._github_models_reasoning_extra_body() if is_github_responses else None, ) - # Resolve reasoning effort: config > default (medium) - reasoning_effort = "medium" - reasoning_enabled = True - if self.reasoning_config and isinstance(self.reasoning_config, dict): - if self.reasoning_config.get("enabled") is False: - reasoning_enabled = False - elif self.reasoning_config.get("effort"): - reasoning_effort = self.reasoning_config["effort"] + # ── chat_completions (default) ───────────────────────────────────── + _ct = self._get_chat_completions_transport() - # Clamp effort levels not supported by the Responses API model. - # GPT-5.4 supports none/low/medium/high/xhigh but not "minimal". - # "minimal" is valid on OpenRouter and GPT-5 but fails on 5.2/5.4. 
- _effort_clamp = {"minimal": "low"} - reasoning_effort = _effort_clamp.get(reasoning_effort, reasoning_effort) + # Provider detection flags + _is_qwen = self._is_qwen_portal() + _is_or = self._is_openrouter_url() + _is_gh = ( + base_url_host_matches(self._base_url_lower, "models.github.ai") + or base_url_host_matches(self._base_url_lower, "api.githubcopilot.com") + ) + _is_nous = "nousresearch" in self._base_url_lower + _is_nvidia = "integrate.api.nvidia.com" in self._base_url_lower + _is_kimi = ( + base_url_host_matches(self.base_url, "api.kimi.com") + or base_url_host_matches(self.base_url, "moonshot.ai") + or base_url_host_matches(self.base_url, "moonshot.cn") + ) - kwargs = { - "model": self.model, - "instructions": instructions, - "input": self._chat_messages_to_responses_input(payload_messages), - "tools": self._responses_tools(), - "tool_choice": "auto", - "parallel_tool_calls": True, - "store": False, - } - - if not is_github_responses: - kwargs["prompt_cache_key"] = self.session_id - - is_xai_responses = self.provider == "xai" or "api.x.ai" in (self.base_url or "").lower() - - if reasoning_enabled and is_xai_responses: - # xAI reasons automatically — no effort param, just include encrypted content - kwargs["include"] = ["reasoning.encrypted_content"] - elif reasoning_enabled: - if is_github_responses: - # Copilot's Responses route advertises reasoning-effort support, - # but not OpenAI-specific prompt cache or encrypted reasoning - # fields. Keep the payload to the documented subset. - github_reasoning = self._github_models_reasoning_extra_body() - if github_reasoning is not None: - kwargs["reasoning"] = github_reasoning - else: - kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"} - kwargs["include"] = ["reasoning.encrypted_content"] - elif not is_github_responses and not is_xai_responses: - kwargs["include"] = [] - - if self.request_overrides: - kwargs.update(self.request_overrides) - - if self.max_tokens is not None and not is_codex_backend: - kwargs["max_output_tokens"] = self.max_tokens - - if is_xai_responses and getattr(self, "session_id", None): - kwargs["extra_headers"] = {"x-grok-conv-id": self.session_id} - - return kwargs - - sanitized_messages = api_messages - needs_sanitization = False - for msg in api_messages: - if not isinstance(msg, dict): - continue - if "codex_reasoning_items" in msg: - needs_sanitization = True - break - - tool_calls = msg.get("tool_calls") - if isinstance(tool_calls, list): - for tool_call in tool_calls: - if not isinstance(tool_call, dict): - continue - if "call_id" in tool_call or "response_item_id" in tool_call: - needs_sanitization = True - break - if needs_sanitization: - break - - if needs_sanitization: - sanitized_messages = copy.deepcopy(api_messages) - for msg in sanitized_messages: - if not isinstance(msg, dict): - continue - - # Codex-only replay state must not leak into strict chat-completions APIs. - msg.pop("codex_reasoning_items", None) - - tool_calls = msg.get("tool_calls") - if isinstance(tool_calls, list): - for tool_call in tool_calls: - if isinstance(tool_call, dict): - tool_call.pop("call_id", None) - tool_call.pop("response_item_id", None) - - # Qwen portal: normalize content to list-of-dicts, inject cache_control. - # Must run AFTER codex sanitization so we transform the final messages. - # If sanitization already deepcopied, reuse that copy (in-place). - if self._is_qwen_portal(): - if sanitized_messages is api_messages: - # No sanitization was done — we need our own copy. 
- sanitized_messages = self._qwen_prepare_chat_messages(sanitized_messages) - else: - # Already a deepcopy — transform in place to avoid a second deepcopy. - self._qwen_prepare_chat_messages_inplace(sanitized_messages) - - # GPT-5 and Codex models respond better to 'developer' than 'system' - # for instruction-following. Swap the role at the API boundary so - # internal message representation stays uniform ("system"). - _model_lower = (self.model or "").lower() - if ( - sanitized_messages - and sanitized_messages[0].get("role") == "system" - and any(p in _model_lower for p in DEVELOPER_ROLE_MODELS) - ): - # Shallow-copy the list + first message only — rest stays shared. - sanitized_messages = list(sanitized_messages) - sanitized_messages[0] = {**sanitized_messages[0], "role": "developer"} - - provider_preferences = {} - if self.providers_allowed: - provider_preferences["only"] = self.providers_allowed - if self.providers_ignored: - provider_preferences["ignore"] = self.providers_ignored - if self.providers_order: - provider_preferences["order"] = self.providers_order - if self.provider_sort: - provider_preferences["sort"] = self.provider_sort - if self.provider_require_parameters: - provider_preferences["require_parameters"] = True - if self.provider_data_collection: - provider_preferences["data_collection"] = self.provider_data_collection - - api_kwargs = { - "model": self.model, - "messages": sanitized_messages, - "timeout": float(os.getenv("HERMES_API_TIMEOUT", 1800.0)), - } + # Temperature: _fixed_temperature_for_model may return OMIT_TEMPERATURE + # sentinel (temperature omitted entirely), a numeric override, or None. try: - from agent.auxiliary_client import _fixed_temperature_for_model + from agent.auxiliary_client import _fixed_temperature_for_model, OMIT_TEMPERATURE + _ft = _fixed_temperature_for_model(self.model, self.base_url) + _omit_temp = _ft is OMIT_TEMPERATURE + _fixed_temp = _ft if not _omit_temp else None except Exception: - _fixed_temperature_for_model = None - if _fixed_temperature_for_model is not None: - fixed_temperature = _fixed_temperature_for_model(self.model) - if fixed_temperature is not None: - api_kwargs["temperature"] = fixed_temperature - if self._is_qwen_portal(): - api_kwargs["metadata"] = { - "sessionId": self.session_id or "hermes", - "promptId": str(uuid.uuid4()), - } - if self.tools: - api_kwargs["tools"] = self.tools + _omit_temp = False + _fixed_temp = None - # ── max_tokens for chat_completions ────────────────────────────── - # Priority: ephemeral override (error recovery / length-continuation - # boost) > user-configured max_tokens > provider-specific defaults. - _ephemeral_out = getattr(self, "_ephemeral_max_output_tokens", None) - if _ephemeral_out is not None: - self._ephemeral_max_output_tokens = None # consume immediately - api_kwargs.update(self._max_tokens_param(_ephemeral_out)) - elif self.max_tokens is not None: - api_kwargs.update(self._max_tokens_param(self.max_tokens)) - elif "integrate.api.nvidia.com" in self._base_url_lower: - # NVIDIA NIM defaults to a very low max_tokens when omitted, - # causing models like GLM-4.7 to truncate immediately (thinking - # tokens alone exhaust the budget). 16384 provides adequate room. - api_kwargs.update(self._max_tokens_param(16384)) - elif self._is_qwen_portal(): - # Qwen Portal defaults to a very low max_tokens when omitted. 
- # Reasoning models (qwen3-coder-plus) exhaust that budget on - # thinking tokens alone, causing the portal to return - # finish_reason="stop" with truncated output — the agent sees - # this as an intentional stop and exits the loop. Send 65536 - # (the documented max output for qwen3-coder models) so the - # model has adequate output budget for tool calls. - api_kwargs.update(self._max_tokens_param(65536)) - elif (self._is_openrouter_url() or "nousresearch" in self._base_url_lower) and "claude" in (self.model or "").lower(): - # OpenRouter and Nous Portal translate requests to Anthropic's - # Messages API, which requires max_tokens as a mandatory field. - # When we omit it, the proxy picks a default that can be too - # low — the model spends its output budget on thinking and has - # almost nothing left for the actual response (especially large - # tool calls like write_file). Sending the model's real output - # limit ensures full capacity. + # Provider preferences (OpenRouter-specific) + _prefs: Dict[str, Any] = {} + if self.providers_allowed: + _prefs["only"] = self.providers_allowed + if self.providers_ignored: + _prefs["ignore"] = self.providers_ignored + if self.providers_order: + _prefs["order"] = self.providers_order + if self.provider_sort: + _prefs["sort"] = self.provider_sort + if self.provider_require_parameters: + _prefs["require_parameters"] = True + if self.provider_data_collection: + _prefs["data_collection"] = self.provider_data_collection + + # Anthropic max output for Claude on OpenRouter/Nous + _ant_max = None + if (_is_or or _is_nous) and "claude" in (self.model or "").lower(): try: from agent.anthropic_adapter import _get_anthropic_max_output - _model_output_limit = _get_anthropic_max_output(self.model) - api_kwargs["max_tokens"] = _model_output_limit + _ant_max = _get_anthropic_max_output(self.model) except Exception: pass # fail open — let the proxy pick its default - extra_body = {} + # Qwen session metadata precomputed here (promptId is per-call random) + _qwen_meta = None + if _is_qwen: + _qwen_meta = { + "sessionId": self.session_id or "hermes", + "promptId": str(uuid.uuid4()), + } - _is_openrouter = self._is_openrouter_url() - _is_github_models = ( - "models.github.ai" in self._base_url_lower - or "api.githubcopilot.com" in self._base_url_lower + # Ephemeral max output override — consume immediately so the next + # turn doesn't inherit it. 
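+        # (Note: the transport is expected to preserve the priority chain the
+        # removed inline code above implemented:
+        #
+        #     ephemeral override > self.max_tokens > per-endpoint default
+        #     (NVIDIA NIM: 16384, Qwen Portal: 65536, Claude via
+        #      OpenRouter/Nous: the model's real output limit)
+        #
+        # with anthropic_max_output carrying that last case.)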
+ _ephemeral_out = getattr(self, "_ephemeral_max_output_tokens", None) + if _ephemeral_out is not None: + self._ephemeral_max_output_tokens = None + + return _ct.build_kwargs( + model=self.model, + messages=api_messages, + tools=self.tools, + timeout=self._resolved_api_call_timeout(), + max_tokens=self.max_tokens, + ephemeral_max_output_tokens=_ephemeral_out, + max_tokens_param_fn=self._max_tokens_param, + reasoning_config=self.reasoning_config, + request_overrides=self.request_overrides, + session_id=getattr(self, "session_id", None), + model_lower=(self.model or "").lower(), + is_openrouter=_is_or, + is_nous=_is_nous, + is_qwen_portal=_is_qwen, + is_github_models=_is_gh, + is_nvidia_nim=_is_nvidia, + is_kimi=_is_kimi, + is_custom_provider=self.provider == "custom", + ollama_num_ctx=self._ollama_num_ctx, + provider_preferences=_prefs or None, + qwen_prepare_fn=self._qwen_prepare_chat_messages if _is_qwen else None, + qwen_prepare_inplace_fn=self._qwen_prepare_chat_messages_inplace if _is_qwen else None, + qwen_session_metadata=_qwen_meta, + fixed_temperature=_fixed_temp, + omit_temperature=_omit_temp, + supports_reasoning=self._supports_reasoning_extra_body(), + github_reasoning_extra=self._github_models_reasoning_extra_body() if _is_gh else None, + anthropic_max_output=_ant_max, + ) - # Provider preferences (only, ignore, order, sort) are OpenRouter- - # specific. Only send to OpenRouter-compatible endpoints. - # TODO: Nous Portal will add transparent proxy support — re-enable - # for _is_nous when their backend is updated. - if provider_preferences and _is_openrouter: - extra_body["provider"] = provider_preferences - _is_nous = "nousresearch" in self._base_url_lower - - if self._supports_reasoning_extra_body(): - if _is_github_models: - github_reasoning = self._github_models_reasoning_extra_body() - if github_reasoning is not None: - extra_body["reasoning"] = github_reasoning - else: - if self.reasoning_config is not None: - rc = dict(self.reasoning_config) - # Nous Portal requires reasoning enabled — don't send - # enabled=false to it (would cause 400). - if _is_nous and rc.get("enabled") is False: - pass # omit reasoning entirely for Nous when disabled - else: - extra_body["reasoning"] = rc - else: - extra_body["reasoning"] = { - "enabled": True, - "effort": "medium" - } - - # Nous Portal product attribution - if _is_nous: - extra_body["tags"] = ["product=hermes-agent"] - - # Ollama num_ctx: override the 2048 default so the model actually - # uses the context window it was trained for. Passed via the OpenAI - # SDK's extra_body → options.num_ctx, which Ollama's OpenAI-compat - # endpoint forwards to the runner as --ctx-size. - if self._ollama_num_ctx: - options = extra_body.get("options", {}) - options["num_ctx"] = self._ollama_num_ctx - extra_body["options"] = options - - # Ollama / custom provider: pass think=false when reasoning is disabled. - # Ollama does not recognise the OpenRouter-style `reasoning` extra_body - # field, so we use its native `think` parameter instead. - # This prevents thinking-capable models (Qwen3, etc.) from generating - # <think> blocks and producing empty-response errors when the user has - # set reasoning_effort: none.
- if self.provider == "custom" and self.reasoning_config and isinstance(self.reasoning_config, dict): - _effort = (self.reasoning_config.get("effort") or "").strip().lower() - _enabled = self.reasoning_config.get("enabled", True) - if _effort == "none" or _enabled is False: - extra_body["think"] = False - - if self._is_qwen_portal(): - extra_body["vl_high_resolution_images"] = True - - if extra_body: - api_kwargs["extra_body"] = extra_body - - # Priority Processing / generic request overrides (e.g. service_tier). - # Applied last so overrides win over any defaults set above. - if self.request_overrides: - api_kwargs.update(self.request_overrides) - - return api_kwargs - def _supports_reasoning_extra_body(self) -> bool: """Return True when reasoning extra_body is safe to send for this route/model. @@ -7191,11 +6897,14 @@ class AIAgent: Some providers/routes reject `reasoning` with 400s, so gate it to known reasoning-capable model families and direct Nous Portal. """ - if "nousresearch" in self._base_url_lower: + if base_url_host_matches(self._base_url_lower, "nousresearch.com"): return True - if "ai-gateway.vercel.sh" in self._base_url_lower: + if base_url_host_matches(self._base_url_lower, "ai-gateway.vercel.sh"): return True - if "models.github.ai" in self._base_url_lower or "api.githubcopilot.com" in self._base_url_lower: + if ( + base_url_host_matches(self._base_url_lower, "models.github.ai") + or base_url_host_matches(self._base_url_lower, "api.githubcopilot.com") + ): try: from hermes_cli.models import github_model_reasoning_efforts @@ -7315,6 +7024,11 @@ class AIAgent: "finish_reason": finish_reason, } + if hasattr(assistant_message, "reasoning_content"): + raw_reasoning_content = getattr(assistant_message, "reasoning_content", None) + if raw_reasoning_content is not None: + msg["reasoning_content"] = _sanitize_surrogates(raw_reasoning_content) + if hasattr(assistant_message, 'reasoning_details') and assistant_message.reasoning_details: # Pass reasoning_details back unmodified so providers (OpenRouter, # Anthropic, OpenAI) can maintain reasoning continuity across turns. @@ -7389,6 +7103,30 @@ class AIAgent: return msg + def _copy_reasoning_content_for_api(self, source_msg: dict, api_msg: dict) -> None: + """Copy provider-facing reasoning fields onto an API replay message.""" + if source_msg.get("role") != "assistant": + return + + explicit_reasoning = source_msg.get("reasoning_content") + if isinstance(explicit_reasoning, str): + api_msg["reasoning_content"] = explicit_reasoning + return + + normalized_reasoning = source_msg.get("reasoning") + if isinstance(normalized_reasoning, str) and normalized_reasoning: + api_msg["reasoning_content"] = normalized_reasoning + return + + kimi_requires_reasoning = ( + self.provider in {"kimi-coding", "kimi-coding-cn"} + or base_url_host_matches(self.base_url, "api.kimi.com") + or base_url_host_matches(self.base_url, "moonshot.ai") + or base_url_host_matches(self.base_url, "moonshot.cn") + ) + if kimi_requires_reasoning and source_msg.get("tool_calls"): + api_msg["reasoning_content"] = "" + @staticmethod def _sanitize_tool_calls_for_strict_api(api_msg: dict) -> dict: """Strip Codex Responses API fields from tool_calls for strict providers. 
@@ -7472,10 +7210,7 @@ class AIAgent: api_messages = [] for msg in messages: api_msg = msg.copy() - if msg.get("role") == "assistant": - reasoning = msg.get("reasoning") - if reasoning: - api_msg["reasoning_content"] = reasoning + self._copy_reasoning_content_for_api(msg, api_msg) api_msg.pop("reasoning", None) api_msg.pop("finish_reason", None) api_msg.pop("_flush_sentinel", None) @@ -7503,12 +7238,19 @@ class AIAgent: from agent.auxiliary_client import ( call_llm as _call_llm, _fixed_temperature_for_model, + OMIT_TEMPERATURE, ) _aux_available = True - # Use the fixed-temperature override (e.g. kimi-for-coding → 0.6) if - # the model has a strict contract; otherwise the historical 0.3 default. - _flush_temperature = _fixed_temperature_for_model(self.model) - if _flush_temperature is None: + # Kimi models manage temperature server-side — omit it entirely. + # Other models with a fixed contract get that value; everyone else + # gets the historical 0.3 default. + _fixed_temp = _fixed_temperature_for_model(self.model, self.base_url) + _omit_temperature = _fixed_temp is OMIT_TEMPERATURE + if _omit_temperature: + _flush_temperature = None + elif _fixed_temp is not None: + _flush_temperature = _fixed_temp + else: _flush_temperature = 0.3 try: response = _call_llm( @@ -7526,15 +7268,18 @@ class AIAgent: if not _aux_available and self.api_mode == "codex_responses": # No auxiliary client -- use the Codex Responses path directly codex_kwargs = self._build_api_kwargs(api_messages) - codex_kwargs["tools"] = self._responses_tools([memory_tool_def]) - codex_kwargs["temperature"] = _flush_temperature + codex_kwargs["tools"] = self._get_codex_transport().convert_tools([memory_tool_def]) + if _flush_temperature is not None: + codex_kwargs["temperature"] = _flush_temperature + else: + codex_kwargs.pop("temperature", None) if "max_output_tokens" in codex_kwargs: codex_kwargs["max_output_tokens"] = 5120 response = self._run_codex_stream(codex_kwargs) elif not _aux_available and self.api_mode == "anthropic_messages": - # Native Anthropic — use the Anthropic client directly - from agent.anthropic_adapter import build_anthropic_kwargs as _build_ant_kwargs - ant_kwargs = _build_ant_kwargs( + # Native Anthropic — use the transport for kwargs + _tflush = self._get_anthropic_transport() + ant_kwargs = _tflush.build_kwargs( model=self.model, messages=api_messages, tools=[memory_tool_def], max_tokens=5120, reasoning_config=None, @@ -7546,9 +7291,10 @@ class AIAgent: "model": self.model, "messages": api_messages, "tools": [memory_tool_def], - "temperature": _flush_temperature, **self._max_tokens_param(5120), } + if _flush_temperature is not None: + api_kwargs["temperature"] = _flush_temperature from agent.auxiliary_client import _get_task_timeout response = self._ensure_primary_openai_client(reason="flush_memories").chat.completions.create( **api_kwargs, timeout=_get_task_timeout("flush_memories") @@ -7557,14 +7303,25 @@ class AIAgent: # Extract tool calls from the response, handling all API formats tool_calls = [] if self.api_mode == "codex_responses" and not _aux_available: - assistant_msg, _ = self._normalize_codex_response(response) - if assistant_msg and assistant_msg.tool_calls: - tool_calls = assistant_msg.tool_calls + _ct_flush = self._get_codex_transport() + _cnr_flush = _ct_flush.normalize_response(response) + if _cnr_flush and _cnr_flush.tool_calls: + tool_calls = [ + SimpleNamespace( + id=tc.id, type="function", + function=SimpleNamespace(name=tc.name, arguments=tc.arguments), + ) for tc in 
_cnr_flush.tool_calls + ] elif self.api_mode == "anthropic_messages" and not _aux_available: - from agent.anthropic_adapter import normalize_anthropic_response as _nar_flush - _flush_msg, _ = _nar_flush(response, strip_tool_prefix=self._is_anthropic_oauth) - if _flush_msg and _flush_msg.tool_calls: - tool_calls = _flush_msg.tool_calls + _tfn = self._get_anthropic_transport() + _flush_nr = _tfn.normalize_response(response, strip_tool_prefix=self._is_anthropic_oauth) + if _flush_nr and _flush_nr.tool_calls: + tool_calls = [ + SimpleNamespace( + id=tc.id, type="function", + function=SimpleNamespace(name=tc.name, arguments=tc.arguments), + ) for tc in _flush_nr.tool_calls + ] elif hasattr(response, "choices") and response.choices: assistant_message = response.choices[0].message if assistant_message.tool_calls: @@ -7724,8 +7481,27 @@ class AIAgent: finally: self._executing_tools = False + def _dispatch_delegate_task(self, function_args: dict) -> str: + """Single call site for delegate_task dispatch. + + New DELEGATE_TASK_SCHEMA fields only need to be added here to reach all + invocation paths (concurrent, sequential, inline). + """ + from tools.delegate_tool import delegate_task as _delegate_task + return _delegate_task( + goal=function_args.get("goal"), + context=function_args.get("context"), + toolsets=function_args.get("toolsets"), + tasks=function_args.get("tasks"), + max_iterations=function_args.get("max_iterations"), + acp_command=function_args.get("acp_command"), + acp_args=function_args.get("acp_args"), + role=function_args.get("role"), + parent_agent=self, + ) + def _invoke_tool(self, function_name: str, function_args: dict, effective_task_id: str, - tool_call_id: Optional[str] = None) -> str: + tool_call_id: Optional[str] = None, messages: list = None) -> str: """Invoke a single tool and return the result string. No display logic. Handles both agent-level tools (todo, memory, etc.) and registry-dispatched @@ -7793,15 +7569,7 @@ class AIAgent: callback=self.clarify_callback, ) elif function_name == "delegate_task": - from tools.delegate_tool import delegate_task as _delegate_task - return _delegate_task( - goal=function_args.get("goal"), - context=function_args.get("context"), - toolsets=function_args.get("toolsets"), - tasks=function_args.get("tasks"), - max_iterations=function_args.get("max_iterations"), - parent_agent=self, - ) + return self._dispatch_delegate_task(function_args) else: return handle_function_call( function_name, function_args, effective_task_id, @@ -7949,8 +7717,7 @@ class AIAgent: # the tool returns True on the next poll. 
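+                # (Note: _set_interrupt presumably replaces the two inline
+                # `from tools.interrupt import set_interrupt as _sif` imports
+                # this hunk deletes with a single module-level alias. It is
+                # armed per worker thread on entry and cleared in the finally
+                # block below:
+                #
+                #     _set_interrupt(True, _worker_tid)    # arm for this worker
+                #     ...                                  # run the tool
+                #     _set_interrupt(False, _worker_tid)   # disarm
+                # )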
if self._interrupt_requested: try: - from tools.interrupt import set_interrupt as _sif - _sif(True, _worker_tid) + _set_interrupt(True, _worker_tid) except Exception: pass # Set the activity callback on THIS worker thread so @@ -7964,7 +7731,7 @@ class AIAgent: pass start = time.time() try: - result = self._invoke_tool(function_name, function_args, effective_task_id, tool_call.id) + result = self._invoke_tool(function_name, function_args, effective_task_id, tool_call.id, messages=messages) except Exception as tool_error: result = f"Error executing tool '{function_name}': {tool_error}" logger.error("_invoke_tool raised for %s: %s", function_name, tool_error, exc_info=True) @@ -7981,8 +7748,7 @@ class AIAgent: with self._tool_worker_threads_lock: self._tool_worker_threads.discard(_worker_tid) try: - from tools.interrupt import set_interrupt as _sif - _sif(False, _worker_tid) + _set_interrupt(False, _worker_tid) except Exception: pass @@ -8123,6 +7889,11 @@ class AIAgent: } messages.append(tool_msg) + # ── Per-tool /steer drain ─────────────────────────────────── + # Same as the sequential path: drain between each collected + # result so the steer lands as early as possible. + self._apply_pending_steer_to_tool_results(messages, 1) + # ── Per-turn aggregate budget enforcement ───────────────────────── num_tools = len(parsed_calls) if num_tools > 0: @@ -8312,7 +8083,6 @@ class AIAgent: if self._should_emit_quiet_tool_messages(): self._vprint(f" {_get_cute_tool_message_impl('clarify', function_args, tool_duration, result=function_result)}") elif function_name == "delegate_task": - from tools.delegate_tool import delegate_task as _delegate_task tasks_arg = function_args.get("tasks") if tasks_arg and isinstance(tasks_arg, list): spinner_label = f"🔀 delegating {len(tasks_arg)} tasks" @@ -8327,14 +8097,7 @@ class AIAgent: self._delegate_spinner = spinner _delegate_result = None try: - function_result = _delegate_task( - goal=function_args.get("goal"), - context=function_args.get("context"), - toolsets=function_args.get("toolsets"), - tasks=tasks_arg, - max_iterations=function_args.get("max_iterations"), - parent_agent=self, - ) + function_result = self._dispatch_delegate_task(function_args) _delegate_result = function_result finally: self._delegate_spinner = None @@ -8347,7 +8110,7 @@ class AIAgent: elif self._context_engine_tool_names and function_name in self._context_engine_tool_names: # Context engine tools (lcm_grep, lcm_describe, lcm_expand, etc.) spinner = None - if self.quiet_mode and not self.tool_progress_callback: + if self._should_emit_quiet_tool_messages(): face = random.choice(KawaiiSpinner.get_waiting_faces()) emoji = _get_tool_emoji(function_name) preview = _build_tool_preview(function_name, function_args) or function_name @@ -8365,7 +8128,7 @@ class AIAgent: cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_ce_result) if spinner: spinner.stop(cute_msg) - elif self.quiet_mode: + elif self._should_emit_quiet_tool_messages(): self._vprint(f" {cute_msg}") elif self._memory_manager and self._memory_manager.has_tool(function_name): # Memory provider tools (hindsight_retain, honcho_search, etc.) @@ -8486,6 +8249,12 @@ class AIAgent: } messages.append(tool_msg) + # ── Per-tool /steer drain ─────────────────────────────────── + # Drain pending steer BETWEEN individual tool calls so the + # injection lands as soon as a tool finishes — not after the + # entire batch. The model sees it on the next API iteration. 
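+                # (Note, grounded in the pre-API drain later in this file: the
+                # injection appends
+                #
+                #     "\n\nUser guidance: <steer text>"
+                #
+                # to the last tool result's content, so the model reads the
+                # guidance alongside the tool output on its next API
+                # iteration.)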
+ self._apply_pending_steer_to_tool_results(messages, 1) + if not self.quiet_mode: if self.verbose_logging: print(f" ✅ Tool {i} completed in {tool_duration:.2f}s") @@ -8559,14 +8328,17 @@ class AIAgent: summary_extra_body = {} try: - from agent.auxiliary_client import _fixed_temperature_for_model + from agent.auxiliary_client import _fixed_temperature_for_model, OMIT_TEMPERATURE as _OMIT_TEMP except Exception: _fixed_temperature_for_model = None - _summary_temperature = ( - _fixed_temperature_for_model(self.model) + _OMIT_TEMP = None + _raw_summary_temp = ( + _fixed_temperature_for_model(self.model, self.base_url) if _fixed_temperature_for_model is not None else None ) + _omit_summary_temperature = _raw_summary_temp is _OMIT_TEMP + _summary_temperature = None if _omit_summary_temperature else _raw_summary_temp _is_nous = "nousresearch" in self._base_url_lower if self._supports_reasoning_extra_body(): if self.reasoning_config is not None: @@ -8583,8 +8355,9 @@ class AIAgent: codex_kwargs = self._build_api_kwargs(api_messages) codex_kwargs.pop("tools", None) summary_response = self._run_codex_stream(codex_kwargs) - assistant_message, _ = self._normalize_codex_response(summary_response) - final_response = (assistant_message.content or "").strip() if assistant_message else "" + _ct_sum = self._get_codex_transport() + _cnr_sum = _ct_sum.normalize_response(summary_response) + final_response = (_cnr_sum.content or "").strip() else: summary_kwargs = { "model": self.model, @@ -8612,14 +8385,14 @@ class AIAgent: summary_kwargs["extra_body"] = summary_extra_body if self.api_mode == "anthropic_messages": - from agent.anthropic_adapter import build_anthropic_kwargs as _bak, normalize_anthropic_response as _nar - _ant_kw = _bak(model=self.model, messages=api_messages, tools=None, + _tsum = self._get_anthropic_transport() + _ant_kw = _tsum.build_kwargs(model=self.model, messages=api_messages, tools=None, max_tokens=self.max_tokens, reasoning_config=self.reasoning_config, is_oauth=self._is_anthropic_oauth, preserve_dots=self._anthropic_preserve_dots()) summary_response = self._anthropic_messages_create(_ant_kw) - _msg, _ = _nar(summary_response, strip_tool_prefix=self._is_anthropic_oauth) - final_response = (_msg.content or "").strip() + _sum_nr = _tsum.normalize_response(summary_response, strip_tool_prefix=self._is_anthropic_oauth) + final_response = (_sum_nr.content or "").strip() else: summary_response = self._ensure_primary_openai_client(reason="iteration_limit_summary").chat.completions.create(**summary_kwargs) @@ -8641,17 +8414,18 @@ class AIAgent: codex_kwargs = self._build_api_kwargs(api_messages) codex_kwargs.pop("tools", None) retry_response = self._run_codex_stream(codex_kwargs) - retry_msg, _ = self._normalize_codex_response(retry_response) - final_response = (retry_msg.content or "").strip() if retry_msg else "" + _ct_retry = self._get_codex_transport() + _cnr_retry = _ct_retry.normalize_response(retry_response) + final_response = (_cnr_retry.content or "").strip() elif self.api_mode == "anthropic_messages": - from agent.anthropic_adapter import build_anthropic_kwargs as _bak2, normalize_anthropic_response as _nar2 - _ant_kw2 = _bak2(model=self.model, messages=api_messages, tools=None, + _tretry = self._get_anthropic_transport() + _ant_kw2 = _tretry.build_kwargs(model=self.model, messages=api_messages, tools=None, is_oauth=self._is_anthropic_oauth, max_tokens=self.max_tokens, reasoning_config=self.reasoning_config, preserve_dots=self._anthropic_preserve_dots()) retry_response = 
self._anthropic_messages_create(_ant_kw2) - _retry_msg, _ = _nar2(retry_response, strip_tool_prefix=self._is_anthropic_oauth) - final_response = (_retry_msg.content or "").strip() + _retry_nr = _tretry.normalize_response(retry_response, strip_tool_prefix=self._is_anthropic_oauth) + final_response = (_retry_nr.content or "").strip() else: summary_kwargs = { "model": self.model, @@ -8753,6 +8527,11 @@ class AIAgent: self._persist_user_message_override = persist_user_message # Generate unique task_id if not provided to isolate VMs between concurrent tasks effective_task_id = task_id or str(uuid.uuid4()) + # Expose the active task_id so tools running mid-turn (e.g. delegate_task + # in delegate_tool.py) can identify this agent for the cross-agent file + # state registry. Set BEFORE any tool dispatch so snapshots taken at + # child-launch time see the parent's real id, not None. + self._current_task_id = effective_task_id # Reset retry counters and iteration budget at the start of each turn # so subagent usage from a previous turn doesn't eat into the next one. @@ -8793,7 +8572,8 @@ class AIAgent: self.iteration_budget = IterationBudget(self.max_iterations) # Log conversation turn start for debugging/observability - _msg_preview = (user_message[:80] + "...") if len(user_message) > 80 else user_message + _preview_text = _summarize_user_message_for_log(user_message) + _msg_preview = (_preview_text[:80] + "...") if len(_preview_text) > 80 else _preview_text _msg_preview = _msg_preview.replace("\n", " ") logger.info( "conversation turn: session=%s model=%s provider=%s platform=%s history=%d msg=%r", @@ -8841,7 +8621,8 @@ class AIAgent: self._persist_user_message_idx = current_turn_user_idx if not self.quiet_mode: - self._safe_print(f"💬 Starting conversation: '{user_message[:60]}{'...' if len(user_message) > 60 else ''}'") + _print_preview = _summarize_user_message_for_log(user_message) + self._safe_print(f"💬 Starting conversation: '{_print_preview[:60]}{'...' if len(_print_preview) > 60 else ''}'") # ── System prompt (cached per session for prefix caching) ── # Built once on first call, reused for all subsequent calls. @@ -9110,6 +8891,56 @@ class AIAgent: and "skill_manage" in self.valid_tool_names): self._iters_since_skill += 1 + # ── Pre-API-call /steer drain ────────────────────────────────── + # If a /steer arrived during the previous API call (while the model + # was thinking), drain it now — before we build api_messages — so + # the model sees the steer text on THIS iteration. Without this, + # steers sent during an API call only land after the NEXT tool batch, + # which may never come if the model returns a final response. + # + # We scan backwards for the last tool-role message in the messages + # list. If found, the steer is appended there. If not (first + # iteration, no tools yet), the steer stays pending for the next + # tool batch — injecting into a user message would break role + # alternation, and there's no tool output to piggyback on. 
+ _pre_api_steer = self._drain_pending_steer() + if _pre_api_steer: + _injected = False + for _si in range(len(messages) - 1, -1, -1): + _sm = messages[_si] + if isinstance(_sm, dict) and _sm.get("role") == "tool": + marker = f"\n\nUser guidance: {_pre_api_steer}" + existing = _sm.get("content", "") + if isinstance(existing, str): + _sm["content"] = existing + marker + else: + # Multimodal content blocks — append text block + try: + blocks = list(existing) if existing else [] + blocks.append({"type": "text", "text": marker}) + _sm["content"] = blocks + except Exception: + pass + _injected = True + logger.debug( + "Pre-API-call steer drain: injected into tool msg at index %d", + _si, + ) + break + if not _injected: + # No tool message to inject into — put it back so + # the post-tool-execution drain picks it up later. + _lock = getattr(self, "_pending_steer_lock", None) + if _lock is not None: + with _lock: + if self._pending_steer: + self._pending_steer = self._pending_steer + "\n" + _pre_api_steer + else: + self._pending_steer = _pre_api_steer + else: + existing = getattr(self, "_pending_steer", None) + self._pending_steer = (existing + "\n" + _pre_api_steer) if existing else _pre_api_steer # Prepare messages for API call # If we have an ephemeral system prompt, prepend it to the messages # Note: Reasoning is embedded in content via <think> tags for trajectory storage. @@ -9139,11 +8970,7 @@ class AIAgent: # For ALL assistant messages, pass reasoning back to the API # This ensures multi-turn reasoning context is preserved - if msg.get("role") == "assistant": - reasoning_text = msg.get("reasoning") - if reasoning_text: - # Add reasoning_content for API compatibility (Moonshot AI, Novita, OpenRouter) - api_msg["reasoning_content"] = reasoning_text + self._copy_reasoning_content_for_api(msg, api_msg) # Remove 'reasoning' field - it's for trajectory storage only # We've copied it to 'reasoning_content' for the API above @@ -9185,12 +9012,19 @@ class AIAgent: for idx, pfm in enumerate(self.prefill_messages): api_messages.insert(sys_offset + idx, pfm.copy()) - # Apply Anthropic prompt caching for Claude models via OpenRouter. - # Auto-detected: if model name contains "claude" and base_url is OpenRouter, - # inject cache_control breakpoints (system + last 3 messages) to reduce - # input token costs by ~75% on multi-turn conversations. + # Apply Anthropic prompt caching for Claude models on native + # Anthropic, OpenRouter, and third-party Anthropic-compatible + # gateways. Auto-detected: if ``_use_prompt_caching`` is set, + # inject cache_control breakpoints (system + last 3 messages) + # to reduce input token costs by ~75% on multi-turn + # conversations. Layout is chosen per endpoint by + # ``_anthropic_prompt_cache_policy``. if self._use_prompt_caching: - api_messages = apply_anthropic_cache_control(api_messages, cache_ttl=self._cache_ttl, native_anthropic=(self.api_mode == 'anthropic_messages')) + api_messages = apply_anthropic_cache_control( + api_messages, + cache_ttl=self._cache_ttl, + native_anthropic=self._use_native_cache_layout, + ) # Safety net: strip orphaned tool results / add stubs for missing # results before sending to the API.
Runs unconditionally — not @@ -9224,7 +9058,10 @@ class AIAgent: ), }} except Exception: - pass + tc["function"]["arguments"] = _repair_tool_call_arguments( + tc["function"]["arguments"], + tc["function"].get("name", "?"), + ) new_tcs.append(tc) am["tool_calls"] = new_tcs @@ -9337,7 +9174,7 @@ class AIAgent: if self._force_ascii_payload: _sanitize_structure_non_ascii(api_kwargs) if self.api_mode == "codex_responses": - api_kwargs = self._preflight_codex_api_kwargs(api_kwargs, allow_stream=False) + api_kwargs = self._get_codex_transport().preflight_kwargs(api_kwargs, allow_stream=False) try: from hermes_cli.plugins import invoke_hook as _invoke_hook @@ -9425,51 +9262,53 @@ class AIAgent: response_invalid = False error_details = [] if self.api_mode == "codex_responses": - output_items = getattr(response, "output", None) if response is not None else None - if response is None: - response_invalid = True - error_details.append("response is None") - elif not isinstance(output_items, list): - response_invalid = True - error_details.append("response.output is not a list") - elif not output_items: - # Stream backfill may have failed, but - # _normalize_codex_response can still recover - # from response.output_text. Only mark invalid - # when that fallback is also absent. - _out_text = getattr(response, "output_text", None) - _out_text_stripped = _out_text.strip() if isinstance(_out_text, str) else "" - if _out_text_stripped: - logger.debug( - "Codex response.output is empty but output_text is present " - "(%d chars); deferring to normalization.", - len(_out_text_stripped), - ) - else: - _resp_status = getattr(response, "status", None) - _resp_incomplete = getattr(response, "incomplete_details", None) - logger.warning( - "Codex response.output is empty after stream backfill " - "(status=%s, incomplete_details=%s, model=%s). %s", - _resp_status, _resp_incomplete, - getattr(response, "model", None), - f"api_mode={self.api_mode} provider={self.provider}", - ) + _ct_v = self._get_codex_transport() + if not _ct_v.validate_response(response): + if response is None: response_invalid = True - error_details.append("response.output is empty") + error_details.append("response is None") + else: + # output_text fallback: stream backfill may have failed + # but normalize can still recover from output_text + _out_text = getattr(response, "output_text", None) + _out_text_stripped = _out_text.strip() if isinstance(_out_text, str) else "" + if _out_text_stripped: + logger.debug( + "Codex response.output is empty but output_text is present " + "(%d chars); deferring to normalization.", + len(_out_text_stripped), + ) + else: + _resp_status = getattr(response, "status", None) + _resp_incomplete = getattr(response, "incomplete_details", None) + logger.warning( + "Codex response.output is empty after stream backfill " + "(status=%s, incomplete_details=%s, model=%s). 
%s", + _resp_status, _resp_incomplete, + getattr(response, "model", None), + f"api_mode={self.api_mode} provider={self.provider}", + ) + response_invalid = True + error_details.append("response.output is empty") elif self.api_mode == "anthropic_messages": - content_blocks = getattr(response, "content", None) if response is not None else None - if response is None: + _tv = self._get_anthropic_transport() + if not _tv.validate_response(response): response_invalid = True - error_details.append("response is None") - elif not isinstance(content_blocks, list): + if response is None: + error_details.append("response is None") + else: + error_details.append("response.content invalid (not a non-empty list)") + elif self.api_mode == "bedrock_converse": + _btv = self._get_bedrock_transport() + if not _btv.validate_response(response): response_invalid = True - error_details.append("response.content is not a list") - elif not content_blocks: - response_invalid = True - error_details.append("response.content is empty") + if response is None: + error_details.append("response is None") + else: + error_details.append("Bedrock response invalid (no output or choices)") else: - if response is None or not hasattr(response, 'choices') or response.choices is None or not response.choices: + _ctv = self._get_chat_completions_transport() + if not _ctv.validate_response(response): response_invalid = True if response is None: error_details.append("response is None") @@ -9628,8 +9467,12 @@ class AIAgent: else: finish_reason = "stop" elif self.api_mode == "anthropic_messages": - stop_reason_map = {"end_turn": "stop", "tool_use": "tool_calls", "max_tokens": "length", "stop_sequence": "stop"} - finish_reason = stop_reason_map.get(response.stop_reason, "stop") + _tfr = self._get_anthropic_transport() + finish_reason = _tfr.map_finish_reason(response.stop_reason) + elif self.api_mode == "bedrock_converse": + # Bedrock response is already normalized at dispatch — finish_reason + # is already in OpenAI format via normalize_converse_response() + finish_reason = response.choices[0].finish_reason if hasattr(response, "choices") and response.choices else "stop" else: finish_reason = response.choices[0].finish_reason assistant_message = response.choices[0].message @@ -9647,25 +9490,44 @@ class AIAgent: if finish_reason == "length": self._vprint(f"{self.log_prefix}⚠️ Response truncated (finish_reason='length') - model hit max output tokens", force=True) + # Normalize the truncated response to a single OpenAI-style + # message shape so text-continuation and tool-call retry + # work uniformly across chat_completions, bedrock_converse, + # and anthropic_messages. For Anthropic we use the same + # adapter the agent loop already relies on so the rebuilt + # interim assistant message is byte-identical to what + # would have been appended in the non-truncated path. 
+ _trunc_msg = None + if self.api_mode in ("chat_completions", "bedrock_converse"): + _trunc_msg = response.choices[0].message if (hasattr(response, "choices") and response.choices) else None + elif self.api_mode == "anthropic_messages": + _trunc_nr = self._get_anthropic_transport().normalize_response( + response, strip_tool_prefix=self._is_anthropic_oauth + ) + _trunc_msg = SimpleNamespace( + content=_trunc_nr.content, + tool_calls=[ + SimpleNamespace( + id=tc.id, type="function", + function=SimpleNamespace(name=tc.name, arguments=tc.arguments), + ) for tc in (_trunc_nr.tool_calls or []) + ] or None, + reasoning=_trunc_nr.reasoning, + reasoning_content=None, + reasoning_details=( + _trunc_nr.provider_data.get("reasoning_details") + if _trunc_nr.provider_data else None + ), + ) + + _trunc_content = getattr(_trunc_msg, "content", None) if _trunc_msg else None + _trunc_has_tool_calls = bool(getattr(_trunc_msg, "tool_calls", None)) if _trunc_msg else False + # ── Detect thinking-budget exhaustion ────────────── # When the model spends ALL output tokens on reasoning # and has none left for the response, continuation # retries are pointless. Detect this early and give a # targeted error instead of wasting 3 API calls. - _trunc_content = None - _trunc_has_tool_calls = False - if self.api_mode in ("chat_completions", "bedrock_converse"): - _trunc_msg = response.choices[0].message if (hasattr(response, "choices") and response.choices) else None - _trunc_content = getattr(_trunc_msg, "content", None) if _trunc_msg else None - _trunc_has_tool_calls = bool(getattr(_trunc_msg, "tool_calls", None)) if _trunc_msg else False - elif self.api_mode == "anthropic_messages": - # Anthropic response.content is a list of blocks - _text_parts = [] - for _blk in getattr(response, "content", []): - if getattr(_blk, "type", None) == "text": - _text_parts.append(getattr(_blk, "text", "")) - _trunc_content = "\n".join(_text_parts) if _text_parts else None - # A response is "thinking exhausted" only when the model # actually produced reasoning blocks but no visible text after # them. Models that do not use <think> tags (e.g.
GLM-4.7 on @@ -9722,9 +9584,9 @@ class AIAgent: "error": _exhaust_error, } - if self.api_mode in ("chat_completions", "bedrock_converse"): - assistant_message = response.choices[0].message - if not assistant_message.tool_calls: + if self.api_mode in ("chat_completions", "bedrock_converse", "anthropic_messages"): + assistant_message = _trunc_msg + if assistant_message is not None and not _trunc_has_tool_calls: length_continue_retries += 1 interim_msg = self._build_assistant_message(assistant_message, finish_reason) messages.append(interim_msg) @@ -9762,9 +9624,9 @@ class AIAgent: "error": "Response remained truncated after 3 continuation attempts", } - if self.api_mode in ("chat_completions", "bedrock_converse"): - assistant_message = response.choices[0].message - if assistant_message.tool_calls: + if self.api_mode in ("chat_completions", "bedrock_converse", "anthropic_messages"): + assistant_message = _trunc_msg + if assistant_message is not None and _trunc_has_tool_calls: if truncated_tool_call_retries < 1: truncated_tool_call_retries += 1 self._vprint( @@ -9905,6 +9767,7 @@ class AIAgent: billing_mode="subscription_included" if cost_result.status == "included" else None, model=self.model, + api_call_count=1, ) except Exception: pass # never block the agent loop @@ -9912,21 +9775,27 @@ class AIAgent: if self.verbose_logging: logging.debug(f"Token usage: prompt={usage_dict['prompt_tokens']:,}, completion={usage_dict['completion_tokens']:,}, total={usage_dict['total_tokens']:,}") - # Log cache hit stats when prompt caching is active - if self._use_prompt_caching: - if self.api_mode == "anthropic_messages": - # Anthropic uses cache_read_input_tokens / cache_creation_input_tokens - cached = getattr(response.usage, 'cache_read_input_tokens', 0) or 0 - written = getattr(response.usage, 'cache_creation_input_tokens', 0) or 0 - else: - # OpenRouter uses prompt_tokens_details.cached_tokens - details = getattr(response.usage, 'prompt_tokens_details', None) - cached = getattr(details, 'cached_tokens', 0) or 0 if details else 0 - written = getattr(details, 'cache_write_tokens', 0) or 0 if details else 0 - prompt = usage_dict["prompt_tokens"] + # Surface cache hit stats for any provider that reports + # them — not just those where we inject cache_control + # markers. OpenAI/Kimi/DeepSeek/Qwen all do automatic + # server-side prefix caching and return + # ``prompt_tokens_details.cached_tokens``; users + # previously could not see their cache % because this + # line was gated on ``_use_prompt_caching``, which is + # only True for Anthropic-style marker injection. + # ``canonical_usage`` is already normalised from all + # three API shapes (Anthropic / Codex / OpenAI-chat) + # so we can rely on its values directly. + cached = canonical_usage.cache_read_tokens + written = canonical_usage.cache_write_tokens + prompt = usage_dict["prompt_tokens"] + if (cached or written) and not self.quiet_mode: hit_pct = (cached / prompt * 100) if prompt > 0 else 0 - if not self.quiet_mode: - self._vprint(f"{self.log_prefix} 💾 Cache: {cached:,}/{prompt:,} tokens ({hit_pct:.0f}% hit, {written:,} written)") + self._vprint( + f"{self.log_prefix} 💾 Cache: " + f"{cached:,}/{prompt:,} tokens " + f"({hit_pct:.0f}% hit, {written:,} written)" + ) has_retried_429 = False # Reset on success # Clear Nous rate limit state on successful request — @@ -10175,6 +10044,27 @@ class AIAgent: if self._try_refresh_nous_client_credentials(force=True): print(f"{self.log_prefix}🔐 Nous agent key refreshed after 401. 
Retrying request...") continue + # Credential refresh didn't help — show diagnostic info. + # Most common causes: Portal OAuth expired/revoked, + # account out of credits, or agent key blocked. + from hermes_constants import display_hermes_home as _dhh_fn + _dhh = _dhh_fn() + _body_text = "" + try: + _body = getattr(api_error, "body", None) or getattr(api_error, "response", None) + if _body is not None: + _body_text = str(_body)[:200] + except Exception: + pass + print(f"{self.log_prefix}🔐 Nous 401 — Portal authentication failed.") + if _body_text: + print(f"{self.log_prefix} Response: {_body_text}") + print(f"{self.log_prefix} Most likely: Portal OAuth expired, account out of credits, or agent key revoked.") + print(f"{self.log_prefix} Troubleshooting:") + print(f"{self.log_prefix} • Re-authenticate: hermes login --provider nous") + print(f"{self.log_prefix} • Check credits / billing: https://portal.nousresearch.com") + print(f"{self.log_prefix} • Verify stored credentials: {_dhh}/auth.json") + print(f"{self.log_prefix} • Switch providers temporarily: /model --provider openrouter") if ( self.api_mode == "anthropic_messages" and status_code == 401 @@ -10665,7 +10555,7 @@ class AIAgent: self._vprint(f"{self.log_prefix} 💡 Your API key was rejected by the provider. Check:", force=True) self._vprint(f"{self.log_prefix} • Is the key valid? Run: hermes setup", force=True) self._vprint(f"{self.log_prefix} • Does your account have access to {_model}?", force=True) - if "openrouter" in str(_base).lower(): + if base_url_host_matches(str(_base), "openrouter.ai"): self._vprint(f"{self.log_prefix} • Check credits: https://openrouter.ai/settings/credits", force=True) else: self._vprint(f"{self.log_prefix} 💡 This type of error won't be fixed by retrying.", force=True) @@ -10860,12 +10750,66 @@ class AIAgent: try: if self.api_mode == "codex_responses": - assistant_message, finish_reason = self._normalize_codex_response(response) + _ct = self._get_codex_transport() + _cnr = _ct.normalize_response(response) + # Back-compat shim: downstream expects SimpleNamespace with + # codex-specific fields (.codex_reasoning_items, .reasoning_details, + # and .call_id/.response_item_id on tool calls). 
+ _tc_list = None + if _cnr.tool_calls: + _tc_list = [] + for tc in _cnr.tool_calls: + _tc_ns = SimpleNamespace( + id=tc.id, type="function", + function=SimpleNamespace(name=tc.name, arguments=tc.arguments), + ) + if tc.provider_data: + if tc.provider_data.get("call_id"): + _tc_ns.call_id = tc.provider_data["call_id"] + if tc.provider_data.get("response_item_id"): + _tc_ns.response_item_id = tc.provider_data["response_item_id"] + _tc_list.append(_tc_ns) + assistant_message = SimpleNamespace( + content=_cnr.content, + tool_calls=_tc_list or None, + reasoning=_cnr.reasoning, + reasoning_content=None, + codex_reasoning_items=( + _cnr.provider_data.get("codex_reasoning_items") + if _cnr.provider_data else None + ), + reasoning_details=( + _cnr.provider_data.get("reasoning_details") + if _cnr.provider_data else None + ), + ) + finish_reason = _cnr.finish_reason elif self.api_mode == "anthropic_messages": - from agent.anthropic_adapter import normalize_anthropic_response - assistant_message, finish_reason = normalize_anthropic_response( + _transport = self._get_anthropic_transport() + _nr = _transport.normalize_response( response, strip_tool_prefix=self._is_anthropic_oauth ) + # Back-compat shim: downstream code expects SimpleNamespace with + # .content, .tool_calls, .reasoning, .reasoning_content, + # .reasoning_details attributes. + assistant_message = SimpleNamespace( + content=_nr.content, + tool_calls=[ + SimpleNamespace( + id=tc.id, + type="function", + function=SimpleNamespace(name=tc.name, arguments=tc.arguments), + ) + for tc in (_nr.tool_calls or []) + ] or None, + reasoning=_nr.reasoning, + reasoning_content=None, + reasoning_details=( + _nr.provider_data.get("reasoning_details") + if _nr.provider_data else None + ), + ) + finish_reason = _nr.finish_reason else: assistant_message = response.choices[0].message @@ -11209,17 +11153,10 @@ class AIAgent: self._last_content_tools_all_housekeeping = _all_housekeeping if _all_housekeeping and self._has_stream_consumers(): self._mute_post_response = True - elif self.quiet_mode: + elif self._should_emit_quiet_tool_messages(): clean = self._strip_think_blocks(turn_content).strip() if clean: - relayed = False - if ( - self.tool_progress_callback - and getattr(self, "platform", "") == "tui" - ): - relayed = True - if not relayed: - self._vprint(f" ┊ 💬 {clean}") + self._vprint(f" ┊ 💬 {clean}") # Pop thinking-only prefill message(s) before appending # (tool-call path — same rationale as the final-response path). @@ -11300,10 +11237,12 @@ class AIAgent: # should_compress(0) never fires. (#2153) _compressor = self.context_compressor if _compressor.last_prompt_tokens > 0: - _real_tokens = ( - _compressor.last_prompt_tokens - + _compressor.last_completion_tokens - ) + # Only use prompt_tokens — completion/reasoning + # tokens don't consume context window space. + # Thinking models (GLM-5.1, QwQ, DeepSeek R1) + # inflate completion_tokens with reasoning, + # causing premature compression. (#12026) + _real_tokens = _compressor.last_prompt_tokens else: _real_tokens = estimate_messages_tokens_rough(messages) @@ -11702,8 +11641,9 @@ class AIAgent: # Determine if conversation completed successfully completed = final_response is not None and api_call_count < self.max_iterations - # Save trajectory if enabled - self._save_trajectory(messages, user_message, completed) + # Save trajectory if enabled. ``user_message`` may be a multimodal + # list of parts; the trajectory format wants a plain string. 
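+        # (Illustrative sketch: _summarize_user_message_for_log is not shown
+        # in this diff. Assuming OpenAI-style multimodal part lists, a minimal
+        # version would be:
+        #
+        #     def _summarize_user_message_for_log(msg):
+        #         if isinstance(msg, str):
+        #             return msg
+        #         parts = []
+        #         for p in msg or []:
+        #             if isinstance(p, dict) and p.get("type") == "text":
+        #                 parts.append(p.get("text", ""))
+        #             elif isinstance(p, dict):
+        #                 parts.append(f"[{p.get('type', 'part')}]")
+        #         return " ".join(x for x in parts if x)
+        #
+        # Text parts are joined; non-text parts reduce to a type tag.)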
+ self._save_trajectory(messages, _summarize_user_message_for_log(user_message), completed) # Clean up VM and browser for this task after conversation completes self._cleanup_task_resources(effective_task_id) @@ -11936,7 +11876,7 @@ def main( # Handle tool listing if list_tools: - from model_tools import get_all_tool_names, get_toolset_for_tool, get_available_toolsets + from model_tools import get_all_tool_names, get_available_toolsets from toolsets import get_all_toolsets, get_toolset_info print("📋 Available Tools & Toolsets:") diff --git a/scripts/install.ps1 b/scripts/install.ps1 index 80ed53cce8..144113d5a0 100644 --- a/scripts/install.ps1 +++ b/scripts/install.ps1 @@ -630,7 +630,7 @@ function Copy-ConfigTemplates { New-Item -ItemType Directory -Force -Path "$HermesHome\audio_cache" | Out-Null New-Item -ItemType Directory -Force -Path "$HermesHome\memories" | Out-Null New-Item -ItemType Directory -Force -Path "$HermesHome\skills" | Out-Null - New-Item -ItemType Directory -Force -Path "$HermesHome\whatsapp\session" | Out-Null + # Create .env $envPath = "$HermesHome\.env" @@ -735,19 +735,7 @@ function Install-NodeDeps { Pop-Location } - # Install WhatsApp bridge dependencies - $bridgeDir = "$InstallDir\scripts\whatsapp-bridge" - if (Test-Path "$bridgeDir\package.json") { - Write-Info "Installing WhatsApp bridge dependencies..." - Push-Location $bridgeDir - try { - npm install --silent 2>&1 | Out-Null - Write-Success "WhatsApp bridge dependencies installed" - } catch { - Write-Warn "WhatsApp bridge npm install failed (WhatsApp may not work)" - } - Pop-Location - } + Pop-Location } diff --git a/scripts/install.sh b/scripts/install.sh index c6524cefcb..166d984fac 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -297,7 +297,7 @@ check_python() { if command -v python >/dev/null 2>&1; then PYTHON_PATH="$(command -v python)" if "$PYTHON_PATH" -c 'import sys; raise SystemExit(0 if sys.version_info >= (3, 11) else 1)' 2>/dev/null; then - PYTHON_FOUND_VERSION=$($PYTHON_PATH --version 2>/dev/null) + PYTHON_FOUND_VERSION="$("$PYTHON_PATH" --version 2>/dev/null)" log_success "Python found: $PYTHON_FOUND_VERSION" return 0 fi @@ -306,7 +306,7 @@ check_python() { log_info "Installing Python via pkg..." pkg install -y python >/dev/null PYTHON_PATH="$(command -v python)" - PYTHON_FOUND_VERSION=$($PYTHON_PATH --version 2>/dev/null) + PYTHON_FOUND_VERSION="$("$PYTHON_PATH" --version 2>/dev/null)" log_success "Python installed: $PYTHON_FOUND_VERSION" return 0 fi @@ -315,18 +315,17 @@ check_python() { # Let uv handle Python — it can download and manage Python versions # First check if a suitable Python is already available - if $UV_CMD python find "$PYTHON_VERSION" &> /dev/null; then - PYTHON_PATH=$($UV_CMD python find "$PYTHON_VERSION") - PYTHON_FOUND_VERSION=$($PYTHON_PATH --version 2>/dev/null) + if PYTHON_PATH="$("$UV_CMD" python find "$PYTHON_VERSION" 2>/dev/null)"; then + PYTHON_FOUND_VERSION="$("$PYTHON_PATH" --version 2>/dev/null)" log_success "Python found: $PYTHON_FOUND_VERSION" return 0 fi # Python not found — use uv to install it (no sudo needed!) log_info "Python $PYTHON_VERSION not found, installing via uv..." 
- if $UV_CMD python install "$PYTHON_VERSION"; then - PYTHON_PATH=$($UV_CMD python find "$PYTHON_VERSION") - PYTHON_FOUND_VERSION=$($PYTHON_PATH --version 2>/dev/null) + if "$UV_CMD" python install "$PYTHON_VERSION"; then + PYTHON_PATH="$("$UV_CMD" python find "$PYTHON_VERSION")" + PYTHON_FOUND_VERSION="$("$PYTHON_PATH" --version 2>/dev/null)" log_success "Python installed: $PYTHON_FOUND_VERSION" else log_error "Failed to install Python $PYTHON_VERSION" @@ -1052,7 +1051,7 @@ copy_config_templates() { log_info "Setting up configuration files..." # Create ~/.hermes directory structure (config at top level, code in subdir) - mkdir -p "$HERMES_HOME"/{cron,sessions,logs,pairing,hooks,image_cache,audio_cache,memories,skills,whatsapp/session} + mkdir -p "$HERMES_HOME"/{cron,sessions,logs,pairing,hooks,image_cache,audio_cache,memories,skills} # Create .env at ~/.hermes/.env (top level, easy to find) if [ ! -f "$HERMES_HOME/.env" ]; then @@ -1122,7 +1121,7 @@ install_node_deps() { if [ "$DISTRO" = "termux" ]; then log_info "Skipping automatic Node/browser dependency setup on Termux" - log_info "Browser automation and WhatsApp bridge are not part of the tested Termux install path yet." + log_info "Browser automation is not part of the tested Termux install path yet." log_info "If you want to experiment manually later, run: cd $INSTALL_DIR && npm install" return 0 fi @@ -1204,15 +1203,7 @@ install_node_deps() { log_success "TUI dependencies installed" fi - # Install WhatsApp bridge dependencies - if [ -f "$INSTALL_DIR/scripts/whatsapp-bridge/package.json" ]; then - log_info "Installing WhatsApp bridge dependencies..." - cd "$INSTALL_DIR/scripts/whatsapp-bridge" - npm install --silent 2>/dev/null || { - log_warn "WhatsApp bridge npm install failed (WhatsApp may not work)" - } - log_success "WhatsApp bridge dependencies installed" - fi + } run_setup_wizard() { diff --git a/scripts/release.py b/scripts/release.py index 90c2a13d0b..8d213ea070 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -44,15 +44,24 @@ AUTHOR_MAP = { "teknium@nousresearch.com": "teknium1", "127238744+teknium1@users.noreply.github.com": "teknium1", # contributors (from noreply pattern) + "wangqiang@wangqiangdeMac-mini.local": "xiaoqiang243", "snreynolds2506@gmail.com": "snreynolds", "35742124+0xbyt4@users.noreply.github.com": "0xbyt4", + "71184274+MassiveMassimo@users.noreply.github.com": "MassiveMassimo", + "massivemassimo@users.noreply.github.com": "MassiveMassimo", "82637225+kshitijk4poor@users.noreply.github.com": "kshitijk4poor", + "keifergu@tencent.com": "keifergu", "kshitijk4poor@users.noreply.github.com": "kshitijk4poor", + "abner.the.foreman@agentmail.to": "Abnertheforeman", + "harryykyle1@gmail.com": "hharry11", "kshitijk4poor@gmail.com": "kshitijk4poor", "16443023+stablegenius49@users.noreply.github.com": "stablegenius49", "185121704+stablegenius49@users.noreply.github.com": "stablegenius49", "101283333+batuhankocyigit@users.noreply.github.com": "batuhankocyigit", "valdi.jorge@gmail.com": "jvcl", + "francip@gmail.com": "francip", + "omni@comelse.com": "omnissiah-comelse", + "oussama.redcode@gmail.com": "mavrickdeveloper", "126368201+vilkasdev@users.noreply.github.com": "vilkasdev", "137614867+cutepawss@users.noreply.github.com": "cutepawss", "96793918+memosr@users.noreply.github.com": "memosr", @@ -66,7 +75,10 @@ AUTHOR_MAP = { "104278804+Sertug17@users.noreply.github.com": "Sertug17", "112503481+caentzminger@users.noreply.github.com": "caentzminger", "258577966+voidborne-d@users.noreply.github.com": 
"voidborne-d", + "sir_even@icloud.com": "sirEven", + "36056348+sirEven@users.noreply.github.com": "sirEven", "70424851+insecurejezza@users.noreply.github.com": "insecurejezza", + "254021826+dodo-reach@users.noreply.github.com": "dodo-reach", "259807879+Bartok9@users.noreply.github.com": "Bartok9", "241404605+MestreY0d4-Uninter@users.noreply.github.com": "MestreY0d4-Uninter", "268667990+Roy-oss1@users.noreply.github.com": "Roy-oss1", @@ -76,32 +88,62 @@ AUTHOR_MAP = { "39405770+yyq4193@users.noreply.github.com": "yyq4193", "Asunfly@users.noreply.github.com": "Asunfly", "2500400+honghua@users.noreply.github.com": "honghua", + "462836+jplew@users.noreply.github.com": "jplew", "nish3451@users.noreply.github.com": "nish3451", + "Mibayy@users.noreply.github.com": "Mibayy", + "mibayy@users.noreply.github.com": "Mibayy", + "135070653+sgaofen@users.noreply.github.com": "sgaofen", + "nocoo@users.noreply.github.com": "nocoo", + "30841158+n-WN@users.noreply.github.com": "n-WN", + "tsuijinglei@gmail.com": "hiddenpuppy", + "jerome@clawwork.ai": "HiddenPuppy", + "leoyuan0099@gmail.com": "keyuyuan", + "bxzt2006@163.com": "Only-Code-A", + "i@troy-y.org": "TroyMitchell911", + "mygamez@163.com": "zhongyueming1121", + "hansnow@users.noreply.github.com": "hansnow", + "134848055+UNLINEARITY@users.noreply.github.com": "UNLINEARITY", + "ben.burtenshaw@gmail.com": "burtenshaw", + "roopaknijhara@gmail.com": "rnijhara", # contributors (manual mapping from git names) "ahmedsherif95@gmail.com": "asheriif", "liujinkun@bytedance.com": "liujinkun2025", "dmayhem93@gmail.com": "dmahan93", + "fr@tecompanytea.com": "ifrederico", + "cdanis@gmail.com": "cdanis", "samherring99@gmail.com": "samherring99", "desaiaum08@gmail.com": "Aum08Desai", "shannon.sands.1979@gmail.com": "shannonsands", "shannon@nousresearch.com": "shannonsands", + "abdi.moya@gmail.com": "AxDSan", "eri@plasticlabs.ai": "Erosika", "hjcpuro@gmail.com": "hjc-puro", "xaydinoktay@gmail.com": "aydnOktay", "abdullahfarukozden@gmail.com": "Farukest", "lovre.pesut@gmail.com": "rovle", + "xjtumj@gmail.com": "mengjian-github", "kevinskysunny@gmail.com": "kevinskysunny", "xiewenxuan462@gmail.com": "yule975", "yiweimeng.dlut@hotmail.com": "meng93", "hakanerten02@hotmail.com": "teyrebaz33", + "linux2010@users.noreply.github.com": "Linux2010", + "elmatadorgh@users.noreply.github.com": "elmatadorgh", + "alexazzjjtt@163.com": "alexzhu0", + "1180176+Swift42@users.noreply.github.com": "Swift42", "ruzzgarcn@gmail.com": "Ruzzgar", + "yukipukikedy@gmail.com": "Yukipukii1", "alireza78.crypto@gmail.com": "alireza78a", "brooklyn.bb.nicholson@gmail.com": "brooklynnicholson", + "withapurpose37@gmail.com": "StefanIsMe", "4317663+helix4u@users.noreply.github.com": "helix4u", + "ifkellx@users.noreply.github.com": "Ifkellx", "331214+counterposition@users.noreply.github.com": "counterposition", "blspear@gmail.com": "BrennerSpear", "akhater@gmail.com": "akhater", "239876380+handsdiff@users.noreply.github.com": "handsdiff", + "hesapacicam112@gmail.com": "etherman-os", + "mark.ramsell@rivermounts.com": "mark-ramsell", + "taeng02@icloud.com": "taeng0204", "gpickett00@gmail.com": "gpickett00", "mcosma@gmail.com": "wakamex", "clawdia.nash@proton.me": "clawdia-nash", @@ -112,6 +154,7 @@ AUTHOR_MAP = { "noonou7@gmail.com": "HenkDz", "dean.kerr@gmail.com": "deankerr", "socrates1024@gmail.com": "socrates1024", + "seanalt555@gmail.com": "Salt-555", "satelerd@gmail.com": "satelerd", "numman.ali@gmail.com": "nummanali", "0xNyk@users.noreply.github.com": "0xNyk", @@ -123,12 +166,14 @@ AUTHOR_MAP = { 
"aryan@synvoid.com": "aryansingh", "johnsonblake1@gmail.com": "blakejohnson", "hcn518@gmail.com": "pedh", + "haileymarshall005@gmail.com": "haileymarshall", "greer.guthrie@gmail.com": "g-guthrie", "kennyx102@gmail.com": "bobashopcashier", "shokatalishaikh95@gmail.com": "areu01or00", "bryan@intertwinesys.com": "bryanyoung", "christo.mitov@gmail.com": "christomitov", "hermes@nousresearch.com": "NousResearch", + "hermes@noushq.ai": "benbarclay", "chinmingcock@gmail.com": "ChimingLiu", "openclaw@sparklab.ai": "openclaw", "semihcvlk53@gmail.com": "Himess", @@ -143,16 +188,21 @@ AUTHOR_MAP = { "jack.47@gmail.com": "JackTheGit", "dalvidjr2022@gmail.com": "Jr-kenny", "m@statecraft.systems": "mbierling", - "balyan.sid@gmail.com": "balyansid", + "balyan.sid@gmail.com": "alt-glitch", "oluwadareab12@gmail.com": "bennytimz", "simon@simonmarcus.org": "simon-marcus", "xowiekk@gmail.com": "Xowiek", "1243352777@qq.com": "zons-zhaozhy", + "e.silacandmr@gmail.com": "Es1la", # ── bulk addition: 75 emails resolved via API, PR salvage bodies, noreply # crossref, and GH contributor list matching (April 2026 audit) ── "1115117931@qq.com": "aaronagent", "1506751656@qq.com": "hqhq1025", "364939526@qq.com": "luyao618", + "hgk324@gmail.com": "houziershi", + "176644217+PStarH@users.noreply.github.com": "PStarH", + "51058514+Sanjays2402@users.noreply.github.com": "Sanjays2402", + "906014227@qq.com": "bingo906", "aaronwong1999@icloud.com": "AaronWong1999", "agents@kylefrench.dev": "DeployFaith", "angelos@oikos.lan.home.malaiwah.com": "angelos", @@ -172,9 +222,12 @@ AUTHOR_MAP = { "don.rhm@gmail.com": "donrhmexe", "dorukardahan@hotmail.com": "dorukardahan", "dsocolobsky@gmail.com": "dsocolobsky", + "dylan.socolobsky@lambdaclass.com": "dsocolobsky", + "ignacio.avecilla@lambdaclass.com": "IAvecilla", "duerzy@gmail.com": "duerzy", "emozilla@nousresearch.com": "emozilla", "fancydirty@gmail.com": "fancydirty", + "farion1231@gmail.com": "farion1231", "floptopbot33@gmail.com": "flobo3", "fontana.pedro93@gmail.com": "pefontana", "francis.x.fitzpatrick@gmail.com": "fxfitz", @@ -193,6 +246,7 @@ AUTHOR_MAP = { "kagura.chen28@gmail.com": "kagura-agent", "1342088860@qq.com": "youngDoo", "kamil@gwozdz.me": "kamil-gwozdz", + "skmishra1991@gmail.com": "bugkill3r", "karamusti912@gmail.com": "MustafaKara7", "kira@ariaki.me": "kira-ariaki", "knopki@duck.com": "knopki", @@ -203,6 +257,7 @@ AUTHOR_MAP = { "82095453+iacker@users.noreply.github.com": "iacker", "sontianye@users.noreply.github.com": "sontianye", "jackjin1997@users.noreply.github.com": "jackjin1997", + "1037461232@qq.com": "jackjin1997", "danieldoderlein@users.noreply.github.com": "danieldoderlein", "lrawnsley@users.noreply.github.com": "lrawnsley", "taeuk178@users.noreply.github.com": "taeuk178", @@ -211,10 +266,12 @@ AUTHOR_MAP = { "ygd58@users.noreply.github.com": "ygd58", "vominh1919@users.noreply.github.com": "vominh1919", "iamagenius00@users.noreply.github.com": "iamagenius00", + "9219265+cresslank@users.noreply.github.com": "cresslank", "trevmanthony@gmail.com": "trevthefoolish", "ziliangpeng@users.noreply.github.com": "ziliangpeng", "centripetal-star@users.noreply.github.com": "centripetal-star", "LeonSGP43@users.noreply.github.com": "LeonSGP43", + "154585401+LeonSGP43@users.noreply.github.com": "LeonSGP43", "Lubrsy706@users.noreply.github.com": "Lubrsy706", "niyant@spicefi.xyz": "spniyant", "olafthiele@gmail.com": "olafthiele", @@ -254,6 +311,7 @@ AUTHOR_MAP = { "ywt000818@gmail.com": "OwenYWT", "dhandhalyabhavik@gmail.com": "v1k22", 
"rucchizhao@zhaochenfeideMacBook-Pro.local": "RucchiZ", + "tannerfokkens@Mac.attlocal.net": "tannerfokkens-maker", "lehaolin98@outlook.com": "LehaoLin", "yuewang1@microsoft.com": "imink", "1736355688@qq.com": "hedgeho9X", @@ -264,12 +322,25 @@ AUTHOR_MAP = { "anthhub@163.com": "anthhub", "shenuu@gmail.com": "shenuu", "xiayh17@gmail.com": "xiayh0107", + "zhujianxyz@gmail.com": "opriz", "asurla@nvidia.com": "anniesurla", "limkuan24@gmail.com": "WideLee", "aviralarora002@gmail.com": "AviArora02-commits", + "draixagent@gmail.com": "draix", "junminliu@gmail.com": "JimLiu", "jarvischer@gmail.com": "maxchernin", "levantam.98.2324@gmail.com": "LVT382009", + "zhurongcheng@rcrai.com": "heykb", + "withapurpose37@gmail.com": "StefanIsMe", + "261797239+lumenradley@users.noreply.github.com": "lumenradley", + "166376523+sjz-ks@users.noreply.github.com": "sjz-ks", + "haileymarshall005@gmail.com": "haileymarshall", + "aniruddhaadak80@users.noreply.github.com": "aniruddhaadak80", + "zheng.jerilyn@gmail.com": "jerilynzheng", + "asslaenn5@gmail.com": "Aslaaen", + "shalompmc0505@naver.com": "pinion05", + "105142614+VTRiot@users.noreply.github.com": "VTRiot", + "vivien000812@gmail.com": "iamagenius00", } diff --git a/scripts/whatsapp-bridge/bridge.js b/scripts/whatsapp-bridge/bridge.js index 70cf8e95d9..d1aeb73722 100644 --- a/scripts/whatsapp-bridge/bridge.js +++ b/scripts/whatsapp-bridge/bridge.js @@ -229,6 +229,14 @@ async function startSocket() { // Check allowlist for messages from others (resolve LID ↔ phone aliases) if (!msg.key.fromMe && !matchesAllowedUser(senderId, ALLOWED_USERS, SESSION_DIR)) { + try { + console.log(JSON.stringify({ + event: 'ignored', + reason: 'allowlist_mismatch', + chatId, + senderId, + })); + } catch {} continue; } @@ -364,6 +372,37 @@ async function startSocket() { const app = express(); app.use(express.json()); +// Host-header validation — defends against DNS rebinding. +// The bridge binds loopback-only (127.0.0.1) but a victim browser on +// the same machine could be tricked into fetching from an attacker +// hostname that TTL-flips to 127.0.0.1. Reject any request whose Host +// header doesn't resolve to a loopback alias. +// See GHSA-ppp5-vxwm-4cf7. +const _ACCEPTED_HOST_VALUES = new Set([ + 'localhost', + '127.0.0.1', + '[::1]', + '::1', +]); + +app.use((req, res, next) => { + const raw = (req.headers.host || '').trim(); + if (!raw) { + return res.status(400).json({ error: 'Missing Host header' }); + } + // Strip port suffix: "localhost:3000" → "localhost" + const hostOnly = (raw.includes(':') + ? raw.substring(0, raw.lastIndexOf(':')) + : raw + ).replace(/^\[|\]$/g, '').toLowerCase(); + if (!_ACCEPTED_HOST_VALUES.has(hostOnly)) { + return res.status(400).json({ + error: 'Invalid Host header. Bridge accepts loopback hosts only.', + }); + } + next(); +}); + // Poll for new messages (long-poll style) app.get('/messages', (req, res) => { const msgs = messageQueue.splice(0, messageQueue.length); diff --git a/skills/autonomous-ai-agents/hermes-agent/SKILL.md b/skills/autonomous-ai-agents/hermes-agent/SKILL.md index 362841f395..d19471c80d 100644 --- a/skills/autonomous-ai-agents/hermes-agent/SKILL.md +++ b/skills/autonomous-ai-agents/hermes-agent/SKILL.md @@ -338,7 +338,6 @@ Edit with `hermes config edit` or `hermes config set section.key value`. 
| `memory` | `memory_enabled`, `user_profile_enabled`, `provider` | | `security` | `tirith_enabled`, `website_blocklist` | | `delegation` | `model`, `provider`, `base_url`, `api_key`, `max_iterations` (50), `reasoning_effort` | -| `smart_model_routing` | `enabled`, `cheap_model` | | `checkpoints` | `enabled`, `max_snapshots` (50) | Full config reference: https://hermes-agent.nousresearch.com/docs/user-guide/configuration diff --git a/skills/creative/baoyu-comic/PORT_NOTES.md b/skills/creative/baoyu-comic/PORT_NOTES.md new file mode 100644 index 0000000000..637b7befb5 --- /dev/null +++ b/skills/creative/baoyu-comic/PORT_NOTES.md @@ -0,0 +1,77 @@ +# Port Notes — baoyu-comic + +Ported from [JimLiu/baoyu-skills](https://github.com/JimLiu/baoyu-skills) v1.56.1. + +## Changes from upstream + +### SKILL.md adaptations + +| Change | Upstream | Hermes | +|--------|----------|--------| +| Metadata namespace | `openclaw` | `hermes` (with `tags` + `homepage`) | +| Trigger | Slash commands / CLI flags | Natural language skill matching | +| User config | EXTEND.md file (project/user/XDG paths) | Removed — not part of Hermes infra | +| User prompts | `AskUserQuestion` (batched) | `clarify` tool (one question at a time) | +| Image generation | baoyu-imagine (Bun/TypeScript, supports `--ref`) | `image_generate` — **prompt-only**, returns a URL; no reference image input; agent must download the URL to the output directory | +| PDF assembly | `scripts/merge-to-pdf.ts` (Bun + `pdf-lib`) | Removed — the PDF merge step is out of scope for this port; pages are delivered as PNGs only | +| Platform support | Linux/macOS/Windows/WSL/PowerShell | Linux/macOS only | +| File operations | Generic instructions | Hermes file tools (`write_file`, `read_file`) | + +### Structural removals + +- **`references/config/` directory** (removed entirely): + - `first-time-setup.md` — blocking first-time setup flow for EXTEND.md + - `preferences-schema.md` — EXTEND.md YAML schema + - `watermark-guide.md` — watermark config (tied to EXTEND.md) +- **`scripts/` directory** (removed entirely): upstream's `merge-to-pdf.ts` depended on `pdf-lib`, which is not declared anywhere in the Hermes repo. Rather than add a new dependency, the port drops PDF assembly and delivers per-page PNGs. +- **Workflow Step 8 (Merge to PDF)** removed from `workflow.md`; Step 9 (Completion report) renumbered to Step 8. +- **Workflow Step 1.1** — "Load Preferences (EXTEND.md)" section removed from `workflow.md`; steps 1.2/1.3 renumbered to 1.1/1.2. +- **Generic "User Input Tools" and "Image Generation Tools" preambles** — SKILL.md no longer lists fallback rules for multiple possible tools; it references `clarify` and `image_generate` directly. + +### Image generation strategy changes + +`image_generate`'s schema accepts only `prompt` and `aspect_ratio` (`landscape` | `portrait` | `square`). Upstream's reference-image flow (`--ref characters.png` for character consistency, plus user-supplied refs for style/palette/scene) does not map to this tool, so the workflow was restructured: + +- **Character sheet PNG** is still generated for multi-page comics, but it is repositioned as a **human-facing review artifact** (for visual verification) and a reference for later regenerations / manual prompt edits. Page prompts themselves are built from the **text descriptions** in `characters/characters.md` (embedded inline during Step 5). `image_generate` never sees the PNG as a visual input. 
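Because `image_generate` never sees the character sheet, the inline text embedding described above carries all of the cross-page consistency burden. A sketch of how a page prompt might be assembled under this port's file conventions (the helper is illustrative, not part of the skill):

```python
from pathlib import Path

def build_page_prompt(page_brief: str, comic_dir: Path, ref_traits: list[str]) -> str:
    # Illustrative sketch. Character descriptions are copied verbatim into
    # every page prompt because the prompt-only image model has no other
    # way to keep characters consistent across pages.
    characters = (comic_dir / "characters" / "characters.md").read_text(encoding="utf-8")
    sections = [page_brief, "Characters (render consistently on every page):", characters]
    if ref_traits:
        # Traits extracted as text from user reference images (style/palette/scene).
        sections.append("Style traits: " + "; ".join(ref_traits))
    return "\n\n".join(sections)
```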
+- **User-supplied reference images** are reduced to `style` / `palette` / `scene` trait extraction — traits are embedded in the prompt body; the image files themselves are kept only for provenance under `refs/`. +- **Page prompts** now mandate that character descriptions are embedded inline (copied from `characters/characters.md`) — this is the only mechanism left to enforce cross-page character consistency. +- **Download step** — after every `image_generate` call, the returned URL is fetched to disk (e.g., `curl -fsSL "<image-url>" -o <page>.png`) and verified before the workflow advances. + +### SKILL.md reductions + +- CLI option columns (`--art`, `--tone`, `--layout`, `--aspect`, `--lang`, `--ref`, `--storyboard-only`, `--prompts-only`, `--images-only`, `--regenerate`) converted to plain-English option descriptions. +- Preset files (`presets/*.md`) and `ohmsha-guide.md`: `` `--style X` `` / `` `--art X --tone Y` `` shorthand rewritten to `art=X, tone=Y` + natural-language references. +- `partial-workflows.md`: per-skill slash command invocations rewritten as user-intent cues; PDF-related outputs removed. +- `auto-selection.md`: priority order dropped the EXTEND.md tier. +- `analysis-framework.md`: language-priority comment updated (user option → conversation → source). + +### File naming convention + +Source content pasted by the user is saved as `source-{slug}.md`, where `{slug}` is the kebab-case topic slug used for the output directory. Backups follow the same pattern with a `-backup-YYYYMMDD-HHMMSS` suffix. SKILL.md and `workflow.md` now agree on this single convention. + +### What was preserved verbatim + +- All 6 art-style definitions (`references/art-styles/`) +- All 7 tone definitions (`references/tones/`) +- All 7 layout definitions (`references/layouts/`) +- Core templates: `character-template.md`, `storyboard-template.md`, `base-prompt.md` +- Preset bodies (only the first few intro lines adapted; special rules unchanged) +- Author, version, homepage attribution + +## Syncing with upstream + +To pull upstream updates: + +```bash +# Compare versions +curl -sL https://raw.githubusercontent.com/JimLiu/baoyu-skills/main/skills/baoyu-comic/SKILL.md | head -5 +# Look for the version: line + +# Diff a reference file +diff <(curl -sL https://raw.githubusercontent.com/JimLiu/baoyu-skills/main/skills/baoyu-comic/references/art-styles/manga.md) \ + references/art-styles/manga.md +``` + +Art-style, tone, and layout reference files can usually be overwritten directly (they're upstream-verbatim). `SKILL.md`, `references/workflow.md`, `references/partial-workflows.md`, `references/auto-selection.md`, `references/analysis-framework.md`, `references/ohmsha-guide.md`, and `references/presets/*.md` must be manually merged since they contain Hermes-specific adaptations. + +If upstream adds a Hermes-compatible PDF merge step (no extra npm deps), restore `scripts/` and reintroduce Step 8 in `workflow.md`. diff --git a/skills/creative/baoyu-comic/SKILL.md b/skills/creative/baoyu-comic/SKILL.md new file mode 100644 index 0000000000..d3c89ed4c7 --- /dev/null +++ b/skills/creative/baoyu-comic/SKILL.md @@ -0,0 +1,246 @@ +--- +name: baoyu-comic +description: Knowledge comic creator supporting multiple art styles and tones. Creates original educational comics with detailed panel layouts and sequential image generation. Use when user asks to create "知识漫画", "教育漫画", "biography comic", "tutorial comic", or "Logicomix-style comic". 
+version: 1.56.1 +author: 宝玉 (JimLiu) +license: MIT +metadata: + hermes: + tags: [comic, knowledge-comic, creative, image-generation] + homepage: https://github.com/JimLiu/baoyu-skills#baoyu-comic +--- + +# Knowledge Comic Creator + +Adapted from [baoyu-comic](https://github.com/JimLiu/baoyu-skills) for Hermes Agent's tool ecosystem. + +Create original knowledge comics with flexible art style × tone combinations. + +## When to Use + +Trigger this skill when the user asks to create a knowledge/educational comic, biography comic, tutorial comic, or uses terms like "知识漫画", "教育漫画", or "Logicomix-style". The user provides content (text, file path, URL, or topic) and optionally specifies art style, tone, layout, aspect ratio, or language. + +## Reference Images + +Hermes' `image_generate` tool is **prompt-only** — it accepts a text prompt and an aspect ratio, and returns an image URL. It does **NOT** accept reference images. When the user supplies a reference image, use it to **extract traits in text** that get embedded in every page prompt: + +**Intake**: Accept file paths when the user provides them (or pastes images in conversation). +- File path(s) → copy to `refs/NN-ref-{slug}.{ext}` alongside the comic output for provenance +- Pasted image with no path → ask the user for the path via `clarify`, or extract style traits verbally as a text fallback +- No reference → skip this section + +**Usage modes** (per reference): + +| Usage | Effect | +|-------|--------| +| `style` | Extract style traits (line treatment, texture, mood) and append to every page's prompt body | +| `palette` | Extract hex colors and append to every page's prompt body | +| `scene` | Extract scene composition or subject notes and append to the relevant page(s) | + +**Record in each page's prompt frontmatter** when refs exist: + +```yaml +references: + - ref_id: 01 + filename: 01-ref-scene.png + usage: style + traits: "muted earth tones, soft-edged ink wash, low-contrast backgrounds" +``` + +Character consistency is driven by **text descriptions** in `characters/characters.md` (written in Step 3) that get embedded inline in every page prompt (Step 5). The optional PNG character sheet generated in Step 7.1 is a human-facing review artifact, not an input to `image_generate`. + +## Options + +### Visual Dimensions + +| Option | Values | Description | +|--------|--------|-------------| +| Art | ligne-claire (default), manga, realistic, ink-brush, chalk, minimalist | Art style / rendering technique | +| Tone | neutral (default), warm, dramatic, romantic, energetic, vintage, action | Mood / atmosphere | +| Layout | standard (default), cinematic, dense, splash, mixed, webtoon, four-panel | Panel arrangement | +| Aspect | 3:4 (default, portrait), 4:3 (landscape), 16:9 (widescreen) | Page aspect ratio | +| Language | auto (default), zh, en, ja, etc. | Output language | +| Refs | File paths | Reference images used for style / palette trait extraction (not passed to the image model). See [Reference Images](#reference-images) above. 
| + +### Partial Workflow Options + +| Option | Description | +|--------|-------------| +| Storyboard only | Generate storyboard only, skip prompts and images | +| Prompts only | Generate storyboard + prompts, skip images | +| Images only | Generate images from existing prompts directory | +| Regenerate N | Regenerate specific page(s) only (e.g., `3` or `2,5,8`) | + +Details: [references/partial-workflows.md](references/partial-workflows.md) + +### Art, Tone & Preset Catalogue + +- **Art styles** (6): `ligne-claire`, `manga`, `realistic`, `ink-brush`, `chalk`, `minimalist`. Full definitions at `references/art-styles/